Skip to content

[3.11] gh-98744: Prevent column-level decoding crashes on traceback module #98850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions Lib/test/test_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,56 @@ def f():
]
self.assertEqual(actual, expected)

def test_wide_characters_unicode_with_problematic_byte_offset(self):
# Checks the traceback rendered when the reported line in f is a single bare
# name: the expected output below ends at that source line, with no
# indicator row after it.
# NOTE(review): the test name refers to wide characters, but the identifier
# appears as plain ASCII in this copy -- confirm against the upstream file.
def f():
width

# get_exception() is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- verify.
actual = self.get_exception(f)
# The "+ 1" is the position of the reported line counted from the
# `def f():` line, so the layout of f must not change.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
f" width",
]
self.assertEqual(actual, expected)


def test_byte_offset_with_wide_characters_middle(self):
# Checks the traceback rendered when f raises ValueError(width) on its
# second body line: the expected output ends at the `raise` source line,
# with no indicator row after it.
# NOTE(review): the test name refers to wide characters, but the identifier
# appears as plain ASCII in this copy -- confirm against the upstream file.
def f():
width = 1
raise ValueError(width)

# get_exception() is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- verify.
actual = self.get_exception(f)
# The "+ 2" is the position of the `raise` line counted from the
# `def f():` line, so the layout of f must not change.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
f" raise ValueError(width)",
]
self.assertEqual(actual, expected)

def test_byte_offset_multiline(self):
# Checks the traceback rendered when the reported expression spans two
# physical lines: the expected output shows only the first line of the
# expression, followed by a single row of four carets.
# f binds www to 1 and then calls www(...), so the call presumably raises
# TypeError; the exception type itself is not part of the expected output.
def f():
www = 1
th = 0

print(1, www(
th))

# get_exception() is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- verify.
actual = self.get_exception(f)
# The "+ 4" counts from the `def f():` line and includes the blank line
# inside f, so the layout of f must not change.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
f" print(1, www(",
f" ^^^^",
]
self.assertEqual(actual, expected)


@cpython_only
@requires_debug_ranges()
class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests):
Expand Down
25 changes: 11 additions & 14 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,32 +475,32 @@ def format_frame_summary(self, frame_summary):
frame_summary.colno is not None
and frame_summary.end_colno is not None
):
colno = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.colno)
end_colno = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.end_colno)
start_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.colno) + 1
end_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.end_colno) + 1

anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
anchors = _extract_caret_anchors_from_line_segment(
frame_summary._original_line[colno - 1:end_colno - 1]
frame_summary._original_line[start_offset - 1:end_offset - 1]
)
else:
end_colno = stripped_characters + len(stripped_line)
end_offset = stripped_characters + len(stripped_line)

# show indicators if primary char doesn't span the frame line
if end_colno - colno < len(stripped_line) or (
if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
row.append(' ')
row.append(' ' * (colno - stripped_characters))
row.append(' ' * (start_offset - stripped_characters))

if anchors:
row.append(anchors.primary_char * (anchors.left_end_offset))
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
else:
row.append('^' * (end_colno - colno))
row.append('^' * (end_offset - start_offset))

row.append('\n')

Expand Down Expand Up @@ -560,10 +560,7 @@ def format(self):

def _byte_offset_to_character_offset(str, offset):
as_utf8 = str.encode('utf-8')
if offset > len(as_utf8):
offset = len(as_utf8)

return len(as_utf8[:offset + 1].decode("utf-8"))
return len(as_utf8[:offset].decode("utf-8", errors="replace"))


_Anchors = collections.namedtuple(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Prevent crashing in :mod:`traceback` when retrieving the byte-offset for
some source files that contain certain Unicode characters.