Skip to content

Commit 751da28

Browse files
authored
[3.11] gh-98744: Prevent column-level decoding crashes on traceback module (#98850)
Co-authored-by: Batuhan Taskaya <[email protected]>
1 parent 12957d7 commit 751da28

File tree

3 files changed

+63
-14
lines changed

3 files changed

+63
-14
lines changed

Lib/test/test_traceback.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,56 @@ def f():
778778
]
779779
self.assertEqual(actual, expected)
780780

781+
def test_wide_characters_unicode_with_problematic_byte_offset(self):
782+
def f():
783+
width
784+
785+
actual = self.get_exception(f)
786+
expected = [
787+
f"Traceback (most recent call last):",
788+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
789+
f" callable()",
790+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
791+
f" width",
792+
]
793+
self.assertEqual(actual, expected)
794+
795+
796+
def test_byte_offset_with_wide_characters_middle(self):
797+
def f():
798+
width = 1
799+
raise ValueError(width)
800+
801+
actual = self.get_exception(f)
802+
expected = [
803+
f"Traceback (most recent call last):",
804+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
805+
f" callable()",
806+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
807+
f" raise ValueError(width)",
808+
]
809+
self.assertEqual(actual, expected)
810+
811+
def test_byte_offset_multiline(self):
812+
def f():
813+
www = 1
814+
th = 0
815+
816+
print(1, www(
817+
th))
818+
819+
actual = self.get_exception(f)
820+
expected = [
821+
f"Traceback (most recent call last):",
822+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
823+
f" callable()",
824+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
825+
f" print(1, www(",
826+
f" ^^^^",
827+
]
828+
self.assertEqual(actual, expected)
829+
830+
781831
@cpython_only
782832
@requires_debug_ranges()
783833
class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests):

Lib/traceback.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -475,32 +475,32 @@ def format_frame_summary(self, frame_summary):
475475
frame_summary.colno is not None
476476
and frame_summary.end_colno is not None
477477
):
478-
colno = _byte_offset_to_character_offset(
479-
frame_summary._original_line, frame_summary.colno)
480-
end_colno = _byte_offset_to_character_offset(
481-
frame_summary._original_line, frame_summary.end_colno)
478+
start_offset = _byte_offset_to_character_offset(
479+
frame_summary._original_line, frame_summary.colno) + 1
480+
end_offset = _byte_offset_to_character_offset(
481+
frame_summary._original_line, frame_summary.end_colno) + 1
482482

483483
anchors = None
484484
if frame_summary.lineno == frame_summary.end_lineno:
485485
with suppress(Exception):
486486
anchors = _extract_caret_anchors_from_line_segment(
487-
frame_summary._original_line[colno - 1:end_colno - 1]
487+
frame_summary._original_line[start_offset - 1:end_offset - 1]
488488
)
489489
else:
490-
end_colno = stripped_characters + len(stripped_line)
490+
end_offset = stripped_characters + len(stripped_line)
491491

492492
# show indicators if primary char doesn't span the frame line
493-
if end_colno - colno < len(stripped_line) or (
493+
if end_offset - start_offset < len(stripped_line) or (
494494
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
495495
row.append(' ')
496-
row.append(' ' * (colno - stripped_characters))
496+
row.append(' ' * (start_offset - stripped_characters))
497497

498498
if anchors:
499499
row.append(anchors.primary_char * (anchors.left_end_offset))
500500
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
501-
row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
501+
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
502502
else:
503-
row.append('^' * (end_colno - colno))
503+
row.append('^' * (end_offset - start_offset))
504504

505505
row.append('\n')
506506

@@ -560,10 +560,7 @@ def format(self):
560560

561561
def _byte_offset_to_character_offset(str, offset):
562562
as_utf8 = str.encode('utf-8')
563-
if offset > len(as_utf8):
564-
offset = len(as_utf8)
565-
566-
return len(as_utf8[:offset + 1].decode("utf-8"))
563+
return len(as_utf8[:offset].decode("utf-8", errors="replace"))
567564

568565

569566
_Anchors = collections.namedtuple(
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Prevent crashing in :mod:`traceback` when retrieving the byte-offset for
2+
some source files that contain certain Unicode characters.

0 commit comments

Comments
 (0)