Skip to content

Commit c0f2a5e

Browse files
authored
gh-98744: Prevent column-level decoding crashes on traceback module (#98824)
1 parent 7ea1056 commit c0f2a5e

File tree

3 files changed

+63
-14
lines changed

3 files changed

+63
-14
lines changed

Lib/test/test_traceback.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,56 @@ def f():
804804
]
805805
self.assertEqual(actual, expected)
806806

807+
def test_wide_characters_unicode_with_problematic_byte_offset(self):
808+
def f():
809+
width
810+
811+
actual = self.get_exception(f)
812+
expected = [
813+
f"Traceback (most recent call last):",
814+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
815+
f" callable()",
816+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
817+
f" width",
818+
]
819+
self.assertEqual(actual, expected)
820+
821+
822+
def test_byte_offset_with_wide_characters_middle(self):
823+
def f():
824+
width = 1
825+
raise ValueError(width)
826+
827+
actual = self.get_exception(f)
828+
expected = [
829+
f"Traceback (most recent call last):",
830+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
831+
f" callable()",
832+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
833+
f" raise ValueError(width)",
834+
]
835+
self.assertEqual(actual, expected)
836+
837+
def test_byte_offset_multiline(self):
838+
def f():
839+
www = 1
840+
th = 0
841+
842+
print(1, www(
843+
th))
844+
845+
actual = self.get_exception(f)
846+
expected = [
847+
f"Traceback (most recent call last):",
848+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
849+
f" callable()",
850+
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
851+
f" print(1, www(",
852+
f" ^^^^",
853+
]
854+
self.assertEqual(actual, expected)
855+
856+
807857

808858
@requires_debug_ranges()
809859
class PurePythonTracebackErrorCaretTests(

Lib/traceback.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -476,32 +476,32 @@ def format_frame_summary(self, frame_summary):
476476
frame_summary.colno is not None
477477
and frame_summary.end_colno is not None
478478
):
479-
colno = _byte_offset_to_character_offset(
480-
frame_summary._original_line, frame_summary.colno)
481-
end_colno = _byte_offset_to_character_offset(
482-
frame_summary._original_line, frame_summary.end_colno)
479+
start_offset = _byte_offset_to_character_offset(
480+
frame_summary._original_line, frame_summary.colno) + 1
481+
end_offset = _byte_offset_to_character_offset(
482+
frame_summary._original_line, frame_summary.end_colno) + 1
483483

484484
anchors = None
485485
if frame_summary.lineno == frame_summary.end_lineno:
486486
with suppress(Exception):
487487
anchors = _extract_caret_anchors_from_line_segment(
488-
frame_summary._original_line[colno - 1:end_colno - 1]
488+
frame_summary._original_line[start_offset - 1:end_offset - 1]
489489
)
490490
else:
491-
end_colno = stripped_characters + len(stripped_line)
491+
end_offset = stripped_characters + len(stripped_line)
492492

493493
# show indicators if primary char doesn't span the frame line
494-
if end_colno - colno < len(stripped_line) or (
494+
if end_offset - start_offset < len(stripped_line) or (
495495
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
496496
row.append(' ')
497-
row.append(' ' * (colno - stripped_characters))
497+
row.append(' ' * (start_offset - stripped_characters))
498498

499499
if anchors:
500500
row.append(anchors.primary_char * (anchors.left_end_offset))
501501
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
502-
row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
502+
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
503503
else:
504-
row.append('^' * (end_colno - colno))
504+
row.append('^' * (end_offset - start_offset))
505505

506506
row.append('\n')
507507

@@ -561,10 +561,7 @@ def format(self):
561561

562562
def _byte_offset_to_character_offset(str, offset):
563563
as_utf8 = str.encode('utf-8')
564-
if offset > len(as_utf8):
565-
offset = len(as_utf8)
566-
567-
return len(as_utf8[:offset + 1].decode("utf-8"))
564+
return len(as_utf8[:offset].decode("utf-8", errors="replace"))
568565

569566

570567
_Anchors = collections.namedtuple(
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Prevent crashing in :mod:`traceback` when retrieving the byte-offset for
2+
some source files that contain certain Unicode characters.

0 commit comments

Comments
 (0)