Skip to content

Commit c049609

Browse files
bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611) (GH-26616)
(cherry picked from commit 9fd21f6)
Co-authored-by: Pablo Galindo <[email protected]>
Co-authored-by: Pablo Galindo <[email protected]>
1 parent eeefa7f commit c049609

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

Lib/test/test_exceptions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2105,6 +2105,22 @@ def test_range_of_offsets(self):
21052105
sys.__excepthook__(*sys.exc_info())
21062106
the_exception = exc
21072107

2108+
def test_encodings(self):
# Regression test for bpo-44349: a script whose coding cookie declares
# cp437 contains a syntax error (an unparenthesized generator expression
# passed alongside another argument) after some non-ASCII characters.
# The traceback printed by the child interpreter must show the source
# line correctly decoded, followed by the caret underline.
2109+
source = (
2110+
'# -*- coding: cp437 -*-\n'
2111+
'"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
2112+
)
2113+
try:
2114+
# Write the script to disk using the same encoding its cookie declares.
with open(TESTFN, 'w', encoding='cp437') as testfile:
2115+
testfile.write(source)
2116+
# Run the script in a child interpreter with '-X utf8', so stderr can be
# decoded as UTF-8 below; the run is expected to fail.
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
2117+
err = err.decode('utf-8').splitlines()
2118+
2119+
# err[-3] is the echoed source line and err[-2] the caret underline;
# presumably err[-1] is the SyntaxError message itself.
self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
2120+
self.assertEqual(err[-2], ' ^^^^^^^^^^^^^^^^^^^')
2121+
finally:
2122+
# Always remove the temporary script, even if an assertion failed.
unlink(TESTFN)
2123+
21082124
def test_attributes_new_constructor(self):
21092125
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
21102126
the_exception = SyntaxError("bad bad", args)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix an edge case when displaying the offending source line in syntax errors for files that declare a source encoding other than UTF-8. Patch by Pablo Galindo.

Parser/pegen.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
456456
goto error;
457457
}
458458

459+
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
460+
// with an arbitrary encoding; otherwise we could end up with badly decoded text.
461+
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
459462
if (p->tok->fp_interactive) {
460463
error_line = get_error_line(p, lineno);
461464
}
462-
else if (p->start_rule == Py_file_input) {
465+
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
463466
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
464467
}
465468

@@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
471474
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
472475
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
473476
does not physically exist */
474-
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
477+
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
475478

476479
if (p->tok->lineno <= lineno) {
477480
Py_ssize_t size = p->tok->inp - p->tok->buf;

0 commit comments

Comments
 (0)