Skip to content

Commit 010669f

Browse files
committed
bpo-42827: Fix crash on SyntaxError in multiline expressions
When trying to extract the error line for the error message, there are two distinct cases: 1. The input comes from a file, in which case we can extract the error line using `PyErr_ProgramTextObject`, which we already do. 2. The input does not come from a file, in which case we need to get the source code from the tokenizer: * If the tokenizer's current line number is the same as the line of the error, we get the line from `tok->buf` and we're done. * Otherwise, we extract the error line from the source code in one of the following two ways: * If the input comes from a string, we have all the input in `tok->str` and can extract the error line from it. * If the input comes from stdin, i.e. the interactive prompt, we do not have access to the previous lines. That's why a new field, `tok->stdin_content`, is added, which holds the whole input for the current (multiline) statement or expression. We can then extract the error line from `tok->stdin_content` as we do in the string case above.
1 parent ddb5e11 commit 010669f

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

Lib/test/test_exceptions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ def testSyntaxErrorOffset(self):
209209
check('x = "a', 1, 7)
210210
check('lambda x: x = 2', 1, 1)
211211
check('f{a + b + c}', 1, 2)
212+
check('[file for str(file) in []\n])', 1, 11)
213+
check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
214+
check('[file for\n str(file) in []]', 2, 2)
212215

213216
# Errors thrown by compile.c
214217
check('class foo:return 1', 1, 11)

Parser/pegen.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,21 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
380380
return NULL;
381381
}
382382

383+
PyObject *
384+
get_error_line(Parser *p, int lineno)
385+
{
386+
char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
387+
for (int i = 0; i < lineno - 1; i++) {
388+
cur_line = strchr(cur_line, '\n') + 1;
389+
}
390+
391+
char *next_newline;
392+
if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
393+
return PyUnicode_DecodeUTF8(cur_line, strlen(cur_line), "replace");
394+
}
395+
return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
396+
}
397+
383398
void *
384399
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
385400
Py_ssize_t lineno, Py_ssize_t col_offset,
@@ -416,8 +431,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
416431
}
417432

418433
if (!error_line) {
419-
Py_ssize_t size = p->tok->inp - p->tok->buf;
420-
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
434+
if (p->tok->lineno == lineno) {
435+
Py_ssize_t size = p->tok->inp - p->tok->buf;
436+
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
437+
}
438+
else {
439+
error_line = get_error_line(p, lineno);
440+
}
421441
if (!error_line) {
422442
goto error;
423443
}

Parser/tokenizer.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ tok_new(void)
8181
tok->decoding_readline = NULL;
8282
tok->decoding_buffer = NULL;
8383
tok->type_comments = 0;
84+
tok->stdin_content = NULL;
8485

8586
tok->async_hacks = 0;
8687
tok->async_def = 0;
@@ -856,6 +857,25 @@ tok_nextc(struct tok_state *tok)
856857
if (translated == NULL)
857858
return EOF;
858859
newtok = translated;
860+
if (tok->stdin_content == NULL) {
861+
tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
862+
if (tok->stdin_content == NULL) {
863+
tok->done = E_NOMEM;
864+
return EOF;
865+
}
866+
strcpy(tok->stdin_content, translated);
867+
tok->stdin_content[strlen(translated)] = 0;
868+
}
869+
else {
870+
char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
871+
if (new_str == NULL) {
872+
tok->done = E_NOMEM;
873+
return EOF;
874+
}
875+
sprintf(new_str, "%s%s", tok->stdin_content, translated);
876+
PyMem_Free(tok->stdin_content);
877+
tok->stdin_content = new_str;
878+
}
859879
}
860880
if (tok->encoding && newtok && *newtok) {
861881
/* Recode to UTF-8 */

Parser/tokenizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ struct tok_state {
3737
int atbol; /* Nonzero if at begin of new line */
3838
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
3939
const char *prompt, *nextprompt; /* For interactive prompting */
40+
char *stdin_content;
4041
int lineno; /* Current line number */
4142
int first_lineno; /* First line of a single line or multi line string
4243
expression (cf. issue 16806) */

0 commit comments

Comments
 (0)