Skip to content

Commit cd8dcbc

Browse files
authored
bpo-43410: Fix crash in the parser when producing syntax errors when reading from stdin (GH-24763)
1 parent 9923df9 commit cd8dcbc

File tree

5 files changed

+76
-31
lines changed

5 files changed

+76
-31
lines changed

Lib/test/test_cmd_line.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,9 +816,16 @@ def test_sys_flags_not_set(self):
816816
PYTHONVERBOSE="1",
817817
)
818818

819+
class SyntaxErrorTests(unittest.TestCase):
820+
def test_tokenizer_error_with_stdin(self):
821+
proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3",
822+
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
823+
self.assertNotEqual(proc.returncode, 0)
824+
self.assertNotEqual(proc.stderr, None)
825+
self.assertIn(b"\nSyntaxError", proc.stderr)
819826

820827
def test_main():
821-
support.run_unittest(CmdLineTest, IgnoreEnvironmentTest)
828+
support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests)
822829
support.reap_children()
823830

824831
if __name__ == "__main__":
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a bug that was causing the parser to crash when emitting syntax errors
2+
when reading input from stdin. Patch by Pablo Galindo.

Parser/pegen.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,8 @@ get_error_line(Parser *p, Py_ssize_t lineno)
397397
are stored in p->tok->stdin_content */
398398
assert(p->tok->fp == NULL || p->tok->fp == stdin);
399399

400-
char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
400+
char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
401+
401402
for (int i = 0; i < lineno - 1; i++) {
402403
cur_line = strchr(cur_line, '\n') + 1;
403404
}
@@ -440,7 +441,10 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
440441
goto error;
441442
}
442443

443-
if (p->start_rule == Py_file_input) {
444+
if (p->tok->fp_interactive) {
445+
error_line = get_error_line(p, lineno);
446+
}
447+
else if (p->start_rule == Py_file_input) {
444448
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
445449
}
446450

@@ -1232,7 +1236,7 @@ _PyPegen_run_parser(Parser *p)
12321236
if (p->fill == 0) {
12331237
RAISE_SYNTAX_ERROR("error at start before reading any input");
12341238
}
1235-
else if (p->tok->done == E_EOF) {
1239+
else if (p->tok->done == E_EOF) {
12361240
if (p->tok->level) {
12371241
raise_unclosed_parentheses_error(p);
12381242
} else {
@@ -1287,6 +1291,10 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
12871291
}
12881292
return NULL;
12891293
}
1294+
if (!tok->fp || ps1 != NULL || ps2 != NULL ||
1295+
PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
1296+
tok->fp_interactive = 1;
1297+
}
12901298
// This transfers the ownership to the tokenizer
12911299
tok->filename = filename_ob;
12921300
Py_INCREF(filename_ob);

Parser/tokenizer.c

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ tok_new(void)
5656
if (tok == NULL)
5757
return NULL;
5858
tok->buf = tok->cur = tok->inp = NULL;
59+
tok->fp_interactive = 0;
60+
tok->interactive_src_start = NULL;
61+
tok->interactive_src_end = NULL;
5962
tok->start = NULL;
6063
tok->end = NULL;
6164
tok->done = E_OK;
@@ -80,8 +83,6 @@ tok_new(void)
8083
tok->decoding_readline = NULL;
8184
tok->decoding_buffer = NULL;
8285
tok->type_comments = 0;
83-
tok->stdin_content = NULL;
84-
8586
tok->async_hacks = 0;
8687
tok->async_def = 0;
8788
tok->async_def_indent = 0;
@@ -323,6 +324,35 @@ check_bom(int get_char(struct tok_state *),
323324
return 1;
324325
}
325326

327+
static int tok_concatenate_interactive_new_line(struct tok_state* tok, char* line) {
328+
assert(tok->fp_interactive);
329+
330+
if (!line) {
331+
return 0;
332+
}
333+
334+
Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
335+
Py_ssize_t line_size = strlen(line);
336+
char* new_str = tok->interactive_src_start;
337+
338+
new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
339+
if (!new_str) {
340+
if (tok->interactive_src_start) {
341+
PyMem_Free(tok->interactive_src_start);
342+
}
343+
tok->interactive_src_start = NULL;
344+
tok->interactive_src_end = NULL;
345+
tok->done = E_NOMEM;
346+
return -1;
347+
}
348+
strcpy(new_str + current_size, line);
349+
350+
tok->interactive_src_start = new_str;
351+
tok->interactive_src_end = new_str + current_size + line_size;
352+
return 0;
353+
}
354+
355+
326356
/* Read a line of text from TOK into S, using the stream in TOK.
327357
Return NULL on failure, else S.
328358
@@ -552,6 +582,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
552582
badchar, tok->filename, tok->lineno + 1);
553583
return error_ret(tok);
554584
}
585+
586+
if (tok->fp_interactive &&
587+
tok_concatenate_interactive_new_line(tok, line) == -1) {
588+
return NULL;
589+
}
590+
555591
return line;
556592
}
557593

@@ -807,17 +843,21 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
807843
void
808844
PyTokenizer_Free(struct tok_state *tok)
809845
{
810-
if (tok->encoding != NULL)
846+
if (tok->encoding != NULL) {
811847
PyMem_Free(tok->encoding);
848+
}
812849
Py_XDECREF(tok->decoding_readline);
813850
Py_XDECREF(tok->decoding_buffer);
814851
Py_XDECREF(tok->filename);
815-
if (tok->fp != NULL && tok->buf != NULL)
852+
if (tok->fp != NULL && tok->buf != NULL) {
816853
PyMem_Free(tok->buf);
817-
if (tok->input)
854+
}
855+
if (tok->input) {
818856
PyMem_Free(tok->input);
819-
if (tok->stdin_content)
820-
PyMem_Free(tok->stdin_content);
857+
}
858+
if (tok->interactive_src_start != NULL) {
859+
PyMem_Free(tok->interactive_src_start);
860+
}
821861
PyMem_Free(tok);
822862
}
823863

@@ -858,24 +898,6 @@ tok_nextc(struct tok_state *tok)
858898
if (translated == NULL)
859899
return EOF;
860900
newtok = translated;
861-
if (tok->stdin_content == NULL) {
862-
tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
863-
if (tok->stdin_content == NULL) {
864-
tok->done = E_NOMEM;
865-
return EOF;
866-
}
867-
sprintf(tok->stdin_content, "%s", translated);
868-
}
869-
else {
870-
char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
871-
if (new_str == NULL) {
872-
tok->done = E_NOMEM;
873-
return EOF;
874-
}
875-
sprintf(new_str, "%s%s", tok->stdin_content, translated);
876-
PyMem_Free(tok->stdin_content);
877-
tok->stdin_content = new_str;
878-
}
879901
}
880902
if (tok->encoding && newtok && *newtok) {
881903
/* Recode to UTF-8 */
@@ -898,6 +920,10 @@ tok_nextc(struct tok_state *tok)
898920
strcpy(newtok, buf);
899921
Py_DECREF(u);
900922
}
923+
if (tok->fp_interactive &&
924+
tok_concatenate_interactive_new_line(tok, newtok) == -1) {
925+
return EOF;
926+
}
901927
if (tok->nextprompt != NULL)
902928
tok->prompt = tok->nextprompt;
903929
if (newtok == NULL)
@@ -958,7 +984,7 @@ tok_nextc(struct tok_state *tok)
958984
}
959985
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
960986
tok) == NULL) {
961-
if (!tok->decoding_erred)
987+
if (!tok->decoding_erred && !(tok->done == E_NOMEM))
962988
tok->done = E_EOF;
963989
done = 1;
964990
}

Parser/tokenizer.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ struct tok_state {
2626
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
2727
char *cur; /* Next character in buffer */
2828
char *inp; /* End of data in buffer */
29+
int fp_interactive; /* If the file descriptor is interactive */
30+
char *interactive_src_start; /* The start of the source parsed so far in interactive mode */
31+
char *interactive_src_end; /* The end of the source parsed so far in interactive mode */
2932
const char *end; /* End of input buffer if buf != NULL */
3033
const char *start; /* Start of current token if not NULL */
3134
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
@@ -37,7 +40,6 @@ struct tok_state {
3740
int atbol; /* Nonzero if at begin of new line */
3841
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
3942
const char *prompt, *nextprompt; /* For interactive prompting */
40-
char *stdin_content;
4143
int lineno; /* Current line number */
4244
int first_lineno; /* First line of a single line or multi line string
4345
expression (cf. issue 16806) */

0 commit comments

Comments
 (0)