bpo-42864: Improve error messages regarding unclosed parentheses #24161

Merged
merged 5 commits on Jan 19, 2021
1 change: 0 additions & 1 deletion Lib/test/test_codeop.py
@@ -160,7 +160,6 @@ def test_incomplete(self):
ai("","eval")
ai("\n","eval")
ai("(","eval")
ai("(\n\n\n","eval")
@pablogsal (Member, Author), Jan 14, 2021:

The EOF was reached on a different line before, but now it raises the same error as the case without newlines (because the unclosed parenthesis is in the same place). I think we can allow that to happen.

ai("(9+","eval")
ai("9+ \\","eval")
ai("lambda z: \\","eval")
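
To illustrate the behaviour the review comment above describes, here is a minimal sketch (not part of the diff): after this change, "(" with and without trailing newlines is expected to produce the same unclosed-parenthesis error at the same location, which is why the second codeop case above is dropped.

# Illustrative only: compare the SyntaxError raised for "(" with and
# without trailing newlines; both are expected to report
# "'(' was never closed" on the same line after this change.
def error_info(source):
    try:
        compile(source, "<test>", "eval")
    except SyntaxError as exc:
        return exc.msg, exc.lineno

print(error_info("("))         # e.g. ("'(' was never closed", 1)
print(error_info("(\n\n\n"))   # expected to match the line above
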
2 changes: 1 addition & 1 deletion Lib/test/test_grammar.py
@@ -260,7 +260,7 @@ def test_eof_error(self):
for s in samples:
with self.assertRaises(SyntaxError) as cm:
compile(s, "<test>", "exec")
self.assertIn("unexpected EOF", str(cm.exception))
self.assertIn("was never closed", str(cm.exception))

var_annot_global: int # a global annotated is necessary for test_var_annot

4 changes: 2 additions & 2 deletions Lib/test/test_pdb.py
@@ -1649,10 +1649,10 @@ def test_errors_in_command(self):

self.assertEqual(stdout.splitlines()[1:], [
'-> pass',
- '(Pdb) *** SyntaxError: unexpected EOF while parsing',
+ '(Pdb) *** SyntaxError: \'(\' was never closed',

'(Pdb) ENTERING RECURSIVE DEBUGGER',
- '*** SyntaxError: unexpected EOF while parsing',
+ '*** SyntaxError: \'(\' was never closed',
'LEAVING RECURSIVE DEBUGGER',

'(Pdb) ENTERING RECURSIVE DEBUGGER',
8 changes: 8 additions & 0 deletions Lib/test/test_syntax.py
@@ -987,6 +987,14 @@ def test_invalid_line_continuation_left_recursive(self):
self._check_error("A.\u03bc\\\n",
"unexpected EOF while parsing")

def test_error_parenthesis(self):
for paren in "([{":
self._check_error(paren + "1 + 2", f"\\{paren}' was never closed")

for paren in ")]}":
self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")


def test_main():
support.run_unittest(SyntaxTestCase)
from test import test_syntax
@@ -0,0 +1,2 @@
Improve error messages in the parser when parentheses are not closed. Patch
by Pablo Galindo.
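
As a rough sketch of the improved messages (illustrative input mirroring the test_syntax cases above; not part of the diff): an unclosed opening bracket now names the bracket that was never closed, while a stray closing bracket keeps the existing "unmatched" wording.

# Illustrative only; the expected messages mirror Lib/test/test_syntax.py above.
for source in ("(1 + 2", "[1 + 2", "{1 + 2"):
    try:
        compile(source, "<test>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # e.g. "'(' was never closed"

for source in (")1 + 2", "]1 + 2", "}1 + 2"):
    try:
        compile(source, "<test>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # e.g. "unmatched ')'"
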
73 changes: 70 additions & 3 deletions Parser/pegen.c
@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
return -1;
}

static inline void
raise_unclosed_parentheses_error(Parser *p) {
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
int error_col = p->tok->parencolstack[p->tok->level-1];
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
error_lineno, error_col,
"'%c' was never closed",
p->tok->parenstack[p->tok->level-1]);
}

static void
raise_tokenizer_init_error(PyObject *filename)
{
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
RAISE_SYNTAX_ERROR("EOL while scanning string literal");
return -1;
case E_EOF:
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
if (p->tok->level) {
raise_unclosed_parentheses_error(p);
} else {
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
}
return -1;
case E_DEDENT:
RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
p->call_invalid_rules = 1;
}

static int
_PyPegen_check_tokenizer_errors(Parser *p) {
// Tokenize the whole input to see if there are any tokenization
// errors such as mismatching parentheses. These will get priority
// over generic syntax errors only if the line number of the error is
// before the one that we had for the generic error.

// We don't want to tokenize to the end for interactive input
if (p->tok->prompt != NULL) {
return 0;
}


Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
Py_ssize_t current_err_line = current_token->lineno;

// Save the tokenizer state to restore it later in case we find nothing
struct tok_state saved_tok;
memcpy(&saved_tok, p->tok, sizeof(struct tok_state));

for (;;) {
const char *start;
const char *end;
switch (PyTokenizer_Get(p->tok, &start, &end)) {
case ERRORTOKEN:
if (p->tok->level != 0) {
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
if (current_err_line > error_lineno) {
raise_unclosed_parentheses_error(p);
return -1;
}
}
break;
case ENDMARKER:
break;
default:
continue;
}
break;
}

// Restore the tokenizer state
memcpy(p->tok, &saved_tok, sizeof(struct tok_state));
return 0;
}

void *
_PyPegen_run_parser(Parser *p)
{
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
if (p->fill == 0) {
RAISE_SYNTAX_ERROR("error at start before reading any input");
}
- else if (p->tok->done == E_EOF) {
-     RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+ else if (p->tok->done == E_EOF) {
+     if (p->tok->level) {
+         raise_unclosed_parentheses_error(p);
+     } else {
+         RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+     }
}
else {
if (p->tokens[p->fill-1]->type == INDENT) {
@@ -1175,6 +1239,9 @@
RAISE_INDENTATION_ERROR("unexpected unindent");
}
else {
if (_PyPegen_check_tokenizer_errors(p)) {
return NULL;
}
RAISE_SYNTAX_ERROR("invalid syntax");
}
}
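
A hedged sketch of the case _PyPegen_check_tokenizer_errors targets (the example input is mine, not taken from the PR): when the parser stops with a generic error on a later line but the tokenizer's paren stack shows a bracket opened on an earlier line that is never closed, the unclosed-bracket report is expected to take priority.

# Illustrative only: the '(' opened on line 1 is never closed, so line 2 is
# still tokenized "inside" the parentheses and the parser fails there.
# Because the opening line (1) precedes the generic error line (2), the
# error is expected to point back at the '(' instead of "invalid syntax".
source = "values = (1, 2, 3\nif values:\n    pass\n"
try:
    compile(source, "<test>", "exec")
except SyntaxError as exc:
    print(exc.msg, exc.lineno)   # expected: '(' was never closed 1
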
5 changes: 4 additions & 1 deletion Parser/tokenizer.c
@@ -64,7 +64,6 @@ tok_new(void)
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;

tok->atbol = 1;
tok->pendin = 0;
tok->prompt = tok->nextprompt = NULL;
@@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)

/* Check for EOF and errors now */
if (c == EOF) {
if (tok->level) {
return ERRORTOKEN;
}
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
}

@@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
tok->parenstack[tok->level] = c;
tok->parenlinenostack[tok->level] = tok->lineno;
tok->parencolstack[tok->level] = tok->start - tok->line_start;
tok->level++;
break;
case ')':
1 change: 1 addition & 0 deletions Parser/tokenizer.h
@@ -45,6 +45,7 @@ struct tok_state {
/* Used to allow free continuations inside them */
char parenstack[MAXLEVEL];
int parenlinenostack[MAXLEVEL];
int parencolstack[MAXLEVEL];
PyObject *filename;
/* Stuff for checking on different tab sizes */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
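
The new parencolstack field records the column at which each bracket was opened, alongside the existing parenlinenostack line, so the error can carry a full location. A small sketch of how that surfaces in Python (the exact offset convention is an assumption on my part):

# Illustrative only: the '[' opened on line 1 is reported with the line and
# (approximate) column stored in parenlinenostack/parencolstack.
try:
    compile("foo = [1, 2\nbar = 3\n", "<test>", "exec")
except SyntaxError as exc:
    print(exc.msg)                 # expected: '[' was never closed
    print(exc.lineno, exc.offset)  # expected: line 1, offset near column 7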