Skip to content

Commit d6d6371

Browse files
authored
bpo-42864: Improve error messages regarding unclosed parentheses (GH-24161)
1 parent 66f77ca commit d6d6371

File tree

8 files changed

+88
-8
lines changed

8 files changed

+88
-8
lines changed

Lib/test/test_codeop.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ def test_incomplete(self):
160160
ai("","eval")
161161
ai("\n","eval")
162162
ai("(","eval")
163-
ai("(\n\n\n","eval")
164163
ai("(9+","eval")
165164
ai("9+ \\","eval")
166165
ai("lambda z: \\","eval")

Lib/test/test_grammar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def test_eof_error(self):
260260
for s in samples:
261261
with self.assertRaises(SyntaxError) as cm:
262262
compile(s, "<test>", "exec")
263-
self.assertIn("unexpected EOF", str(cm.exception))
263+
self.assertIn("was never closed", str(cm.exception))
264264

265265
var_annot_global: int # a global annotated is necessary for test_var_annot
266266

Lib/test/test_pdb.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,10 +1649,10 @@ def test_errors_in_command(self):
16491649

16501650
self.assertEqual(stdout.splitlines()[1:], [
16511651
'-> pass',
1652-
'(Pdb) *** SyntaxError: unexpected EOF while parsing',
1652+
'(Pdb) *** SyntaxError: \'(\' was never closed',
16531653

16541654
'(Pdb) ENTERING RECURSIVE DEBUGGER',
1655-
'*** SyntaxError: unexpected EOF while parsing',
1655+
'*** SyntaxError: \'(\' was never closed',
16561656
'LEAVING RECURSIVE DEBUGGER',
16571657

16581658
'(Pdb) ENTERING RECURSIVE DEBUGGER',

Lib/test/test_syntax.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,14 @@ def test_invalid_line_continuation_left_recursive(self):
987987
self._check_error("A.\u03bc\\\n",
988988
"unexpected EOF while parsing")
989989

990+
def test_error_parenthesis(self):
991+
for paren in "([{":
992+
self._check_error(paren + "1 + 2", f"\\{paren}' was never closed")
993+
994+
for paren in ")]}":
995+
self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
996+
997+
990998
def test_main():
991999
support.run_unittest(SyntaxTestCase)
9921000
from test import test_syntax
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve error messages in the parser when parentheses are not closed. Patch
2+
by Pablo Galindo.

Parser/pegen.c

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
265265
return -1;
266266
}
267267

268+
static inline void
269+
raise_unclosed_parentheses_error(Parser *p) {
270+
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
271+
int error_col = p->tok->parencolstack[p->tok->level-1];
272+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
273+
error_lineno, error_col,
274+
"'%c' was never closed",
275+
p->tok->parenstack[p->tok->level-1]);
276+
}
277+
268278
static void
269279
raise_tokenizer_init_error(PyObject *filename)
270280
{
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
324334
RAISE_SYNTAX_ERROR("EOL while scanning string literal");
325335
return -1;
326336
case E_EOF:
327-
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
337+
if (p->tok->level) {
338+
raise_unclosed_parentheses_error(p);
339+
} else {
340+
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
341+
}
328342
return -1;
329343
case E_DEDENT:
330344
RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
11511165
p->call_invalid_rules = 1;
11521166
}
11531167

1168+
static int
1169+
_PyPegen_check_tokenizer_errors(Parser *p) {
1170+
// Tokenize the whole input to see if there are any tokenization
1171+
// errors such as mistmatching parentheses. These will get priority
1172+
// over generic syntax errors only if the line number of the error is
1173+
// before the one that we had for the generic error.
1174+
1175+
// We don't want to tokenize to the end for interactive input
1176+
if (p->tok->prompt != NULL) {
1177+
return 0;
1178+
}
1179+
1180+
1181+
Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
1182+
Py_ssize_t current_err_line = current_token->lineno;
1183+
1184+
// Save the tokenizer state to restore them later in case we found nothing
1185+
struct tok_state saved_tok;
1186+
memcpy(&saved_tok, p->tok, sizeof(struct tok_state));
1187+
1188+
for (;;) {
1189+
const char *start;
1190+
const char *end;
1191+
switch (PyTokenizer_Get(p->tok, &start, &end)) {
1192+
case ERRORTOKEN:
1193+
if (p->tok->level != 0) {
1194+
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
1195+
if (current_err_line > error_lineno) {
1196+
raise_unclosed_parentheses_error(p);
1197+
return -1;
1198+
}
1199+
}
1200+
break;
1201+
case ENDMARKER:
1202+
break;
1203+
default:
1204+
continue;
1205+
}
1206+
break;
1207+
}
1208+
1209+
// Restore the tokenizer state
1210+
memcpy(p->tok, &saved_tok, sizeof(struct tok_state));
1211+
return 0;
1212+
}
1213+
11541214
void *
11551215
_PyPegen_run_parser(Parser *p)
11561216
{
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
11641224
if (p->fill == 0) {
11651225
RAISE_SYNTAX_ERROR("error at start before reading any input");
11661226
}
1167-
else if (p->tok->done == E_EOF) {
1168-
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1227+
else if (p->tok->done == E_EOF) {
1228+
if (p->tok->level) {
1229+
raise_unclosed_parentheses_error(p);
1230+
} else {
1231+
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1232+
}
11691233
}
11701234
else {
11711235
if (p->tokens[p->fill-1]->type == INDENT) {
@@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p)
11751239
RAISE_INDENTATION_ERROR("unexpected unindent");
11761240
}
11771241
else {
1242+
if (_PyPegen_check_tokenizer_errors(p)) {
1243+
return NULL;
1244+
}
11781245
RAISE_SYNTAX_ERROR("invalid syntax");
11791246
}
11801247
}

Parser/tokenizer.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ tok_new(void)
6464
tok->tabsize = TABSIZE;
6565
tok->indent = 0;
6666
tok->indstack[0] = 0;
67-
6867
tok->atbol = 1;
6968
tok->pendin = 0;
7069
tok->prompt = tok->nextprompt = NULL;
@@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13961395

13971396
/* Check for EOF and errors now */
13981397
if (c == EOF) {
1398+
if (tok->level) {
1399+
return ERRORTOKEN;
1400+
}
13991401
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
14001402
}
14011403

@@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
18181820
}
18191821
tok->parenstack[tok->level] = c;
18201822
tok->parenlinenostack[tok->level] = tok->lineno;
1823+
tok->parencolstack[tok->level] = tok->start - tok->line_start;
18211824
tok->level++;
18221825
break;
18231826
case ')':

Parser/tokenizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct tok_state {
4545
/* Used to allow free continuations inside them */
4646
char parenstack[MAXLEVEL];
4747
int parenlinenostack[MAXLEVEL];
48+
int parencolstack[MAXLEVEL];
4849
PyObject *filename;
4950
/* Stuff for checking on different tab sizes */
5051
int altindstack[MAXINDENT]; /* Stack of alternate indents */

0 commit comments

Comments
 (0)