Skip to content

Commit 11a7f15

Browse files
bpo-40335: Correctly handle multi-line strings in tokenize error scenarios (GH-19619)
Co-authored-by: Guido van Rossum <[email protected]>
1 parent: 6a9e80a; commit: 11a7f15

File tree

3 files changed: 37 additions (+37) and 23 deletions (-23)

3 files changed

+37
-23
lines changed

Lib/test/test_exceptions.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ def check(src, lineno, offset, encoding='utf-8'):
188188
if not isinstance(src, str):
189189
src = src.decode(encoding, 'replace')
190190
line = src.split('\n')[lineno-1]
191-
self.assertEqual(cm.exception.text.rstrip('\n'), line)
191+
self.assertIn(line, cm.exception.text)
192192

193193
check('def fact(x):\n\treturn x!\n', 2, 10)
194194
check('1 +\n', 1, 4)
@@ -217,6 +217,16 @@ def check(src, lineno, offset, encoding='utf-8'):
217217
check(b'\xce\xb1 = 0xI', 1, 6)
218218
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
219219
encoding='iso8859-7')
220+
check(b"""if 1:
221+
def foo():
222+
'''
223+
224+
def bar():
225+
pass
226+
227+
def baz():
228+
'''quux'''
229+
""", 9, 20)
220230

221231
# Errors thrown by symtable.c
222232
check('x = [(yield i) for i in range(3)]', 1, 5)

Parser/parsetok.c

Lines changed: 22 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -251,25 +251,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
251251
const char *line_start;
252252

253253
type = PyTokenizer_Get(tok, &a, &b);
254-
if (type == ERRORTOKEN) {
255-
err_ret->error = tok->done;
256-
break;
257-
}
258-
if (type == ENDMARKER && started) {
259-
type = NEWLINE; /* Add an extra newline */
260-
started = 0;
261-
/* Add the right number of dedent tokens,
262-
except if a certain flag is given --
263-
codeop.py uses this. */
264-
if (tok->indent &&
265-
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
266-
{
267-
tok->pendin = -tok->indent;
268-
tok->indent = 0;
269-
}
270-
}
271-
else
272-
started = 1;
254+
273255
len = (a != NULL && b != NULL) ? b - a : 0;
274256
str = (char *) PyObject_MALLOC(len + 1);
275257
if (str == NULL) {
@@ -328,6 +310,27 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
328310
continue;
329311
}
330312

313+
if (type == ERRORTOKEN) {
314+
err_ret->error = tok->done;
315+
break;
316+
}
317+
if (type == ENDMARKER && started) {
318+
type = NEWLINE; /* Add an extra newline */
319+
started = 0;
320+
/* Add the right number of dedent tokens,
321+
except if a certain flag is given --
322+
codeop.py uses this. */
323+
if (tok->indent &&
324+
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
325+
{
326+
tok->pendin = -tok->indent;
327+
tok->indent = 0;
328+
}
329+
}
330+
else {
331+
started = 1;
332+
}
333+
331334
if ((err_ret->error =
332335
PyParser_AddToken(ps, (int)type, str,
333336
lineno, col_offset, tok->lineno, end_col_offset,

Parser/tokenizer.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1392,13 +1392,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13921392
if (nonascii && !verify_identifier(tok)) {
13931393
return ERRORTOKEN;
13941394
}
1395+
1396+
*p_start = tok->start;
1397+
*p_end = tok->cur;
1398+
13951399
if (c == '"' || c == '\'') {
13961400
tok->done = E_BADPREFIX;
13971401
return ERRORTOKEN;
13981402
}
1399-
*p_start = tok->start;
1400-
*p_end = tok->cur;
1401-
14021403
/* async/await parsing block. */
14031404
if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
14041405
/* May be an 'async' or 'await' token. For Python 3.7 or

0 commit comments

Comments
 (0)