Skip to content

Commit 995d9b9

Browse files
asottile authored and methane committed
bpo-16806: Fix lineno and col_offset for multi-line string tokens (GH-10021)
1 parent 1cffd0e commit 995d9b9

File tree

13 files changed

+91
-51
lines changed

13 files changed

+91
-51
lines changed

Lib/test/test_ast.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,25 @@ def test_get_docstring_none(self):
683683
node = ast.parse('async def foo():\n x = "not docstring"')
684684
self.assertIsNone(ast.get_docstring(node.body[0]))
685685

686+
def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
687+
node = ast.parse(
688+
'"""line one\nline two"""\n\n'
689+
'def foo():\n """line one\n line two"""\n\n'
690+
' def bar():\n """line one\n line two"""\n'
691+
' """line one\n line two"""\n'
692+
'"""line one\nline two"""\n\n'
693+
)
694+
self.assertEqual(node.body[0].col_offset, 0)
695+
self.assertEqual(node.body[0].lineno, 1)
696+
self.assertEqual(node.body[1].body[0].col_offset, 2)
697+
self.assertEqual(node.body[1].body[0].lineno, 5)
698+
self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)
699+
self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
700+
self.assertEqual(node.body[1].body[2].col_offset, 2)
701+
self.assertEqual(node.body[1].body[2].lineno, 11)
702+
self.assertEqual(node.body[2].col_offset, 0)
703+
self.assertEqual(node.body[2].lineno, 13)
704+
686705
def test_literal_eval(self):
687706
self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3])
688707
self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42})

Lib/test/test_fstring.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,7 @@ def test_ast_line_numbers_duplicate_expression(self):
270270
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
271271

272272
def test_ast_line_numbers_multiline_fstring(self):
273-
# FIXME: This test demonstrates invalid behavior due to JoinedStr's
274-
# immediate child nodes containing the wrong lineno. The enclosed
275-
# expressions have valid line information and column offsets.
276-
# See bpo-16806 and bpo-30465 for details.
273+
# See bpo-30465 for details.
277274
expr = """
278275
a = 10
279276
f'''
@@ -298,19 +295,16 @@ def test_ast_line_numbers_multiline_fstring(self):
298295
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
299296
self.assertEqual(type(t.body[1].value.values[2]), ast.Constant)
300297
self.assertEqual(type(t.body[1].value.values[2].value), str)
301-
# NOTE: the following invalid behavior is described in bpo-16806.
302-
# - line number should be the *first* line (3), not the *last* (8)
303-
# - column offset should not be -1
304-
self.assertEqual(t.body[1].lineno, 8)
305-
self.assertEqual(t.body[1].value.lineno, 8)
306-
self.assertEqual(t.body[1].value.values[0].lineno, 8)
307-
self.assertEqual(t.body[1].value.values[1].lineno, 8)
308-
self.assertEqual(t.body[1].value.values[2].lineno, 8)
309-
self.assertEqual(t.body[1].col_offset, -1)
310-
self.assertEqual(t.body[1].value.col_offset, -1)
311-
self.assertEqual(t.body[1].value.values[0].col_offset, -1)
312-
self.assertEqual(t.body[1].value.values[1].col_offset, -1)
313-
self.assertEqual(t.body[1].value.values[2].col_offset, -1)
298+
self.assertEqual(t.body[1].lineno, 3)
299+
self.assertEqual(t.body[1].value.lineno, 3)
300+
self.assertEqual(t.body[1].value.values[0].lineno, 3)
301+
self.assertEqual(t.body[1].value.values[1].lineno, 3)
302+
self.assertEqual(t.body[1].value.values[2].lineno, 3)
303+
self.assertEqual(t.body[1].col_offset, 0)
304+
self.assertEqual(t.body[1].value.col_offset, 0)
305+
self.assertEqual(t.body[1].value.values[0].col_offset, 0)
306+
self.assertEqual(t.body[1].value.values[1].col_offset, 0)
307+
self.assertEqual(t.body[1].value.values[2].col_offset, 0)
314308
# NOTE: the following lineno information and col_offset is correct for
315309
# expressions within FormattedValues.
316310
binop = t.body[1].value.values[1].value
@@ -321,8 +315,8 @@ def test_ast_line_numbers_multiline_fstring(self):
321315
self.assertEqual(binop.lineno, 4)
322316
self.assertEqual(binop.left.lineno, 4)
323317
self.assertEqual(binop.right.lineno, 6)
324-
self.assertEqual(binop.col_offset, 3)
325-
self.assertEqual(binop.left.col_offset, 3)
318+
self.assertEqual(binop.col_offset, 4)
319+
self.assertEqual(binop.left.col_offset, 4)
326320
self.assertEqual(binop.right.col_offset, 7)
327321

328322
def test_docstring(self):

Lib/test/test_opcodes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def test_setup_annotations_line(self):
2727
with open(ann_module.__file__) as f:
2828
txt = f.read()
2929
co = compile(txt, ann_module.__file__, 'exec')
30-
self.assertEqual(co.co_firstlineno, 6)
30+
self.assertEqual(co.co_firstlineno, 3)
3131
except OSError:
3232
pass
3333

Lib/test/test_string_literals.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def test_eval_str_invalid_escape(self):
117117
eval("'''\n\\z'''")
118118
self.assertEqual(len(w), 1)
119119
self.assertEqual(w[0].filename, '<string>')
120-
self.assertEqual(w[0].lineno, 2)
120+
self.assertEqual(w[0].lineno, 1)
121121

122122
with warnings.catch_warnings(record=True) as w:
123123
warnings.simplefilter('error', category=SyntaxWarning)
@@ -126,7 +126,7 @@ def test_eval_str_invalid_escape(self):
126126
exc = cm.exception
127127
self.assertEqual(w, [])
128128
self.assertEqual(exc.filename, '<string>')
129-
self.assertEqual(exc.lineno, 2)
129+
self.assertEqual(exc.lineno, 1)
130130

131131
def test_eval_str_raw(self):
132132
self.assertEqual(eval(""" r'x' """), 'x')
@@ -166,7 +166,7 @@ def test_eval_bytes_invalid_escape(self):
166166
eval("b'''\n\\z'''")
167167
self.assertEqual(len(w), 1)
168168
self.assertEqual(w[0].filename, '<string>')
169-
self.assertEqual(w[0].lineno, 2)
169+
self.assertEqual(w[0].lineno, 1)
170170

171171
with warnings.catch_warnings(record=True) as w:
172172
warnings.simplefilter('error', category=SyntaxWarning)
@@ -175,7 +175,7 @@ def test_eval_bytes_invalid_escape(self):
175175
exc = cm.exception
176176
self.assertEqual(w, [])
177177
self.assertEqual(exc.filename, '<string>')
178-
self.assertEqual(exc.lineno, 2)
178+
self.assertEqual(exc.lineno, 1)
179179

180180
def test_eval_bytes_raw(self):
181181
self.assertEqual(eval(""" br'x' """), b'x')

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,3 +1845,4 @@ Gennadiy Zlobin
18451845
Doug Zongker
18461846
Peter Åstrand
18471847
Zheao Li
1848+
Carsten Klein
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix ``lineno`` and ``col_offset`` for multi-line string tokens.

Parser/parsetok.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
205205
size_t len;
206206
char *str;
207207
col_offset = -1;
208+
int lineno;
209+
const char *line_start;
208210

209211
type = PyTokenizer_Get(tok, &a, &b);
210212
if (type == ERRORTOKEN) {
@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
253255
}
254256
}
255257
#endif
256-
if (a != NULL && a >= tok->line_start) {
257-
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
258+
259+
/* Nodes of type STRING, especially multi line strings
260+
must be handled differently in order to get both
261+
the starting line number and the column offset right.
262+
(cf. issue 16806) */
263+
lineno = type == STRING ? tok->first_lineno : tok->lineno;
264+
line_start = type == STRING ? tok->multi_line_start : tok->line_start;
265+
if (a != NULL && a >= line_start) {
266+
col_offset = Py_SAFE_DOWNCAST(a - line_start,
258267
intptr_t, int);
259268
}
260269
else {
@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
263272

264273
if ((err_ret->error =
265274
PyParser_AddToken(ps, (int)type, str,
266-
tok->lineno, col_offset,
275+
lineno, col_offset,
267276
&(err_ret->expected))) != E_OK) {
268277
if (err_ret->error != E_DONE) {
269278
PyObject_FREE(str);

Parser/tokenizer.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
15191519
int quote_size = 1; /* 1 or 3 */
15201520
int end_quote_size = 0;
15211521

1522+
/* Nodes of type STRING, especially multi line strings
1523+
must be handled differently in order to get both
1524+
the starting line number and the column offset right.
1525+
(cf. issue 16806) */
1526+
tok->first_lineno = tok->lineno;
1527+
tok->multi_line_start = tok->line_start;
1528+
15221529
/* Find the quote size and start of string */
15231530
c = tok_nextc(tok);
15241531
if (c == quote) {

Parser/tokenizer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ struct tok_state {
3838
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
3939
const char *prompt, *nextprompt; /* For interactive prompting */
4040
int lineno; /* Current line number */
41+
int first_lineno; /* First line of a single line or multi line string
42+
expression (cf. issue 16806) */
4143
int level; /* () [] {} Parentheses nesting level */
4244
/* Used to allow free continuations inside them */
4345
#ifndef PGEN
@@ -58,6 +60,9 @@ struct tok_state {
5860
char *encoding; /* Source encoding. */
5961
int cont_line; /* whether we are in a continuation line. */
6062
const char* line_start; /* pointer to start of current line */
63+
const char* multi_line_start; /* pointer to start of first line of
64+
a single line or multi line string
65+
expression (cf. issue 16806) */
6166
#ifndef PGEN
6267
PyObject *decoding_readline; /* open(...).readline */
6368
PyObject *decoding_buffer;

Python/ast.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4284,9 +4284,13 @@ fstring_fix_node_location(const node *parent, node *n, char *expr_str)
42844284
start--;
42854285
}
42864286
cols += (int)(substr - start);
4287-
/* Fix lineno in mulitline strings. */
4288-
while ((substr = strchr(substr + 1, '\n')))
4289-
lines--;
4287+
/* adjust the start based on the number of newlines encountered
4288+
before the f-string expression */
4289+
for (char* p = parent->n_str; p < substr; p++) {
4290+
if (*p == '\n') {
4291+
lines++;
4292+
}
4293+
}
42904294
}
42914295
}
42924296
fstring_shift_node_locations(n, lines, cols);

Python/importlib.h

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/importlib_external.h

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/importlib_zipimport.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ const unsigned char _Py_M__zipimport[] = {
484484
64,0,0,0,114,65,0,0,0,114,78,0,0,0,114,82,
485485
0,0,0,114,83,0,0,0,114,9,0,0,0,114,9,0,
486486
0,0,114,9,0,0,0,114,10,0,0,0,114,4,0,0,
487-
0,45,0,0,0,115,24,0,0,0,8,13,4,5,8,46,
487+
0,45,0,0,0,115,24,0,0,0,8,1,4,17,8,46,
488488
10,32,10,12,8,10,8,21,8,11,8,26,8,13,8,38,
489489
8,18,122,12,95,95,105,110,105,116,95,95,46,112,121,99,
490490
84,114,60,0,0,0,70,41,3,122,4,46,112,121,99,84,
@@ -1044,7 +1044,7 @@ const unsigned char _Py_M__zipimport[] = {
10441044
34,0,0,0,114,182,0,0,0,114,183,0,0,0,114,184,
10451045
0,0,0,114,189,0,0,0,114,9,0,0,0,114,9,0,
10461046
0,0,114,9,0,0,0,114,10,0,0,0,114,80,0,0,
1047-
0,212,2,0,0,115,14,0,0,0,8,5,4,1,4,2,
1047+
0,212,2,0,0,115,14,0,0,0,8,1,4,5,4,2,
10481048
8,4,8,9,8,6,8,11,114,80,0,0,0,41,45,114,
10491049
84,0,0,0,90,26,95,102,114,111,122,101,110,95,105,109,
10501050
112,111,114,116,108,105,98,95,101,120,116,101,114,110,97,108,
@@ -1065,8 +1065,8 @@ const unsigned char _Py_M__zipimport[] = {
10651065
0,0,114,170,0,0,0,114,152,0,0,0,114,150,0,0,
10661066
0,114,44,0,0,0,114,80,0,0,0,114,9,0,0,0,
10671067
114,9,0,0,0,114,9,0,0,0,114,10,0,0,0,218,
1068-
8,60,109,111,100,117,108,101,62,13,0,0,0,115,88,0,
1069-
0,0,4,4,8,1,16,1,8,1,8,1,8,1,8,1,
1068+
8,60,109,111,100,117,108,101,62,1,0,0,0,115,88,0,
1069+
0,0,4,16,8,1,16,1,8,1,8,1,8,1,8,1,
10701070
8,1,8,2,8,3,6,1,14,3,16,4,4,2,8,2,
10711071
4,1,4,1,4,2,14,127,0,127,0,1,12,1,12,1,
10721072
2,1,2,252,4,9,8,4,8,9,8,31,8,126,2,254,

0 commit comments

Comments
 (0)