Skip to content

Commit 09ec815

Browse files
[3.12] gh-110259: Fix f-strings with multiline expressions and format specs (GH-110271) (#110396)
gh-110259: Fix f-strings with multiline expressions and format specs (GH-110271) (cherry picked from commit cc389ef) Signed-off-by: Pablo Galindo <[email protected]> Co-authored-by: Pablo Galindo Salgado <[email protected]>
1 parent 7bfcfcf commit 09ec815

File tree

5 files changed

+128
-10
lines changed

5 files changed

+128
-10
lines changed

Lib/ast.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,13 +1268,15 @@ def visit_JoinedStr(self, node):
12681268
quote_type = quote_types[0]
12691269
self.write(f"{quote_type}{value}{quote_type}")
12701270

1271-
def _write_fstring_inner(self, node):
1271+
def _write_fstring_inner(self, node, scape_newlines=False):
12721272
if isinstance(node, JoinedStr):
12731273
# for both the f-string itself, and format_spec
12741274
for value in node.values:
1275-
self._write_fstring_inner(value)
1275+
self._write_fstring_inner(value, scape_newlines=scape_newlines)
12761276
elif isinstance(node, Constant) and isinstance(node.value, str):
12771277
value = node.value.replace("{", "{{").replace("}", "}}")
1278+
if scape_newlines:
1279+
value = value.replace("\n", "\\n")
12781280
self.write(value)
12791281
elif isinstance(node, FormattedValue):
12801282
self.visit_FormattedValue(node)
@@ -1297,7 +1299,10 @@ def unparse_inner(inner):
12971299
self.write(f"!{chr(node.conversion)}")
12981300
if node.format_spec:
12991301
self.write(":")
1300-
self._write_fstring_inner(node.format_spec)
1302+
self._write_fstring_inner(
1303+
node.format_spec,
1304+
scape_newlines=True
1305+
)
13011306

13021307
def visit_Name(self, node):
13031308
self.write(node.id)

Lib/test/test_tokenize.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,55 @@ def test_string(self):
571571
OP '=' (3, 0) (3, 1)
572572
OP '}' (3, 1) (3, 2)
573573
FSTRING_END "'''" (3, 2) (3, 5)
574+
""")
575+
self.check_tokenize("""\
576+
f'''__{
577+
x:a
578+
}__'''""", """\
579+
FSTRING_START "f'''" (1, 0) (1, 4)
580+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
581+
OP '{' (1, 6) (1, 7)
582+
NL '\\n' (1, 7) (1, 8)
583+
NAME 'x' (2, 4) (2, 5)
584+
OP ':' (2, 5) (2, 6)
585+
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
586+
OP '}' (3, 0) (3, 1)
587+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
588+
FSTRING_END "'''" (3, 3) (3, 6)
589+
""")
590+
self.check_tokenize("""\
591+
f'''__{
592+
x:a
593+
b
594+
c
595+
d
596+
}__'''""", """\
597+
FSTRING_START "f'''" (1, 0) (1, 4)
598+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
599+
OP '{' (1, 6) (1, 7)
600+
NL '\\n' (1, 7) (1, 8)
601+
NAME 'x' (2, 4) (2, 5)
602+
OP ':' (2, 5) (2, 6)
603+
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
604+
OP '}' (6, 0) (6, 1)
605+
FSTRING_MIDDLE '__' (6, 1) (6, 3)
606+
FSTRING_END "'''" (6, 3) (6, 6)
607+
""")
608+
self.check_tokenize("""\
609+
f'__{
610+
x:d
611+
}__'""", """\
612+
FSTRING_START "f'" (1, 0) (1, 2)
613+
FSTRING_MIDDLE '__' (1, 2) (1, 4)
614+
OP '{' (1, 4) (1, 5)
615+
NL '\\n' (1, 5) (1, 6)
616+
NAME 'x' (2, 4) (2, 5)
617+
OP ':' (2, 5) (2, 6)
618+
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
619+
NL '\\n' (2, 7) (2, 8)
620+
OP '}' (3, 0) (3, 1)
621+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
622+
FSTRING_END "'" (3, 3) (3, 4)
574623
""")
575624

576625
def test_function(self):
@@ -2274,6 +2323,54 @@ def test_string(self):
22742323
FSTRING_START \'f"\' (1, 0) (1, 2)
22752324
FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
22762325
FSTRING_END \'"\' (1, 16) (1, 17)
2326+
""")
2327+
2328+
self.check_tokenize("""\
2329+
f'''__{
2330+
x:a
2331+
}__'''""", """\
2332+
FSTRING_START "f'''" (1, 0) (1, 4)
2333+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
2334+
LBRACE '{' (1, 6) (1, 7)
2335+
NAME 'x' (2, 4) (2, 5)
2336+
COLON ':' (2, 5) (2, 6)
2337+
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
2338+
RBRACE '}' (3, 0) (3, 1)
2339+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
2340+
FSTRING_END "'''" (3, 3) (3, 6)
2341+
""")
2342+
2343+
self.check_tokenize("""\
2344+
f'''__{
2345+
x:a
2346+
b
2347+
c
2348+
d
2349+
}__'''""", """\
2350+
FSTRING_START "f'''" (1, 0) (1, 4)
2351+
FSTRING_MIDDLE '__' (1, 4) (1, 6)
2352+
LBRACE '{' (1, 6) (1, 7)
2353+
NAME 'x' (2, 4) (2, 5)
2354+
COLON ':' (2, 5) (2, 6)
2355+
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
2356+
RBRACE '}' (6, 0) (6, 1)
2357+
FSTRING_MIDDLE '__' (6, 1) (6, 3)
2358+
FSTRING_END "'''" (6, 3) (6, 6)
2359+
""")
2360+
2361+
self.check_tokenize("""\
2362+
f'__{
2363+
x:d
2364+
}__'""", """\
2365+
FSTRING_START "f'" (1, 0) (1, 2)
2366+
FSTRING_MIDDLE '__' (1, 2) (1, 4)
2367+
LBRACE '{' (1, 4) (1, 5)
2368+
NAME 'x' (2, 4) (2, 5)
2369+
COLON ':' (2, 5) (2, 6)
2370+
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
2371+
RBRACE '}' (3, 0) (3, 1)
2372+
FSTRING_MIDDLE '__' (3, 1) (3, 3)
2373+
FSTRING_END "'" (3, 3) (3, 4)
22772374
""")
22782375

22792376
def test_function(self):

Lib/test/test_unparse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
730730
test_directories = (lib_dir, lib_dir / "test")
731731
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
732732
"test_ast.py", "test_asdl_parser.py", "test_fstring.py",
733-
"test_patma.py", "test_type_alias.py", "test_type_params.py"}
733+
"test_patma.py", "test_type_alias.py", "test_type_params.py",
734+
"test_tokenize.py"}
734735

735736
_files_to_test = None
736737

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Correctly identify the format spec in f-strings (with single or triple
2+
quotes) that have multiple lines in the expression part and include a
3+
formatting spec. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,11 +2768,28 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
27682768
if (tok->done == E_ERROR) {
27692769
return MAKE_TOKEN(ERRORTOKEN);
27702770
}
2771-
if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
2771+
int in_format_spec = (
2772+
current_tok->last_expr_end != -1
2773+
&&
2774+
INSIDE_FSTRING_EXPR(current_tok)
2775+
);
2776+
2777+
if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
27722778
if (tok->decoding_erred) {
27732779
return MAKE_TOKEN(ERRORTOKEN);
27742780
}
27752781

2782+
// If we are in a format spec and we found a newline,
2783+
// it means that the format spec ends here and we should
2784+
// return to the regular mode.
2785+
if (in_format_spec && c == '\n') {
2786+
tok_backup(tok, c);
2787+
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
2788+
p_start = tok->start;
2789+
p_end = tok->cur;
2790+
return MAKE_TOKEN(FSTRING_MIDDLE);
2791+
}
2792+
27762793
assert(tok->multi_line_start != NULL);
27772794
// shift the tok_state's location into
27782795
// the start of string, and report the error
@@ -2804,11 +2821,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
28042821
end_quote_size = 0;
28052822
}
28062823

2807-
int in_format_spec = (
2808-
current_tok->last_expr_end != -1
2809-
&&
2810-
INSIDE_FSTRING_EXPR(current_tok)
2811-
);
28122824
if (c == '{') {
28132825
int peek = tok_nextc(tok);
28142826
if (peek != '{' || in_format_spec) {

0 commit comments

Comments
 (0)