Skip to content

Commit 5f8456e

Browse files
committed
gh-135148: Correctly handle f/t strings with comments and debug expressions
1 parent 1ffe913 commit 5f8456e

File tree

2 files changed

+78
-13
lines changed

2 files changed

+78
-13
lines changed

Lib/test/test_fstring.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,12 @@ def __repr__(self):
16511651
self.assertEqual(f"{1+2 = # my comment
16521652
}", '1+2 = \n 3')
16531653

1654+
self.assertEqual(f'{""" # booo
1655+
"""=}', '""" # booo\n """=\' # booo\\n \'')
1656+
1657+
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
1658+
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
1659+
16541660
# These next lines contain tabs. Backslash escapes don't
16551661
# work in f-strings.
16561662
# patchcheck doesn't like these tabs. So the only way to test

Parser/lexer/lexer.c

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -123,35 +123,96 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
123123

124124
// Check if there is a # character in the expression
125125
int hash_detected = 0;
126+
int in_string = 0;
127+
char string_quote = 0;
126128
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
127-
if (tok_mode->last_expr_buffer[i] == '#') {
129+
char ch = tok_mode->last_expr_buffer[i];
130+
if (ch == '\\' && i + 1 < tok_mode->last_expr_size - tok_mode->last_expr_end) {
131+
// Skip the next character if it's an escape sequence
132+
i++;
133+
continue;
134+
}
135+
if (ch == '"' || ch == '\'') {
136+
if (!in_string) {
137+
in_string = 1;
138+
string_quote = ch;
139+
} else if (ch == string_quote) {
140+
// Check for triple quotes
141+
if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
142+
i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
143+
// Skip the rest of the triple quote
144+
i += 2;
145+
}
146+
in_string = 0;
147+
}
148+
} else if (ch == '#' && !in_string) {
128149
hash_detected = 1;
129150
break;
130151
}
131152
}
132-
153+
// If we found a # character in the expression, we need to handle comments
133154
if (hash_detected) {
155+
// Calculate length of input we need to process
134156
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
157+
158+
// Allocate buffer for processed result, with room for null terminator
135159
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
136160
if (!result) {
137161
return -1;
138162
}
139163

140-
Py_ssize_t i = 0;
141-
Py_ssize_t j = 0;
164+
// Initialize counters and state
165+
Py_ssize_t i = 0; // Input position
166+
Py_ssize_t j = 0; // Output position
167+
in_string = 0; // Whether we're currently inside a string
168+
string_quote = 0; // The quote character for current string (' or ")
142169

170+
// Process each character of input
143171
for (i = 0, j = 0; i < input_length; i++) {
144-
if (tok_mode->last_expr_buffer[i] == '#') {
145-
// Skip characters until newline or end of string
172+
char ch = tok_mode->last_expr_buffer[i];
173+
174+
// Handle escape sequences - copy both backslash and next char
175+
if (ch == '\\' && i + 1 < input_length) {
176+
result[j++] = ch; // Copy backslash
177+
result[j++] = tok_mode->last_expr_buffer[++i]; // Copy escaped char
178+
continue;
179+
}
180+
181+
// Handle string quotes
182+
if (ch == '"' || ch == '\'') {
183+
if (!in_string) {
184+
// Start of new string
185+
in_string = 1;
186+
string_quote = ch;
187+
} else if (ch == string_quote) {
188+
// Potential end of string - check for triple quotes
189+
if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
190+
i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
191+
// Found triple quote - copy all three quotes
192+
result[j++] = ch;
193+
result[j++] = ch;
194+
result[j++] = ch;
195+
i += 2; // Skip the other two quotes
196+
continue;
197+
}
198+
// End of regular string
199+
in_string = 0;
200+
}
201+
result[j++] = ch; // Copy the quote character
202+
}
203+
// Handle comments - skip everything until newline
204+
else if (ch == '#' && !in_string) {
146205
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
147206
if (tok_mode->last_expr_buffer[i] == '\n') {
148-
result[j++] = tok_mode->last_expr_buffer[i];
207+
result[j++] = tok_mode->last_expr_buffer[i]; // Keep newline
149208
break;
150209
}
151-
i++;
210+
i++; // Skip comment character
152211
}
153-
} else {
154-
result[j++] = tok_mode->last_expr_buffer[i];
212+
}
213+
// Copy any other character unchanged
214+
else {
215+
result[j++] = ch;
155216
}
156217
}
157218

@@ -164,11 +225,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164225
tok_mode->last_expr_size - tok_mode->last_expr_end,
165226
NULL
166227
);
167-
168228
}
169229

170-
171-
if (!res) {
230+
if (!res) {
172231
return -1;
173232
}
174233
token->metadata = res;

0 commit comments

Comments
 (0)