Skip to content

gh-135148: Correctly handle f/t strings with comments and debug expressions #135198

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1651,6 +1651,18 @@ def __repr__(self):
self.assertEqual(f"{1+2 = # my comment
}", '1+2 = \n 3')

self.assertEqual(f'{""" # booo
"""=}', '""" # booo\n """=\' # booo\\n \'')

self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')

self.assertEqual(f'{ # some comment goes here
"""hello"""=}', ' \n """hello"""=\'hello\'')
self.assertEqual(f'{"""# this is not a comment
a""" # this is a comment
}', '# this is not a comment\n a')

# These next lines contains tabs. Backslash escapes don't
# work in f-strings.
# patchcheck doesn't like these tabs. So the only way to test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed a bug where f-string debug expressions (using =) would incorrectly
strip out parts of strings containing escaped quotes and # characters. Patch
by Pablo Galindo.
81 changes: 61 additions & 20 deletions Parser/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,38 +121,81 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
}
PyObject *res = NULL;

// Check if there is a # character in the expression
// Look for a # character outside of string literals
int hash_detected = 0;
int in_string = 0;
char quote_char = 0;
char string_quote = 0;

for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
if (tok_mode->last_expr_buffer[i] == '#') {
char ch = tok_mode->last_expr_buffer[i];

// Skip escaped characters
if (ch == '\\') {
i++;
continue;
}

// Handle quotes
if (ch == '"' || ch == '\'') {
if (!in_string) {
in_string = 1;
quote_char = ch;
}
else if (ch == quote_char) {
in_string = 0;
}
continue;
}

// Check for # outside strings
if (ch == '#' && !in_string) {
hash_detected = 1;
break;
}
}

// If we found a # character in the expression, we need to handle comments
if (hash_detected) {
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
// Allocate buffer for processed result
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
if (!result) {
return -1;
}

Py_ssize_t i = 0;
Py_ssize_t j = 0;
Py_ssize_t i = 0; // Input position
Py_ssize_t j = 0; // Output position
in_string = 0; // Whether we're in a string
string_quote = 0; // Current string quote char

for (i = 0, j = 0; i < input_length; i++) {
if (tok_mode->last_expr_buffer[i] == '#') {
// Skip characters until newline or end of string
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
if (tok_mode->last_expr_buffer[i] == '\n') {
result[j++] = tok_mode->last_expr_buffer[i];
break;
}
// Process each character
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
char ch = tok_mode->last_expr_buffer[i];

// Handle string quotes
if (ch == '"' || ch == '\'') {
if (!in_string) {
in_string = 1;
string_quote = ch;
} else if (ch == string_quote) {
in_string = 0;
}
result[j++] = ch;
}
// Skip comments
else if (ch == '#' && !in_string) {
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
tok_mode->last_expr_buffer[i] != '\n') {
i++;
}
} else {
result[j++] = tok_mode->last_expr_buffer[i];
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
result[j++] = '\n';
}
}
// Copy other chars
else {
result[j++] = ch;
}
i++;
}

result[j] = '\0'; // Null-terminate the result string
Expand All @@ -164,11 +207,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
tok_mode->last_expr_size - tok_mode->last_expr_end,
NULL
);

}


if (!res) {
if (!res) {
return -1;
}
token->metadata = res;
Expand Down
Loading