@@ -123,35 +123,96 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
123
123
124
124
// Check if there is a # character in the expression
125
125
int hash_detected = 0 ;
126
+ int in_string = 0 ;
127
+ char string_quote = 0 ;
126
128
for (Py_ssize_t i = 0 ; i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ; i ++ ) {
127
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
129
+ char ch = tok_mode -> last_expr_buffer [i ];
130
+ if (ch == '\\' && i + 1 < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
131
+ // Skip the next character if it's an escape sequence
132
+ i ++ ;
133
+ continue ;
134
+ }
135
+ if (ch == '"' || ch == '\'' ) {
136
+ if (!in_string ) {
137
+ in_string = 1 ;
138
+ string_quote = ch ;
139
+ } else if (ch == string_quote ) {
140
+ // Check for triple quotes
141
+ if (i > 0 && tok_mode -> last_expr_buffer [i - 1 ] == ch &&
142
+ i > 1 && tok_mode -> last_expr_buffer [i - 2 ] == ch ) {
143
+ // Skip the rest of the triple quote
144
+ i += 2 ;
145
+ }
146
+ in_string = 0 ;
147
+ }
148
+ } else if (ch == '#' && !in_string ) {
128
149
hash_detected = 1 ;
129
150
break ;
130
151
}
131
152
}
132
-
153
+ // If we found a # character in the expression, we need to handle comments
133
154
if (hash_detected ) {
155
+ // Calculate length of input we need to process
134
156
Py_ssize_t input_length = tok_mode -> last_expr_size - tok_mode -> last_expr_end ;
157
+
158
+ // Allocate buffer for processed result, with room for null terminator
135
159
char * result = (char * )PyMem_Malloc ((input_length + 1 ) * sizeof (char ));
136
160
if (!result ) {
137
161
return -1 ;
138
162
}
139
163
140
- Py_ssize_t i = 0 ;
141
- Py_ssize_t j = 0 ;
164
+ // Initialize counters and state
165
+ Py_ssize_t i = 0 ; // Input position
166
+ Py_ssize_t j = 0 ; // Output position
167
+ in_string = 0 ; // Whether we're currently inside a string
168
+ string_quote = 0 ; // The quote character for current string (' or ")
142
169
170
+ // Process each character of input
143
171
for (i = 0 , j = 0 ; i < input_length ; i ++ ) {
144
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
145
- // Skip characters until newline or end of string
172
+ char ch = tok_mode -> last_expr_buffer [i ];
173
+
174
+ // Handle escape sequences - copy both backslash and next char
175
+ if (ch == '\\' && i + 1 < input_length ) {
176
+ result [j ++ ] = ch ; // Copy backslash
177
+ result [j ++ ] = tok_mode -> last_expr_buffer [++ i ]; // Copy escaped char
178
+ continue ;
179
+ }
180
+
181
+ // Handle string quotes
182
+ if (ch == '"' || ch == '\'' ) {
183
+ if (!in_string ) {
184
+ // Start of new string
185
+ in_string = 1 ;
186
+ string_quote = ch ;
187
+ } else if (ch == string_quote ) {
188
+ // Potential end of string - check for triple quotes
189
+ if (i > 0 && tok_mode -> last_expr_buffer [i - 1 ] == ch &&
190
+ i > 1 && tok_mode -> last_expr_buffer [i - 2 ] == ch ) {
191
+ // Found triple quote - copy all three quotes
192
+ result [j ++ ] = ch ;
193
+ result [j ++ ] = ch ;
194
+ result [j ++ ] = ch ;
195
+ i += 2 ; // Skip the other two quotes
196
+ continue ;
197
+ }
198
+ // End of regular string
199
+ in_string = 0 ;
200
+ }
201
+ result [j ++ ] = ch ; // Copy the quote character
202
+ }
203
+ // Handle comments - skip everything until newline
204
+ else if (ch == '#' && !in_string ) {
146
205
while (i < input_length && tok_mode -> last_expr_buffer [i ] != '\0' ) {
147
206
if (tok_mode -> last_expr_buffer [i ] == '\n' ) {
148
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
207
+ result [j ++ ] = tok_mode -> last_expr_buffer [i ]; // Keep newline
149
208
break ;
150
209
}
151
- i ++ ;
210
+ i ++ ; // Skip comment character
152
211
}
153
- } else {
154
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
212
+ }
213
+ // Copy any other character unchanged
214
+ else {
215
+ result [j ++ ] = ch ;
155
216
}
156
217
}
157
218
@@ -164,11 +225,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164
225
tok_mode -> last_expr_size - tok_mode -> last_expr_end ,
165
226
NULL
166
227
);
167
-
168
228
}
169
229
170
-
171
- if (!res ) {
230
+ if (!res ) {
172
231
return -1 ;
173
232
}
174
233
token -> metadata = res ;
0 commit comments