@@ -192,6 +192,7 @@ void startup_scanner(void)
192
192
CG (doc_comment) = NULL ;
193
193
CG (extra_fn_flags) = 0 ;
194
194
zend_stack_init (&SCNG (state_stack), sizeof (int ));
195
+ zend_stack_init (&SCNG (nest_location_stack), sizeof (zend_nest_location));
195
196
zend_ptr_stack_init (&SCNG (heredoc_label_stack));
196
197
SCNG (heredoc_scan_ahead) = 0 ;
197
198
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
205
206
CG (parse_error) = 0 ;
206
207
RESET_DOC_COMMENT ();
207
208
zend_stack_destroy (&SCNG (state_stack));
209
+ zend_stack_destroy (&SCNG (nest_location_stack));
208
210
zend_ptr_stack_clean (&SCNG (heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1 );
209
211
zend_ptr_stack_destroy (&SCNG (heredoc_label_stack));
210
212
SCNG (heredoc_scan_ahead) = 0 ;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
223
225
lex_state->state_stack = SCNG (state_stack);
224
226
zend_stack_init (&SCNG (state_stack), sizeof (int ));
225
227
228
+ lex_state->nest_location_stack = SCNG (nest_location_stack);
229
+ zend_stack_init (&SCNG (nest_location_stack), sizeof (zend_nest_location));
230
+
226
231
lex_state->heredoc_label_stack = SCNG (heredoc_label_stack);
227
232
zend_ptr_stack_init (&SCNG (heredoc_label_stack));
228
233
@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
258
263
zend_stack_destroy (&SCNG (state_stack));
259
264
SCNG (state_stack) = lex_state->state_stack ;
260
265
266
+ zend_stack_destroy (&SCNG (nest_location_stack));
267
+ SCNG (nest_location_stack) = lex_state->nest_location_stack ;
268
+
261
269
zend_ptr_stack_clean (&SCNG (heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1 );
262
270
zend_ptr_stack_destroy (&SCNG (heredoc_label_stack));
263
271
SCNG (heredoc_label_stack) = lex_state->heredoc_label_stack ;
@@ -1250,6 +1258,64 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
1250
1258
zend_ptr_stack_push (&SCNG (heredoc_label_stack), (void *) new_heredoc_label);
1251
1259
}
1252
1260
1261
+ /* Check that { }, [ ], ( ) are nested correctly */
1262
+ static void report_bad_nesting (char opening, int opening_lineno, char closing)
1263
+ {
1264
+ char buf[256 ];
1265
+ size_t used = 0 ;
1266
+
1267
+ used = snprintf (buf, sizeof (buf), " Unclosed '%c'" , opening);
1268
+
1269
+ if (opening_lineno != CG (zend_lineno)) {
1270
+ used += snprintf (buf + used, sizeof (buf) - used, " on line %d" , opening_lineno);
1271
+ }
1272
+
1273
+ if (closing) { /* 'closing' will be 0 if at end of file */
1274
+ used += snprintf (buf + used, sizeof (buf) - used, " does not match '%c'" , closing);
1275
+ }
1276
+
1277
+ zend_throw_exception (zend_ce_parse_error, buf, 0 );
1278
+ }
1279
+
1280
+ static void enter_nesting (char opening)
1281
+ {
1282
+ zend_nest_location nest_loc = {opening, CG (zend_lineno)};
1283
+ zend_stack_push (&SCNG (nest_location_stack), &nest_loc);
1284
+ }
1285
+
1286
+ static int exit_nesting (char closing)
1287
+ {
1288
+ if (zend_stack_is_empty (&SCNG (nest_location_stack))) {
1289
+ zend_throw_exception_ex (zend_ce_parse_error, 0 , " Unmatched '%c'" , closing);
1290
+ return -1 ;
1291
+ }
1292
+
1293
+ zend_nest_location *nest_loc = zend_stack_top (&SCNG (nest_location_stack));
1294
+ char opening = nest_loc->text ;
1295
+ char closing = *location;
1296
+
1297
+ if ((opening == ' {' && closing != ' }' ) ||
1298
+ (opening == ' [' && closing != ' ]' ) ||
1299
+ (opening == ' (' && closing != ' )' )) {
1300
+ report_bad_nesting (opening, nest_loc->lineno , closing);
1301
+ return -1 ;
1302
+ }
1303
+
1304
+ zend_stack_del_top (&SCNG (nest_location_stack));
1305
+ return 0 ;
1306
+ }
1307
+
1308
+ static int check_nesting_at_end ()
1309
+ {
1310
+ if (!zend_stack_is_empty (&SCNG (nest_location_stack))) {
1311
+ zend_nest_location *nest_loc = zend_stack_top (&SCNG (nest_location_stack));
1312
+ report_bad_nesting (nest_loc->text , nest_loc->lineno , 0 );
1313
+ return -1 ;
1314
+ }
1315
+
1316
+ return 0 ;
1317
+ }
1318
+
1253
1319
#define PARSER_MODE () \
1254
1320
EXPECTED (elem != NULL )
1255
1321
@@ -1277,6 +1343,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
1277
1343
goto emit_token; \
1278
1344
} while (0 )
1279
1345
1346
/*
 * Return a closing-bracket token after running it through exit_nesting().
 *
 * exit_nesting() is always invoked. T_ERROR is returned in place of the token
 * only when exit_nesting() reports failure and PARSER_MODE() holds; otherwise
 * the token itself is returned.
 *
 * Note: _token is expanded twice, so pass an expression without side effects.
 */
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
		if (exit_nesting(_token) && PARSER_MODE()) { \
			RETURN_TOKEN(T_ERROR); \
		} else { \
			RETURN_TOKEN(_token); \
		} \
	} while (0)
1353
+
1354
/*
 * Return the END token after running check_nesting_at_end().
 *
 * check_nesting_at_end() is always invoked. END is returned when it succeeds
 * or when PARSER_MODE() does not hold; otherwise T_ERROR is returned.
 */
#define RETURN_END_TOKEN do { \
		if (check_nesting_at_end() == 0 || !PARSER_MODE()) { \
			RETURN_TOKEN(END); \
		} else { \
			RETURN_TOKEN(T_ERROR); \
		} \
	} while (0)
1361
+
1280
1362
int ZEND_FASTCALL lex_scan (zval *zendlval, zend_parser_stack_elem *elem)
1281
1363
{
1282
1364
int token;
@@ -1297,7 +1379,7 @@ BNUM "0b"[01]+(_[01]+)*
1297
1379
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1298
1380
WHITESPACE [ \n\r\t]+
1299
1381
TABS_AND_SPACES [ \t]*
1300
- TOKENS [;:,.\[\]() |^&+-/*=%!~$<>?@]
1382
+ TOKENS [;:,.|^&+-/*=%!~$<>?@]
1301
1383
ANY_CHAR [^]
1302
1384
NEWLINE ("\r"|"\n"|"\r\n")
1303
1385
@@ -1770,29 +1852,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
1770
1852
RETURN_TOKEN (T_SR);
1771
1853
}
1772
1854
1855
+ <ST_IN_SCRIPTING>" ]" |" )" {
1856
+ /* Check that ] and ) match up properly with a preceding [ or ( */
1857
+ RETURN_EXIT_NESTING_TOKEN (yytext[0 ]);
1858
+ }
1859
+
1860
+ <ST_IN_SCRIPTING>" [" |" (" {
1861
+ enter_nesting (yytext[0 ]);
1862
+ RETURN_TOKEN (yytext[0 ]);
1863
+ }
1864
+
1773
1865
<ST_IN_SCRIPTING>{TOKENS} {
1774
1866
RETURN_TOKEN (yytext[0 ]);
1775
1867
}
1776
1868
1777
1869
1778
1870
<ST_IN_SCRIPTING>" {" {
1779
1871
yy_push_state (ST_IN_SCRIPTING);
1872
+ enter_nesting (' {' );
1780
1873
RETURN_TOKEN (' {' );
1781
1874
}
1782
1875
1783
1876
1784
1877
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>" ${" {
1785
1878
yy_push_state (ST_LOOKING_FOR_VARNAME);
1879
+ enter_nesting (' {' );
1786
1880
RETURN_TOKEN (T_DOLLAR_OPEN_CURLY_BRACES);
1787
1881
}
1788
1882
1789
-
1790
1883
<ST_IN_SCRIPTING>" }" {
1791
1884
RESET_DOC_COMMENT ();
1792
1885
if (!zend_stack_is_empty (&SCNG (state_stack))) {
1793
1886
yy_pop_state ();
1794
1887
}
1795
- RETURN_TOKEN (' }' );
1888
+ RETURN_EXIT_NESTING_TOKEN (' }' );
1796
1889
}
1797
1890
1798
1891
@@ -2088,7 +2181,7 @@ string:
2088
2181
2089
2182
<INITIAL>{ANY_CHAR} {
2090
2183
if (YYCURSOR > YYLIMIT) {
2091
- RETURN_TOKEN (END) ;
2184
+ RETURN_END_TOKEN ;
2092
2185
}
2093
2186
2094
2187
inline_char_handler:
@@ -2165,7 +2258,7 @@ inline_char_handler:
2165
2258
RETURN_TOKEN (' ]' );
2166
2259
}
2167
2260
2168
- <ST_VAR_OFFSET>{TOKENS}|[{}" `] {
2261
+ <ST_VAR_OFFSET>{TOKENS}|[[() {}" `] {
2169
2262
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
2170
2263
RETURN_TOKEN(yytext[0]);
2171
2264
}
@@ -2569,6 +2662,7 @@ skip_escape_conversion:
2569
2662
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2570
2663
yy_push_state(ST_IN_SCRIPTING);
2571
2664
yyless(1);
2665
+ enter_nesting(' {' );
2572
2666
RETURN_TOKEN(T_CURLY_OPEN);
2573
2667
}
2574
2668
@@ -2593,7 +2687,7 @@ skip_escape_conversion:
2593
2687
}
2594
2688
2595
2689
if (YYCURSOR > YYLIMIT) {
2596
- RETURN_TOKEN(END) ;
2690
+ RETURN_END_TOKEN ;
2597
2691
}
2598
2692
if (yytext[0] == '\\ ' && YYCURSOR < YYLIMIT) {
2599
2693
YYCURSOR++;
@@ -2640,7 +2734,7 @@ double_quotes_scan_done:
2640
2734
2641
2735
<ST_BACKQUOTE>{ANY_CHAR} {
2642
2736
if (YYCURSOR > YYLIMIT) {
2643
- RETURN_TOKEN(END) ;
2737
+ RETURN_END_TOKEN ;
2644
2738
}
2645
2739
if (yytext[0] == '\\ ' && YYCURSOR < YYLIMIT) {
2646
2740
YYCURSOR++;
@@ -2689,7 +2783,7 @@ double_quotes_scan_done:
2689
2783
int newline = 0, indentation = 0, spacing = 0;
2690
2784
2691
2785
if (YYCURSOR > YYLIMIT) {
2692
- RETURN_TOKEN(END) ;
2786
+ RETURN_END_TOKEN ;
2693
2787
}
2694
2788
2695
2789
YYCURSOR--;
@@ -2813,7 +2907,7 @@ heredoc_scan_done:
2813
2907
int newline = 0, indentation = 0, spacing = -1;
2814
2908
2815
2909
if (YYCURSOR > YYLIMIT) {
2816
- RETURN_TOKEN(END) ;
2910
+ RETURN_END_TOKEN ;
2817
2911
}
2818
2912
2819
2913
YYCURSOR--;
@@ -2901,7 +2995,7 @@ nowdoc_scan_done:
2901
2995
2902
2996
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2903
2997
if (YYCURSOR > YYLIMIT) {
2904
- RETURN_TOKEN(END) ;
2998
+ RETURN_END_TOKEN ;
2905
2999
}
2906
3000
2907
3001
RETURN_TOKEN(T_BAD_CHARACTER);
0 commit comments