Skip to content

Commit 0272b68

Browse files
committed
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements.
1 parent 1bba691 commit 0272b68

9 files changed

+191
-17
lines changed

Zend/tests/bug60099.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ namespace foo {
77

88
?>
99
--EXPECTF--
10-
Parse error: syntax error, unexpected end of file in %s on line %d
10+
Parse error: Unclosed '{' on line 2 in %s on line %d

Zend/tests/eval_parse_error_with_doc_comment.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ EOC
1212

1313
?>
1414
--EXPECTF--
15-
Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
15+
Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d

Zend/tests/require_parse_exception.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
4343
?>
4444
--EXPECT--
4545
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
46-
syntax error, unexpected end of file on line 2
47-
syntax error, unexpected end of file on line 3
46+
Unclosed '{' on line 2
47+
Unclosed '{' on line 3
4848
syntax error, unexpected end of file, expecting '(' on line 2
4949
Invalid numeric literal on line 2
5050
Invalid UTF-8 codepoint escape sequence on line 2

Zend/zend_globals.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
285285
int yy_state;
286286
zend_stack state_stack;
287287
zend_ptr_stack heredoc_label_stack;
288+
zend_stack nest_location_stack; /* for syntax error reporting */
288289
zend_bool heredoc_scan_ahead;
289290
int heredoc_indentation;
290291
zend_bool heredoc_indentation_uses_spaces;

Zend/zend_language_scanner.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
3030
int yy_state;
3131
zend_stack state_stack;
3232
zend_ptr_stack heredoc_label_stack;
33+
zend_stack nest_location_stack; /* for syntax error reporting */
3334

3435
zend_file_handle *in;
3536
uint32_t lineno;
@@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
6364
zend_bool indentation_uses_spaces;
6465
} zend_heredoc_label;
6566

67+
/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
68+
typedef struct _zend_nest_location {
69+
char text;
70+
int lineno;
71+
} zend_nest_location;
72+
6673
BEGIN_EXTERN_C()
6774
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
6875
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);

Zend/zend_language_scanner.l

Lines changed: 104 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ void startup_scanner(void)
192192
CG(doc_comment) = NULL;
193193
CG(extra_fn_flags) = 0;
194194
zend_stack_init(&SCNG(state_stack), sizeof(int));
195+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
195196
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
196197
SCNG(heredoc_scan_ahead) = 0;
197198
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
205206
CG(parse_error) = 0;
206207
RESET_DOC_COMMENT();
207208
zend_stack_destroy(&SCNG(state_stack));
209+
zend_stack_destroy(&SCNG(nest_location_stack));
208210
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
209211
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
210212
SCNG(heredoc_scan_ahead) = 0;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
223225
lex_state->state_stack = SCNG(state_stack);
224226
zend_stack_init(&SCNG(state_stack), sizeof(int));
225227

228+
lex_state->nest_location_stack = SCNG(nest_location_stack);
229+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
230+
226231
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
227232
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
228233

@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
258263
zend_stack_destroy(&SCNG(state_stack));
259264
SCNG(state_stack) = lex_state->state_stack;
260265

266+
zend_stack_destroy(&SCNG(nest_location_stack));
267+
SCNG(nest_location_stack) = lex_state->nest_location_stack;
268+
261269
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
262270
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
263271
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
@@ -1250,6 +1258,64 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12501258
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
12511259
}
12521260

1261+
/* Check that { }, [ ], ( ) are nested correctly */
1262+
static void report_bad_nesting(char opening, int opening_lineno, char closing)
1263+
{
1264+
char buf[256];
1265+
size_t used = 0;
1266+
1267+
used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
1268+
1269+
if (opening_lineno != CG(zend_lineno)) {
1270+
used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
1271+
}
1272+
1273+
if (closing) { /* 'closing' will be 0 if at end of file */
1274+
used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
1275+
}
1276+
1277+
zend_throw_exception(zend_ce_parse_error, buf, 0);
1278+
}
1279+
1280+
static void enter_nesting(char opening)
1281+
{
1282+
zend_nest_location nest_loc = {opening, CG(zend_lineno)};
1283+
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
1284+
}
1285+
1286+
static int exit_nesting(char closing)
1287+
{
1288+
if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
1289+
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
1290+
return -1;
1291+
}
1292+
1293+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1294+
char opening = nest_loc->text;
1295+
char closing = *location;
1296+
1297+
if ((opening == '{' && closing != '}') ||
1298+
(opening == '[' && closing != ']') ||
1299+
(opening == '(' && closing != ')')) {
1300+
report_bad_nesting(opening, nest_loc->lineno, closing);
1301+
return -1;
1302+
}
1303+
1304+
zend_stack_del_top(&SCNG(nest_location_stack));
1305+
return 0;
1306+
}
1307+
1308+
static int check_nesting_at_end()
1309+
{
1310+
if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
1311+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1312+
report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
1313+
return -1;
1314+
}
1315+
1316+
return 0;
1317+
}
1318+
12531319
#define PARSER_MODE() \
12541320
EXPECTED(elem != NULL)
12551321

@@ -1277,6 +1343,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12771343
goto emit_token; \
12781344
} while (0)
12791345

1346+
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
1347+
if (exit_nesting(_token) && PARSER_MODE()) { \
1348+
RETURN_TOKEN(T_ERROR); \
1349+
} else { \
1350+
RETURN_TOKEN(_token); \
1351+
} \
1352+
} while(0)
1353+
1354+
#define RETURN_END_TOKEN do { \
1355+
if (check_nesting_at_end() && PARSER_MODE()) { \
1356+
RETURN_TOKEN(T_ERROR); \
1357+
} else { \
1358+
RETURN_TOKEN(END); \
1359+
} \
1360+
} while (0)
1361+
12801362
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
12811363
{
12821364
int token;
@@ -1297,7 +1379,7 @@ BNUM "0b"[01]+(_[01]+)*
12971379
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
12981380
WHITESPACE [ \n\r\t]+
12991381
TABS_AND_SPACES [ \t]*
1300-
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1382+
TOKENS [;:,.|^&+-/*=%!~$<>?@]
13011383
ANY_CHAR [^]
13021384
NEWLINE ("\r"|"\n"|"\r\n")
13031385
@@ -1770,29 +1852,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
17701852
RETURN_TOKEN(T_SR);
17711853
}
17721854

1855+
<ST_IN_SCRIPTING>"]"|")" {
1856+
/* Check that ] and ) match up properly with a preceding [ or ( */
1857+
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
1858+
}
1859+
1860+
<ST_IN_SCRIPTING>"["|"(" {
1861+
enter_nesting(yytext[0]);
1862+
RETURN_TOKEN(yytext[0]);
1863+
}
1864+
17731865
<ST_IN_SCRIPTING>{TOKENS} {
17741866
RETURN_TOKEN(yytext[0]);
17751867
}
17761868

17771869

17781870
<ST_IN_SCRIPTING>"{" {
17791871
yy_push_state(ST_IN_SCRIPTING);
1872+
enter_nesting('{');
17801873
RETURN_TOKEN('{');
17811874
}
17821875

17831876

17841877
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
17851878
yy_push_state(ST_LOOKING_FOR_VARNAME);
1879+
enter_nesting('{');
17861880
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
17871881
}
17881882

1789-
17901883
<ST_IN_SCRIPTING>"}" {
17911884
RESET_DOC_COMMENT();
17921885
if (!zend_stack_is_empty(&SCNG(state_stack))) {
17931886
yy_pop_state();
17941887
}
1795-
RETURN_TOKEN('}');
1888+
RETURN_EXIT_NESTING_TOKEN('}');
17961889
}
17971890

17981891

@@ -2088,7 +2181,7 @@ string:
20882181

20892182
<INITIAL>{ANY_CHAR} {
20902183
if (YYCURSOR > YYLIMIT) {
2091-
RETURN_TOKEN(END);
2184+
RETURN_END_TOKEN;
20922185
}
20932186

20942187
inline_char_handler:
@@ -2165,7 +2258,7 @@ inline_char_handler:
21652258
RETURN_TOKEN(']');
21662259
}
21672260

2168-
<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
2261+
<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
21692262
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
21702263
RETURN_TOKEN(yytext[0]);
21712264
}
@@ -2569,6 +2662,7 @@ skip_escape_conversion:
25692662
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
25702663
yy_push_state(ST_IN_SCRIPTING);
25712664
yyless(1);
2665+
enter_nesting('{');
25722666
RETURN_TOKEN(T_CURLY_OPEN);
25732667
}
25742668
@@ -2593,7 +2687,7 @@ skip_escape_conversion:
25932687
}
25942688
25952689
if (YYCURSOR > YYLIMIT) {
2596-
RETURN_TOKEN(END);
2690+
RETURN_END_TOKEN;
25972691
}
25982692
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
25992693
YYCURSOR++;
@@ -2640,7 +2734,7 @@ double_quotes_scan_done:
26402734
26412735
<ST_BACKQUOTE>{ANY_CHAR} {
26422736
if (YYCURSOR > YYLIMIT) {
2643-
RETURN_TOKEN(END);
2737+
RETURN_END_TOKEN;
26442738
}
26452739
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
26462740
YYCURSOR++;
@@ -2689,7 +2783,7 @@ double_quotes_scan_done:
26892783
int newline = 0, indentation = 0, spacing = 0;
26902784
26912785
if (YYCURSOR > YYLIMIT) {
2692-
RETURN_TOKEN(END);
2786+
RETURN_END_TOKEN;
26932787
}
26942788
26952789
YYCURSOR--;
@@ -2813,7 +2907,7 @@ heredoc_scan_done:
28132907
int newline = 0, indentation = 0, spacing = -1;
28142908
28152909
if (YYCURSOR > YYLIMIT) {
2816-
RETURN_TOKEN(END);
2910+
RETURN_END_TOKEN;
28172911
}
28182912
28192913
YYCURSOR--;
@@ -2901,7 +2995,7 @@ nowdoc_scan_done:
29012995
29022996
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
29032997
if (YYCURSOR > YYLIMIT) {
2904-
RETURN_TOKEN(END);
2998+
RETURN_END_TOKEN;
29052999
}
29063000
29073001
RETURN_TOKEN(T_BAD_CHARACTER);

ext/tokenizer/tests/token_get_all_variation17.phpt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ try {
3232
} catch(Exception $e) {
3333
echo "caught exception:";
3434
}
35+
}
3536
?>';
3637
$tokens = token_get_all($source);
3738
var_dump($tokens);
@@ -40,7 +41,7 @@ echo "Done"
4041
?>
4142
--EXPECTF--
4243
*** Testing token_get_all() : with exception keywords ***
43-
array(81) {
44+
array(83) {
4445
[0]=>
4546
array(3) {
4647
[0]=>
@@ -601,13 +602,25 @@ array(81) {
601602
int(14)
602603
}
603604
[80]=>
605+
string(1) "}"
606+
[81]=>
604607
array(3) {
605608
[0]=>
606609
int(%d)
607610
[1]=>
608-
string(2) "?>"
611+
string(1) "
612+
"
609613
[2]=>
610614
int(15)
611615
}
616+
[82]=>
617+
array(3) {
618+
[0]=>
619+
int(%d)
620+
[1]=>
621+
string(2) "?>"
622+
[2]=>
623+
int(16)
624+
}
612625
}
613626
Done

ext/tokenizer/tokenizer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
392392
array_init(return_value);
393393

394394
while ((token_type = lex_scan(&token, NULL))) {
395+
ZEND_ASSERT(token_type != T_ERROR);
396+
395397
add_token(
396398
return_value, token_type, zendtext, zendleng, token_line,
397399
token_class, &interned_strings);
@@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
408410
&& --need_tokens == 0
409411
) {
410412
/* fetch the rest into a T_INLINE_HTML */
411-
if (zendcursor != zendlimit) {
413+
if (zendcursor < zendlimit) {
412414
add_token(
413415
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
414416
token_line, token_class, &interned_strings);

0 commit comments

Comments
 (0)