Skip to content

Commit 5a73f07

Browse files
authored
RichParser: stricter identifiers parser
1 parent 49f87ce commit 5a73f07

File tree

5 files changed

+208
-75
lines changed

5 files changed

+208
-75
lines changed

src/Analyser/Ignore/IgnoreLexer.php

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ final class IgnoreLexer
1111
{
1212

1313
public const TOKEN_WHITESPACE = 1;
14-
public const TOKEN_EOL = 2;
14+
public const TOKEN_END = 2;
1515
public const TOKEN_IDENTIFIER = 3;
1616
public const TOKEN_COMMA = 4;
1717
public const TOKEN_OPEN_PARENTHESIS = 5;
@@ -20,9 +20,9 @@ final class IgnoreLexer
2020

2121
private const LABELS = [
2222
self::TOKEN_WHITESPACE => 'T_WHITESPACE',
23-
self::TOKEN_EOL => 'T_EOL',
24-
self::TOKEN_IDENTIFIER => 'T_IDENTIFIER',
25-
self::TOKEN_COMMA => 'T_COMMA',
23+
self::TOKEN_END => 'end',
24+
self::TOKEN_IDENTIFIER => 'identifier',
25+
self::TOKEN_COMMA => 'comma (,)',
2626
self::TOKEN_OPEN_PARENTHESIS => 'T_OPEN_PARENTHESIS',
2727
self::TOKEN_CLOSE_PARENTHESIS => 'T_CLOSE_PARENTHESIS',
2828
self::TOKEN_OTHER => 'T_OTHER',
@@ -51,13 +51,17 @@ public function tokenize(string $input): array
5151
/** @var self::TOKEN_* $type */
5252
$type = (int) $match['MARK'];
5353
$tokens[] = [$match[0], $type, $line];
54-
if ($type !== self::TOKEN_EOL) {
54+
if ($type !== self::TOKEN_END) {
5555
continue;
5656
}
5757

5858
$line++;
5959
}
6060

61+
if (($type ?? null) !== self::TOKEN_END) {
62+
$tokens[] = ['', self::TOKEN_END, $line]; // ensure ending token is present
63+
}
64+
6165
return $tokens;
6266
}
6367

@@ -73,7 +77,7 @@ private function generateRegexp(): string
7377
{
7478
$patterns = [
7579
self::TOKEN_WHITESPACE => '[\\x09\\x20]++',
76-
self::TOKEN_EOL => '\\r?+\\n[\\x09\\x20]*+(?:\\*(?!/)\\x20?+)?',
80+
self::TOKEN_END => '(\\r?+\\n[\\x09\\x20]*+(?:\\*(?!/)\\x20?+)?|\\*/)',
7781
self::TOKEN_IDENTIFIER => Error::PATTERN_IDENTIFIER,
7882
self::TOKEN_COMMA => ',',
7983
self::TOKEN_OPEN_PARENTHESIS => '\\(',

src/Parser/RichParser.php

Lines changed: 35 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,10 @@
1313
use PHPStan\File\FileReader;
1414
use PHPStan\ShouldNotHappenException;
1515
use function array_filter;
16-
use function array_pop;
16+
use function array_map;
1717
use function count;
1818
use function implode;
19+
use function in_array;
1920
use function is_string;
2021
use function preg_match_all;
2122
use function sprintf;
@@ -283,59 +284,57 @@ private function parseIdentifiers(string $text, int $ignorePos): array
283284
if ($originalToken[IgnoreLexer::TYPE_OFFSET] === IgnoreLexer::TOKEN_WHITESPACE) {
284285
continue;
285286
}
286-
if ($originalToken[IgnoreLexer::TYPE_OFFSET] === IgnoreLexer::TOKEN_EOL) {
287-
break;
288-
}
289287
$tokens[] = $originalToken;
290288
}
291289

292290
$c = count($tokens);
293291

294292
$identifiers = [];
295-
$depth = 0;
296-
$parenthesisStack = [];
293+
$openParenthesisCount = 0;
294+
$expected = [IgnoreLexer::TOKEN_IDENTIFIER];
295+
297296
for ($i = 0; $i < $c; $i++) {
297+
$lastTokenTypeLabel = isset($tokenType) ? $this->ignoreLexer->getLabel($tokenType) : '@phpstan-ignore';
298298
[IgnoreLexer::VALUE_OFFSET => $content, IgnoreLexer::TYPE_OFFSET => $tokenType, IgnoreLexer::LINE_OFFSET => $tokenLine] = $tokens[$i];
299-
if ($tokenType === IgnoreLexer::TOKEN_IDENTIFIER && $depth === 0) {
300-
$identifiers[] = $content;
301-
if (isset($tokens[$i + 1])) {
302-
if ($tokens[$i + 1][IgnoreLexer::TYPE_OFFSET] === IgnoreLexer::TOKEN_COMMA) {
303-
$i++;
304-
}
305-
}
306-
continue;
307-
}
308-
if ($i === 0) {
309-
throw new IgnoreParseException('First token is not an identifier', $tokenLine);
299+
300+
if ($expected !== null && !in_array($tokenType, $expected, true)) {
301+
$tokenTypeLabel = $this->ignoreLexer->getLabel($tokenType);
302+
$otherTokenContent = $tokenType === IgnoreLexer::TOKEN_OTHER ? sprintf(" '%s'", $content) : '';
303+
$expectedLabels = implode(' or ', array_map(fn ($token) => $this->ignoreLexer->getLabel($token), $expected));
304+
305+
throw new IgnoreParseException(sprintf('Unexpected %s%s after %s, expected %s', $tokenTypeLabel, $otherTokenContent, $lastTokenTypeLabel, $expectedLabels), $tokenLine);
310306
}
311-
if ($tokenType === IgnoreLexer::TOKEN_COMMA && $depth === 0) {
312-
throw new IgnoreParseException('Unexpected comma (,)', $tokenLine);
307+
308+
if ($tokenType === IgnoreLexer::TOKEN_OPEN_PARENTHESIS) {
309+
$openParenthesisCount++;
310+
$expected = null;
311+
continue;
313312
}
313+
314314
if ($tokenType === IgnoreLexer::TOKEN_CLOSE_PARENTHESIS) {
315-
if ($depth < 1) {
316-
throw new IgnoreParseException('Closing parenthesis ")" before opening parenthesis "("', $tokenLine);
317-
}
315+
$openParenthesisCount--;
316+
$expected = [IgnoreLexer::TOKEN_COMMA, IgnoreLexer::TOKEN_END];
317+
continue;
318+
}
318319

319-
$depth--;
320-
array_pop($parenthesisStack);
321-
if ($depth === 0) {
322-
break;
323-
}
320+
if ($openParenthesisCount > 0) {
321+
continue; // waiting for comment end
324322
}
325-
if ($tokenType !== IgnoreLexer::TOKEN_OPEN_PARENTHESIS) {
323+
324+
if ($tokenType === IgnoreLexer::TOKEN_IDENTIFIER) {
325+
$identifiers[] = $content;
326+
$expected = [IgnoreLexer::TOKEN_COMMA, IgnoreLexer::TOKEN_END, IgnoreLexer::TOKEN_OPEN_PARENTHESIS];
326327
continue;
327328
}
328329

329-
$depth++;
330-
$parenthesisStack[] = $tokenLine;
331-
}
332-
333-
if (isset($tokens[$c - 1]) && $tokens[$c - 1][IgnoreLexer::TYPE_OFFSET] === IgnoreLexer::TOKEN_COMMA) {
334-
throw new IgnoreParseException('Unexpected trailing comma (,)', $tokens[$c - 1][IgnoreLexer::LINE_OFFSET]);
330+
if ($tokenType === IgnoreLexer::TOKEN_COMMA) {
331+
$expected = [IgnoreLexer::TOKEN_IDENTIFIER];
332+
continue;
333+
}
335334
}
336335

337-
if (count($parenthesisStack) > 0) {
338-
throw new IgnoreParseException('Unclosed opening parenthesis "(" without closing parenthesis ")"', $parenthesisStack[count($parenthesisStack) - 1]);
336+
if ($openParenthesisCount > 0) {
337+
throw new IgnoreParseException('Unexpected end, unclosed opening parenthesis', $tokenLine ?? 1);
339338
}
340339

341340
if (count($identifiers) === 0) {

tests/PHPStan/Analyser/Ignore/IgnoreLexerTest.php

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
namespace PHPStan\Analyser\Ignore;
44

55
use PHPStan\Testing\PHPStanTestCase;
6+
use function array_pop;
7+
use function substr_count;
68
use const PHP_EOL;
79

810
class IgnoreLexerTest extends PHPStanTestCase
@@ -69,7 +71,7 @@ public function dataTokenize(): iterable
6971
[
7072
['return.ref', IgnoreLexer::TOKEN_IDENTIFIER, 1],
7173
[' ', IgnoreLexer::TOKEN_WHITESPACE, 1],
72-
[PHP_EOL . ' ', IgnoreLexer::TOKEN_EOL, 1],
74+
[PHP_EOL . ' ', IgnoreLexer::TOKEN_END, 1],
7375
['(', IgnoreLexer::TOKEN_OPEN_PARENTHESIS, 2],
7476
['čičí', IgnoreLexer::TOKEN_OTHER, 2],
7577
[')', IgnoreLexer::TOKEN_CLOSE_PARENTHESIS, 2],
@@ -84,7 +86,11 @@ public function dataTokenize(): iterable
8486
public function testTokenize(string $input, array $expectedTokens): void
8587
{
8688
$lexer = new IgnoreLexer();
87-
$this->assertSame($expectedTokens, $lexer->tokenize($input));
89+
$tokens = $lexer->tokenize($input);
90+
$lastToken = array_pop($tokens);
91+
92+
$this->assertSame(['', IgnoreLexer::TOKEN_END, substr_count($input, PHP_EOL) + 1], $lastToken);
93+
$this->assertSame($expectedTokens, $tokens);
8894
}
8995

9096
}

0 commit comments

Comments
 (0)