Skip to content

Commit fb351b3

Browse files
authored
Merge pull request #508 from PHPCSStandards/feature/tokenizer-php-harden-the-dnf-layer-some-more
Tokenizer/PHP: add extra hardening to the (DNF) type handling + efficiency improvement
2 parents 027c0cb + 83afad8 commit fb351b3

File tree

5 files changed

+379
-10
lines changed

5 files changed

+379
-10
lines changed

src/Tokenizers/PHP.php

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2602,7 +2602,9 @@ protected function processAdditional()
26022602

26032603
$this->createAttributesNestingMap();
26042604

2605-
$numTokens = count($this->tokens);
2605+
$numTokens = count($this->tokens);
2606+
$lastSeenTypeToken = $numTokens;
2607+
26062608
for ($i = ($numTokens - 1); $i >= 0; $i--) {
26072609
// Check for any unset scope conditions due to alternate IF/ENDIF syntax.
26082610
if (isset($this->tokens[$i]['scope_opener']) === true
@@ -3036,9 +3038,14 @@ protected function processAdditional()
30363038
continue;
30373039
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
30383040
|| $this->tokens[$i]['code'] === T_BITWISE_AND
3039-
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
30403041
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
30413042
) {
3043+
if ($lastSeenTypeToken < $i) {
3044+
// We've already examined this code to check if it is a type declaration and concluded it wasn't.
3045+
// No need to do it again.
3046+
continue;
3047+
}
3048+
30423049
/*
30433050
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
30443051
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
@@ -3133,9 +3140,14 @@ protected function processAdditional()
31333140

31343141
$typeTokenCountBefore = 0;
31353142
$typeOperators = [$i];
3143+
$parenthesesCount = 0;
31363144
$confirmed = false;
31373145
$maybeNullable = null;
31383146

3147+
if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
3148+
++$parenthesesCount;
3149+
}
3150+
31393151
for ($x = ($i - 1); $x >= 0; $x--) {
31403152
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
31413153
continue;
@@ -3167,7 +3179,7 @@ protected function processAdditional()
31673179
$confirmed = true;
31683180
break;
31693181
} else {
3170-
// This may still be an arrow function which hasn't be handled yet.
3182+
// This may still be an arrow function which hasn't been handled yet.
31713183
for ($y = ($x - 1); $y > 0; $y--) {
31723184
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
31733185
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
@@ -3202,11 +3214,13 @@ protected function processAdditional()
32023214
continue;
32033215
}
32043216

3205-
if ($this->tokens[$x]['code'] === T_BITWISE_OR
3206-
|| $this->tokens[$x]['code'] === T_BITWISE_AND
3207-
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3208-
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
3209-
) {
3217+
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
3218+
$typeOperators[] = $x;
3219+
continue;
3220+
}
3221+
3222+
if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
3223+
++$parenthesesCount;
32103224
$typeOperators[] = $x;
32113225
continue;
32123226
}
@@ -3244,6 +3258,9 @@ protected function processAdditional()
32443258
break;
32453259
}//end for
32463260

3261+
// Remember the last token we examined as part of the (non-)"type declaration".
3262+
$lastSeenTypeToken = $x;
3263+
32473264
if ($confirmed === false
32483265
&& $suspectedType === 'property or parameter'
32493266
&& isset($this->tokens[$i]['nested_parenthesis']) === true
@@ -3288,8 +3305,8 @@ protected function processAdditional()
32883305
unset($parens, $last);
32893306
}//end if
32903307

3291-
if ($confirmed === false) {
3292-
// Not a union or intersection type after all, move on.
3308+
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
3309+
// Not a (valid) union, intersection or DNF type after all, move on.
32933310
continue;
32943311
}
32953312

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
4+
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
5+
class ParseErrors {
6+
/* testBrokenConstDNFTypeEndOnOpenParenthesis */
7+
const A|(B PARSE_ERROR = null;
8+
9+
/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */
10+
public A|(B $parseError;
11+
12+
function unmatchedParens {
13+
/* testBrokenParamDNFTypeEndOnOpenParenthesis */
14+
A|(B $parseError,
15+
/* testBrokenReturnDNFTypeEndOnOpenParenthesis */
16+
) : A|(B {}
17+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
<?php
2+
/**
3+
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
4+
*
5+
* @author Juliette Reinders Folmer <[email protected]>
6+
* @copyright 2024 PHPCSStandards and contributors
7+
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8+
*/
9+
10+
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;
11+
12+
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
13+
14+
final class DNFTypesParseError1Test extends AbstractTokenizerTestCase
15+
{
16+
17+
18+
/**
19+
* Document handling for a DNF type / parse error where the last significant type-specific token is an open parenthesis.
20+
*
21+
* @param string $testMarker The comment prefacing the target token.
22+
*
23+
* @dataProvider dataBrokenDNFTypeCantEndOnOpenParenthesis
24+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
25+
*
26+
* @return void
27+
*/
28+
public function testBrokenDNFTypeCantEndOnOpenParenthesis($testMarker)
29+
{
30+
$tokens = $this->phpcsFile->getTokens();
31+
32+
$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
33+
$token = $tokens[$openPtr];
34+
35+
// Verify that the open parenthesis is tokenized as a normal parenthesis.
36+
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
37+
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');
38+
39+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
40+
// is not recognized as a valid type declaration.
41+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
42+
$token = $tokens[$unionPtr];
43+
44+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
45+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
46+
47+
}//end testBrokenDNFTypeCantEndOnOpenParenthesis()
48+
49+
50+
/**
51+
* Data provider.
52+
*
53+
* @see testBrokenDNFTypeCantEndOnOpenParenthesis()
54+
*
55+
* @return array<string, array<string, string>>
56+
*/
57+
public static function dataBrokenDNFTypeCantEndOnOpenParenthesis()
58+
{
59+
return [
60+
'OO const type' => ['/* testBrokenConstDNFTypeEndOnOpenParenthesis */'],
61+
'OO property type' => ['/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */'],
62+
'Parameter type' => ['/* testBrokenParamDNFTypeEndOnOpenParenthesis */'],
63+
'Return type' => ['/* testBrokenReturnDNFTypeEndOnOpenParenthesis */'],
64+
];
65+
66+
}//end dataBrokenDNFTypeCantEndOnOpenParenthesis()
67+
68+
69+
}//end class
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
4+
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
5+
//
6+
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
7+
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
8+
// not belong together.
9+
10+
class UnmatchedParentheses {
11+
/* testBrokenConstDNFTypeParensMissingClose */
12+
const A|(B&C PARSE_ERROR_1 = null;
13+
14+
/* testBrokenConstDNFTypeParensMissingOpen */
15+
const A|B&C) PARSE_ERROR_2 = null;
16+
17+
/* testBrokenPropertyDNFTypeParensMissingClose */
18+
private A|(B&C $parseError1;
19+
20+
/* testBrokenPropertyDNFTypeParensMissingOpen */
21+
protected A|B&C) $parseError2;
22+
23+
function unmatchedParens1 (
24+
/* testBrokenParamDNFTypeParensMissingClose */
25+
A|(B&C $parseError,
26+
/* testBrokenReturnDNFTypeParensMissingOpen */
27+
) : A|B&C) {}
28+
29+
function unmatchedParens2 (
30+
/* testBrokenParamDNFTypeParensMissingOpen */
31+
A|B&C) $parseError
32+
/* testBrokenReturnDNFTypeParensMissingClose */
33+
) : A|(B&C {}
34+
}
35+
36+
class MatchedAndUnmatchedParentheses {
37+
/* testBrokenConstDNFTypeParensMissingOneClose */
38+
const (A&B)|(B&C PARSE_ERROR = null;
39+
40+
/* testBrokenPropertyDNFTypeParensMissingOneOpen */
41+
protected (A&B)|B&C) $parseError;
42+
43+
function unmatchedParens (
44+
/* testBrokenParamDNFTypeParensMissingOneClose */
45+
(A&B)|(B&C $parseError,
46+
/* testBrokenReturnDNFTypeParensMissingOneOpen */
47+
) : (A&B)|B&C) {}
48+
}

0 commit comments

Comments
 (0)