Skip to content

Commit 07477a7

Browse files
committed
Tokenizer/PHP: don't retokenize tokens in a broken type DNF declaration
A DNF type always needs to have matching open and close parentheses. If the parentheses are unbalanced, we can't be sure this is a DNF type and run the risk of retokenizing non-type tokens. Even if the code under scan was intended as a DNF type, it will be invalid code/contain a parse error if the parentheses are unbalanced. In that case, retokenizing to the type specific tokens is likely to make the effect of this parse error on sniffs _worse_. With that in mind, I'm adding extra hardening to the type handling layer in the PHP tokenizer, which will ensure that the retokenization to type specific tokens _only_ happens when there are balanced pairs of DNF parentheses. Includes tests safeguarding the behaviour of the type handling layer for this type of invalid/parse error types. Safe for two, all these tests would previously fail on containing at least _some_ type specific tokens.
1 parent cac9038 commit 07477a7

File tree

3 files changed

+280
-7
lines changed

3 files changed

+280
-7
lines changed

src/Tokenizers/PHP.php

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3132,9 +3132,14 @@ protected function processAdditional()
31323132

31333133
$typeTokenCountBefore = 0;
31343134
$typeOperators = [$i];
3135+
$parenthesesCount = 0;
31353136
$confirmed = false;
31363137
$maybeNullable = null;
31373138

3139+
if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
3140+
++$parenthesesCount;
3141+
}
3142+
31383143
for ($x = ($i - 1); $x >= 0; $x--) {
31393144
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
31403145
continue;
@@ -3201,11 +3206,13 @@ protected function processAdditional()
32013206
continue;
32023207
}
32033208

3204-
if ($this->tokens[$x]['code'] === T_BITWISE_OR
3205-
|| $this->tokens[$x]['code'] === T_BITWISE_AND
3206-
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3207-
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
3208-
) {
3209+
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
3210+
$typeOperators[] = $x;
3211+
continue;
3212+
}
3213+
3214+
if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
3215+
++$parenthesesCount;
32093216
$typeOperators[] = $x;
32103217
continue;
32113218
}
@@ -3287,8 +3294,8 @@ protected function processAdditional()
32873294
unset($parens, $last);
32883295
}//end if
32893296

3290-
if ($confirmed === false) {
3291-
// Not a union or intersection type after all, move on.
3297+
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
3298+
// Not a (valid) union, intersection or DNF type after all, move on.
32923299
continue;
32933300
}
32943301

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
4+
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
5+
//
6+
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
7+
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
8+
// no belong together.
9+
10+
class UnmatchedParentheses {
11+
/* testBrokenConstDNFTypeParensMissingClose */
12+
const A|(B&C PARSE_ERROR_1 = null;
13+
14+
/* testBrokenConstDNFTypeParensMissingOpen */
15+
const A|B&C) PARSE_ERROR_2 = null;
16+
17+
/* testBrokenPropertyDNFTypeParensMissingClose */
18+
private A|(B&C $parseError1;
19+
20+
/* testBrokenPropertyDNFTypeParensMissingOpen */
21+
protected A|B&C) $parseError2;
22+
23+
function unmatchedParens1 (
24+
/* testBrokenParamDNFTypeParensMissingClose */
25+
A|(B&C $parseError,
26+
/* testBrokenReturnDNFTypeParensMissingOpen */
27+
) : A|B&C) {}
28+
29+
function unmatchedParens2 (
30+
/* testBrokenParamDNFTypeParensMissingOpen */
31+
A|B&C) $parseError
32+
/* testBrokenReturnDNFTypeParensMissingClose */
33+
) : A|(B&C {}
34+
}
35+
36+
class MatchedAndUnmatchedParentheses {
37+
/* testBrokenConstDNFTypeParensMissingOneClose */
38+
const (A&B)|(B&C PARSE_ERROR = null;
39+
40+
/* testBrokenPropertyDNFTypeParensMissingOneOpen */
41+
protected (A&B)|B&C) $parseError;
42+
43+
function unmatchedParens (
44+
/* testBrokenParamDNFTypeParensMissingOneClose */
45+
(A&B)|(B&C $parseError,
46+
/* testBrokenReturnDNFTypeParensMissingOneOpen */
47+
) : (A&B)|B&C) {}
48+
}
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
<?php
2+
/**
3+
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
4+
*
5+
* @author Juliette Reinders Folmer <[email protected]>
6+
* @copyright 2024 PHPCSStandards and contributors
7+
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8+
*/
9+
10+
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;
11+
12+
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
13+
use PHP_CodeSniffer\Util\Tokens;
14+
15+
final class DNFTypesParseError2Test extends AbstractTokenizerTestCase
16+
{
17+
18+
19+
/**
20+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
21+
*
22+
* @param string $testMarker The comment prefacing the target token.
23+
*
24+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens
25+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
26+
*
27+
* @return void
28+
*/
29+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens($testMarker)
30+
{
31+
$tokens = $this->phpcsFile->getTokens();
32+
33+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
34+
// is not recognized as a valid type declaration.
35+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
36+
$token = $tokens[$unionPtr];
37+
38+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
39+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
40+
41+
// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
42+
$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
43+
$token = $tokens[$openPtr];
44+
45+
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
46+
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');
47+
48+
// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
49+
// is not recognized as a valid type declaration.
50+
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
51+
$token = $tokens[$intersectPtr];
52+
53+
$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
54+
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');
55+
56+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
57+
58+
59+
/**
60+
* Data provider.
61+
*
62+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
63+
*
64+
* @return array<string, array<string, string>>
65+
*/
66+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
67+
{
68+
return [
69+
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingClose */'],
70+
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingClose */'],
71+
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingClose */'],
72+
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingClose */'],
73+
];
74+
75+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
76+
77+
78+
/**
79+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
80+
*
81+
* @param string $testMarker The comment prefacing the target token.
82+
*
83+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens
84+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
85+
*
86+
* @return void
87+
*/
88+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens($testMarker)
89+
{
90+
$tokens = $this->phpcsFile->getTokens();
91+
92+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
93+
// is not recognized as a valid type declaration.
94+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
95+
$token = $tokens[$unionPtr];
96+
97+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
98+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
99+
100+
// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
101+
$closePtr = $this->getTargetToken($testMarker, [T_CLOSE_PARENTHESIS, T_TYPE_CLOSE_PARENTHESIS], ')');
102+
$token = $tokens[$closePtr];
103+
104+
$this->assertSame(T_CLOSE_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (code)');
105+
$this->assertSame('T_CLOSE_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (type)');
106+
107+
// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
108+
// is not recognized as a valid type declaration.
109+
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
110+
$token = $tokens[$intersectPtr];
111+
112+
$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
113+
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');
114+
115+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
116+
117+
118+
/**
119+
* Data provider.
120+
*
121+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
122+
*
123+
* @return array<string, array<string, string>>
124+
*/
125+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
126+
{
127+
return [
128+
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingOpen */'],
129+
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingOpen */'],
130+
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingOpen */'],
131+
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingOpen */'],
132+
];
133+
134+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
135+
136+
137+
/**
138+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis,
139+
* but also contains a set of matched parentheses.
140+
*
141+
* @param string $testMarker The comment prefacing the target token.
142+
*
143+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched
144+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
145+
*
146+
* @return void
147+
*/
148+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched($testMarker)
149+
{
150+
$tokens = $this->phpcsFile->getTokens();
151+
$startPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
152+
153+
for ($i = $startPtr; $i < $this->phpcsFile->numTokens; $i++) {
154+
if (isset(Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
155+
continue;
156+
}
157+
158+
if ($tokens[$i]['code'] === T_EQUAL
159+
|| $tokens[$i]['code'] === T_VARIABLE
160+
|| $tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
161+
) {
162+
// Reached the end of the type.
163+
break;
164+
}
165+
166+
$errorPrefix = 'Token tokenized as '.$tokens[$i]['type'];
167+
168+
// Verify that type tokens have not been retokenized to `T_TYPE_*` tokens for broken type declarations.
169+
switch ($tokens[$i]['content']) {
170+
case '|':
171+
$this->assertSame(T_BITWISE_OR, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_OR (code)');
172+
$this->assertSame('T_BITWISE_OR', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_OR (type)');
173+
break;
174+
175+
case '&':
176+
$this->assertSame(T_BITWISE_AND, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_AND (code)');
177+
$this->assertSame('T_BITWISE_AND', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_AND (type)');
178+
break;
179+
180+
case '(':
181+
// Verify that the open parenthesis is tokenized as a normal parenthesis.
182+
$this->assertSame(T_OPEN_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_OPEN_PARENTHESIS (code)');
183+
$this->assertSame('T_OPEN_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_OPEN_PARENTHESIS (type)');
184+
break;
185+
186+
case ')':
187+
$this->assertSame(T_CLOSE_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_CLOSE_PARENTHESIS (code)');
188+
$this->assertSame('T_CLOSE_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_CLOSE_PARENTHESIS (type)');
189+
break;
190+
191+
default:
192+
break;
193+
}//end switch
194+
}//end for
195+
196+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
197+
198+
199+
/**
200+
* Data provider.
201+
*
202+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
203+
*
204+
* @return array<string, array<string, string>>
205+
*/
206+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
207+
{
208+
return [
209+
'OO const type - missing one close parenthesis' => ['/* testBrokenConstDNFTypeParensMissingOneClose */'],
210+
'OO property type - missing one open parenthesis' => ['/* testBrokenPropertyDNFTypeParensMissingOneOpen */'],
211+
'Parameter type - missing one close parenthesis' => ['/* testBrokenParamDNFTypeParensMissingOneClose */'],
212+
'Return type - missing one open parenthesis' => ['/* testBrokenReturnDNFTypeParensMissingOneOpen */'],
213+
];
214+
215+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
216+
217+
218+
}//end class

0 commit comments

Comments
 (0)