Skip to content

Commit 939fd02

Browse files
committed
PHP 8.2 | Tokenizer/PHP: add support for DNF types
This commit adds tokenizer support for DNF types as per the proposal outlined in #387.

This means that:
* Two new tokens are introduced, `T_TYPE_OPEN_PARENTHESIS` and `T_TYPE_CLOSE_PARENTHESIS`, for the parentheses used in DNF types. This allows for sniffs to specifically target those tokens and prevents sniffs which are looking for the "normal" open/close parenthesis tokens from acting on DNF parentheses.
* These new tokens, like other parentheses, will get the `parenthesis_opener` and `parenthesis_closer` token array indexes and the tokens between them will have the `nested_parenthesis` index.

Based on the currently added tests, the commit safeguards that:
* The `|` in types is still tokenized as `T_TYPE_UNION`, even in DNF types.
* The `&` in types is still tokenized as `T_TYPE_INTERSECTION`, even in DNF types.
* The `static` keyword for properties is still tokenized as `T_STATIC`, even when right before a DNF type (which could be confused for a function call).
* The arrow function retokenization to `T_FN` with a `T_FN_ARROW` scope opener is handled correctly, even when DNF types are involved and including when the arrow function is declared to return by reference.
* The keyword tokens, like `self`, `parent`, `static`, `true` or `false`, when used in DNF types are still tokenized to their own token and not tokenized as `T_STRING`.
* The `array` keyword when used in DNF types is still tokenized as `T_STRING` and not as `T_ARRAY`.
* A `?` intended as an (illegal) nullability operator in combination with a DNF type is still tokenized as `T_NULLABLE` and not as `T_INLINE_THEN`.
* A function declaration open parenthesis before a typed parameter isn't accidentally retokenized to `T_TYPE_OPEN_PARENTHESIS`.

Includes ample unit tests. Even so, strenuous testing of this PR is recommended: there are so many moving parts involved that it is very easy for something to have been overlooked.

Related to #105
Closes #387
Closes squizlabs/PHP_CodeSniffer#3731
1 parent 4c4a914 commit 939fd02

18 files changed

+1278
-64
lines changed

src/Tokenizers/PHP.php

Lines changed: 131 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,8 @@ class PHP extends Tokenizer
471471
T_CLOSE_SHORT_ARRAY => 1,
472472
T_TYPE_UNION => 1,
473473
T_TYPE_INTERSECTION => 1,
474+
T_TYPE_OPEN_PARENTHESIS => 1,
475+
T_TYPE_CLOSE_PARENTHESIS => 1,
474476
];
475477

476478
/**
@@ -755,6 +757,9 @@ protected function tokenize($string)
755757

756758
/*
757759
Special case for `static` used as a function name, i.e. `static()`.
760+
761+
Note: this may incorrectly change the static keyword directly before a DNF property type.
762+
If so, this will be caught and corrected for in the additional processing.
758763
*/
759764

760765
if ($tokenIsArray === true
@@ -2533,22 +2538,24 @@ protected function processAdditional()
25332538
if (isset($this->tokens[$x]) === true && $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
25342539
$ignore = Tokens::$emptyTokens;
25352540
$ignore += [
2536-
T_ARRAY => T_ARRAY,
2537-
T_CALLABLE => T_CALLABLE,
2538-
T_COLON => T_COLON,
2539-
T_NAME_FULLY_QUALIFIED => T_NAME_FULLY_QUALIFIED,
2540-
T_NAME_QUALIFIED => T_NAME_QUALIFIED,
2541-
T_NAME_RELATIVE => T_NAME_RELATIVE,
2542-
T_NULL => T_NULL,
2543-
T_TRUE => T_TRUE,
2544-
T_FALSE => T_FALSE,
2545-
T_NULLABLE => T_NULLABLE,
2546-
T_PARENT => T_PARENT,
2547-
T_SELF => T_SELF,
2548-
T_STATIC => T_STATIC,
2549-
T_STRING => T_STRING,
2550-
T_TYPE_INTERSECTION => T_TYPE_INTERSECTION,
2551-
T_TYPE_UNION => T_TYPE_UNION,
2541+
T_ARRAY => T_ARRAY,
2542+
T_CALLABLE => T_CALLABLE,
2543+
T_COLON => T_COLON,
2544+
T_NAME_FULLY_QUALIFIED => T_NAME_FULLY_QUALIFIED,
2545+
T_NAME_QUALIFIED => T_NAME_QUALIFIED,
2546+
T_NAME_RELATIVE => T_NAME_RELATIVE,
2547+
T_NULL => T_NULL,
2548+
T_TRUE => T_TRUE,
2549+
T_FALSE => T_FALSE,
2550+
T_NULLABLE => T_NULLABLE,
2551+
T_PARENT => T_PARENT,
2552+
T_SELF => T_SELF,
2553+
T_STATIC => T_STATIC,
2554+
T_STRING => T_STRING,
2555+
T_TYPE_UNION => T_TYPE_UNION,
2556+
T_TYPE_INTERSECTION => T_TYPE_INTERSECTION,
2557+
T_TYPE_OPEN_PARENTHESIS => T_TYPE_OPEN_PARENTHESIS,
2558+
T_TYPE_CLOSE_PARENTHESIS => T_TYPE_CLOSE_PARENTHESIS,
25522559
];
25532560

25542561
$closer = $this->tokens[$x]['parenthesis_closer'];
@@ -2854,10 +2861,15 @@ protected function processAdditional()
28542861
continue;
28552862
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
28562863
|| $this->tokens[$i]['code'] === T_BITWISE_AND
2864+
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
2865+
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
28572866
) {
28582867
/*
28592868
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
28602869
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
2870+
Convert "(" and ")" to T_TYPE_(OPEN|CLOSE)_PARENTHESIS or leave as T_(OPEN|CLOSE)_PARENTHESIS.
2871+
2872+
All type related tokens will be converted in one go as soon as this section is hit.
28612873
*/
28622874

28632875
$allowed = [
@@ -2874,20 +2886,22 @@ protected function processAdditional()
28742886
T_NULL => T_NULL,
28752887
];
28762888

2877-
$suspectedType = null;
2878-
$typeTokenCount = 0;
2889+
$suspectedType = null;
2890+
$typeTokenCountAfter = 0;
28792891

28802892
for ($x = ($i + 1); $x < $numTokens; $x++) {
28812893
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
28822894
continue;
28832895
}
28842896

28852897
if (isset($allowed[$this->tokens[$x]['code']]) === true) {
2886-
++$typeTokenCount;
2898+
++$typeTokenCountAfter;
28872899
continue;
28882900
}
28892901

2890-
if ($typeTokenCount > 0
2902+
if (($typeTokenCountAfter > 0
2903+
|| ($this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
2904+
&& isset($this->tokens[$i]['parenthesis_owner']) === false))
28912905
&& ($this->tokens[$x]['code'] === T_BITWISE_AND
28922906
|| $this->tokens[$x]['code'] === T_ELLIPSIS)
28932907
) {
@@ -2918,6 +2932,7 @@ protected function processAdditional()
29182932
&& $this->tokens[$this->tokens[$x]['scope_condition']]['code'] === T_FUNCTION
29192933
) {
29202934
$suspectedType = 'return';
2935+
break;
29212936
}
29222937

29232938
if ($this->tokens[$x]['code'] === T_EQUAL) {
@@ -2929,35 +2944,95 @@ protected function processAdditional()
29292944
break;
29302945
}//end for
29312946

2932-
if ($typeTokenCount === 0 || isset($suspectedType) === false) {
2933-
// Definitely not a union or intersection type, move on.
2947+
if (($typeTokenCountAfter === 0
2948+
&& ($this->tokens[$i]['code'] !== T_CLOSE_PARENTHESIS
2949+
|| isset($this->tokens[$i]['parenthesis_owner']) === true))
2950+
|| isset($suspectedType) === false
2951+
) {
2952+
// Definitely not a union, intersection or DNF type, move on.
29342953
continue;
29352954
}
29362955

29372956
if ($suspectedType === 'property or parameter') {
29382957
unset($allowed[T_STATIC]);
29392958
}
29402959

2941-
$typeTokenCount = 0;
2942-
$typeOperators = [$i];
2943-
$confirmed = false;
2960+
$typeTokenCountBefore = 0;
2961+
$typeOperators = [$i];
2962+
$confirmed = false;
2963+
$maybeNullable = null;
29442964

29452965
for ($x = ($i - 1); $x >= 0; $x--) {
29462966
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
29472967
continue;
29482968
}
29492969

2970+
if ($suspectedType === 'property or parameter'
2971+
&& $this->tokens[$x]['code'] === T_STRING
2972+
&& strtolower($this->tokens[$x]['content']) === 'static'
2973+
) {
2974+
// Static keyword followed directly by an open parenthesis for a DNF type.
2975+
// This token should be T_STATIC and was incorrectly identified as a function call before.
2976+
$this->tokens[$x]['code'] = T_STATIC;
2977+
$this->tokens[$x]['type'] = 'T_STATIC';
2978+
2979+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
2980+
$line = $this->tokens[$x]['line'];
2981+
echo "\t* token $x on line $line changed back from T_STRING to T_STATIC".PHP_EOL;
2982+
}
2983+
}
2984+
2985+
if ($suspectedType === 'property or parameter'
2986+
&& $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
2987+
) {
2988+
// We need to prevent the open parenthesis for a function/fn declaration from being retokenized
2989+
// to T_TYPE_OPEN_PARENTHESIS if this is the first parameter in the declaration.
2990+
if (isset($this->tokens[$x]['parenthesis_owner']) === true
2991+
&& $this->tokens[$this->tokens[$x]['parenthesis_owner']]['code'] === T_FUNCTION
2992+
) {
2993+
$confirmed = true;
2994+
break;
2995+
} else {
2996+
// This may still be an arrow function which hasn't been handled yet.
2997+
for ($y = ($x - 1); $y > 0; $y--) {
2998+
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
2999+
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
3000+
) {
3001+
// Non-whitespace content.
3002+
break;
3003+
}
3004+
}
3005+
3006+
if ($this->tokens[$y]['code'] === T_FN) {
3007+
$confirmed = true;
3008+
break;
3009+
}
3010+
}
3011+
}//end if
3012+
29503013
if (isset($allowed[$this->tokens[$x]['code']]) === true) {
2951-
++$typeTokenCount;
3014+
++$typeTokenCountBefore;
29523015
continue;
29533016
}
29543017

2955-
// Union and intersection types can't use the nullable operator, but be tolerant to parse errors.
2956-
if ($typeTokenCount > 0 && $this->tokens[$x]['code'] === T_NULLABLE) {
3018+
// Union, intersection and DNF types can't use the nullable operator, but be tolerant to parse errors.
3019+
if (($typeTokenCountBefore > 0
3020+
|| ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS && isset($this->tokens[$x]['parenthesis_owner']) === false))
3021+
&& ($this->tokens[$x]['code'] === T_NULLABLE
3022+
|| $this->tokens[$x]['code'] === T_INLINE_THEN)
3023+
) {
3024+
if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
3025+
$maybeNullable = $x;
3026+
}
3027+
29573028
continue;
29583029
}
29593030

2960-
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
3031+
if ($this->tokens[$x]['code'] === T_BITWISE_OR
3032+
|| $this->tokens[$x]['code'] === T_BITWISE_AND
3033+
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3034+
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
3035+
) {
29613036
$typeOperators[] = $x;
29623037
continue;
29633038
}
@@ -3043,14 +3118,40 @@ protected function processAdditional()
30433118
$line = $this->tokens[$x]['line'];
30443119
Common::printStatusMessage("* token $x on line $line changed from T_BITWISE_OR to T_TYPE_UNION", 1);
30453120
}
3046-
} else {
3121+
} else if ($this->tokens[$x]['code'] === T_BITWISE_AND) {
30473122
$this->tokens[$x]['code'] = T_TYPE_INTERSECTION;
30483123
$this->tokens[$x]['type'] = 'T_TYPE_INTERSECTION';
30493124

30503125
if (PHP_CODESNIFFER_VERBOSITY > 1) {
30513126
$line = $this->tokens[$x]['line'];
30523127
Common::printStatusMessage("* token $x on line $line changed from T_BITWISE_AND to T_TYPE_INTERSECTION", 1);
30533128
}
3129+
} else if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
3130+
$this->tokens[$x]['code'] = T_TYPE_OPEN_PARENTHESIS;
3131+
$this->tokens[$x]['type'] = 'T_TYPE_OPEN_PARENTHESIS';
3132+
3133+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3134+
$line = $this->tokens[$x]['line'];
3135+
echo "\t* token $x on line $line changed from T_OPEN_PARENTHESIS to T_TYPE_OPEN_PARENTHESIS".PHP_EOL;
3136+
}
3137+
} else if ($this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
3138+
$this->tokens[$x]['code'] = T_TYPE_CLOSE_PARENTHESIS;
3139+
$this->tokens[$x]['type'] = 'T_TYPE_CLOSE_PARENTHESIS';
3140+
3141+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3142+
$line = $this->tokens[$x]['line'];
3143+
echo "\t* token $x on line $line changed from T_CLOSE_PARENTHESIS to T_TYPE_CLOSE_PARENTHESIS".PHP_EOL;
3144+
}
3145+
}//end if
3146+
}//end foreach
3147+
3148+
if (isset($maybeNullable) === true) {
3149+
$this->tokens[$maybeNullable]['code'] = T_NULLABLE;
3150+
$this->tokens[$maybeNullable]['type'] = 'T_NULLABLE';
3151+
3152+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3153+
$line = $this->tokens[$maybeNullable]['line'];
3154+
echo "\t* token $maybeNullable on line $line changed from T_INLINE_THEN to T_NULLABLE".PHP_EOL;
30543155
}
30553156
}
30563157

src/Util/Tokens.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@
8080
define('T_ATTRIBUTE_END', 'PHPCS_T_ATTRIBUTE_END');
8181
define('T_ENUM_CASE', 'PHPCS_T_ENUM_CASE');
8282
define('T_TYPE_INTERSECTION', 'PHPCS_T_TYPE_INTERSECTION');
83+
define('T_TYPE_OPEN_PARENTHESIS', 'PHPCS_T_TYPE_OPEN_PARENTHESIS');
84+
define('T_TYPE_CLOSE_PARENTHESIS', 'PHPCS_T_TYPE_CLOSE_PARENTHESIS');
8385

8486
// Some PHP 5.5 tokens, replicated for lower versions.
8587
if (defined('T_FINALLY') === false) {

tests/Core/Tokenizer/ArrayKeywordTest.inc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,20 @@ class Bar {
3939
/* testOOPropertyType */
4040
protected array $property;
4141
}
42+
43+
class DNFTypes {
44+
/* testOOConstDNFType */
45+
const (A&B)|array|(C&D) NAME = [];
46+
47+
/* testOOPropertyDNFType */
48+
protected (A&B)|ARRAY|null $property;
49+
50+
/* testFunctionDeclarationParamDNFType */
51+
public function name(null|array|(A&B) $param) {
52+
/* testClosureDeclarationParamDNFType */
53+
$cl = function ( array|(A&B) $param) {};
54+
55+
/* testArrowDeclarationReturnDNFType */
56+
$arrow = fn($a): (A&B)|Array => new $a;
57+
}
58+
}

tests/Core/Tokenizer/ArrayKeywordTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,24 @@ public static function dataArrayType()
131131
'OO property type' => [
132132
'testMarker' => '/* testOOPropertyType */',
133133
],
134+
135+
'OO constant DNF type' => [
136+
'testMarker' => '/* testOOConstDNFType */',
137+
],
138+
'OO property DNF type' => [
139+
'testMarker' => '/* testOOPropertyDNFType */',
140+
'testContent' => 'ARRAY',
141+
],
142+
'function param DNF type' => [
143+
'testMarker' => '/* testFunctionDeclarationParamDNFType */',
144+
],
145+
'closure param DNF type' => [
146+
'testMarker' => '/* testClosureDeclarationParamDNFType */',
147+
],
148+
'arrow return DNF type' => [
149+
'testMarker' => '/* testArrowDeclarationReturnDNFType */',
150+
'testContent' => 'Array',
151+
],
134152
];
135153

136154
}//end dataArrayType()

tests/Core/Tokenizer/BackfillFnTokenTest.inc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,15 @@ $arrowWithUnionParam = fn(Traversable&Countable $param) : int => (new SomeClass(
125125
/* testIntersectionReturnType */
126126
$arrowWithUnionReturn = fn($param) : \MyFoo&SomeInterface => new SomeClass($param);
127127

128+
/* testDNFParamType */
129+
$arrowWithUnionParam = fn((Traversable&Countable)|null $param) : SomeClass => new SomeClass($param) ?? null;
130+
131+
/* testDNFReturnType */
132+
$arrowWithUnionReturn = fn($param) : false|(\MyFoo&SomeInterface) => new \MyFoo($param) ?? false;
133+
134+
/* testDNFParamTypeWithReturnByRef */
135+
$arrowWithParamReturnByRef = fn &((A&B)|null $param) => $param * 10;
136+
128137
/* testTernary */
129138
$fn = fn($a) => $a ? /* testTernaryThen */ fn() : string => 'a' : /* testTernaryElse */ fn() : string => 'b';
130139

tests/Core/Tokenizer/BackfillFnTokenTest.php

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,54 @@ public function testIntersectionReturnType()
603603
}//end testIntersectionReturnType()
604604

605605

606+
/**
607+
* Test arrow function with a DNF parameter type.
608+
*
609+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
610+
*
611+
* @return void
612+
*/
613+
public function testDNFParamType()
614+
{
615+
$token = $this->getTargetToken('/* testDNFParamType */', T_FN);
616+
$this->backfillHelper($token);
617+
$this->scopePositionTestHelper($token, 17, 29);
618+
619+
}//end testDNFParamType()
620+
621+
622+
/**
623+
* Test arrow function with a DNF return type.
624+
*
625+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
626+
*
627+
* @return void
628+
*/
629+
public function testDNFReturnType()
630+
{
631+
$token = $this->getTargetToken('/* testDNFReturnType */', T_FN);
632+
$this->backfillHelper($token);
633+
$this->scopePositionTestHelper($token, 15, 27);
634+
635+
}//end testDNFReturnType()
636+
637+
638+
/**
639+
* Test arrow function which returns by reference with a DNF parameter type.
640+
*
641+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
642+
*
643+
* @return void
644+
*/
645+
public function testDNFParamTypeWithReturnByRef()
646+
{
647+
$token = $this->getTargetToken('/* testDNFParamTypeWithReturnByRef */', T_FN);
648+
$this->backfillHelper($token);
649+
$this->scopePositionTestHelper($token, 15, 22);
650+
651+
}//end testDNFParamTypeWithReturnByRef()
652+
653+
606654
/**
607655
* Test arrow functions used in ternary operators.
608656
*

0 commit comments

Comments
 (0)