Skip to content

Commit 3d9b04c

Browse files
authored
RegexArrayShapeMatcher - More precise non-empty-string and numeric-string
1 parent 924f173 commit 3d9b04c

File tree

7 files changed

+388
-111
lines changed

7 files changed

+388
-111
lines changed

src/Type/Php/RegexArrayShapeMatcher.php

Lines changed: 140 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,21 @@
77
use Hoa\Compiler\Llk\TreeNode;
88
use Hoa\Exception\Exception;
99
use Hoa\File\Read;
10+
use Nette\Utils\Strings;
1011
use PhpParser\Node\Expr;
1112
use PhpParser\Node\Name;
1213
use PHPStan\Analyser\Scope;
1314
use PHPStan\Php\PhpVersion;
1415
use PHPStan\ShouldNotHappenException;
1516
use PHPStan\TrinaryLogic;
17+
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
18+
use PHPStan\Type\Accessory\AccessoryNumericStringType;
1619
use PHPStan\Type\Constant\ConstantArrayType;
1720
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
1821
use PHPStan\Type\Constant\ConstantIntegerType;
1922
use PHPStan\Type\Constant\ConstantStringType;
2023
use PHPStan\Type\IntegerRangeType;
24+
use PHPStan\Type\IntersectionType;
2125
use PHPStan\Type\StringType;
2226
use PHPStan\Type\Type;
2327
use PHPStan\Type\TypeCombinator;
@@ -126,7 +130,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
126130
$trailingOptionals++;
127131
}
128132

129-
$valueType = $this->getValueType($flags ?? 0);
130133
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
131134
$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);
132135

@@ -141,7 +144,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
141144

142145
$combiType = $this->buildArrayType(
143146
$groupList,
144-
$valueType,
145147
$wasMatched,
146148
$trailingOptionals,
147149
$flags ?? 0,
@@ -179,7 +181,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
179181

180182
$combiType = $this->buildArrayType(
181183
$comboList,
182-
$valueType,
183184
$wasMatched,
184185
$trailingOptionals,
185186
$flags ?? 0,
@@ -202,7 +203,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
202203

203204
return $this->buildArrayType(
204205
$groupList,
205-
$valueType,
206206
$wasMatched,
207207
$trailingOptionals,
208208
$flags ?? 0,
@@ -264,7 +264,6 @@ private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
264264
*/
265265
private function buildArrayType(
266266
array $captureGroups,
267-
Type $valueType,
268267
TrinaryLogic $wasMatched,
269268
int $trailingOptionals,
270269
int $flags,
@@ -275,14 +274,14 @@ private function buildArrayType(
275274
// first item in matches contains the overall match.
276275
$builder->setOffsetValueType(
277276
$this->getKeyType(0),
278-
TypeCombinator::removeNull($valueType),
277+
TypeCombinator::removeNull($this->getValueType(new StringType(), $flags)),
279278
!$wasMatched->yes(),
280279
);
281280

282281
$countGroups = count($captureGroups);
283282
$i = 0;
284283
foreach ($captureGroups as $captureGroup) {
285-
$groupValueType = $valueType;
284+
$groupValueType = $this->getValueType($captureGroup->getType(), $flags);
286285

287286
if (!$wasMatched->yes()) {
288287
$optional = true;
@@ -299,6 +298,10 @@ private function buildArrayType(
299298
}
300299
}
301300

301+
if (!$optional && $captureGroup->isOptional() && !$this->containsUnmatchedAsNull($flags)) {
302+
$groupValueType = TypeCombinator::union($groupValueType, new ConstantStringType(''));
303+
}
304+
302305
if ($captureGroup->isNamed()) {
303306
$builder->setOffsetValueType(
304307
$this->getKeyType($captureGroup->getName()),
@@ -333,9 +336,10 @@ private function getKeyType(int|string $key): Type
333336
return new ConstantIntegerType($key);
334337
}
335338

336-
private function getValueType(int $flags): Type
339+
private function getValueType(Type $baseType, int $flags): Type
337340
{
338-
$valueType = new StringType();
341+
$valueType = $baseType;
342+
339343
$offsetType = IntegerRangeType::fromInterval(0, null);
340344
if ($this->containsUnmatchedAsNull($flags)) {
341345
$valueType = TypeCombinator::addNull($valueType);
@@ -420,6 +424,7 @@ private function walkRegexAst(
420424
$inAlternation ? $alternationId : null,
421425
$inOptionalQuantification,
422426
$parentGroup,
427+
$this->createGroupType($ast),
423428
);
424429
$parentGroup = $group;
425430
} elseif ($ast->getId() === '#namedcapturing') {
@@ -430,6 +435,7 @@ private function walkRegexAst(
430435
$inAlternation ? $alternationId : null,
431436
$inOptionalQuantification,
432437
$parentGroup,
438+
$this->createGroupType($ast),
433439
);
434440
$parentGroup = $group;
435441
} elseif ($ast->getId() === '#noncapturing') {
@@ -534,6 +540,131 @@ private function getQuantificationRange(TreeNode $node): array
534540
return [$min, $max];
535541
}
536542

543+
/**
 * Builds the value type for one capturing group from its regex AST.
 *
 * The group is scanned with walkGroupAst(); depending on what the scan
 * could prove, the plain string type is intersected with an accessory:
 *  - numeric "yes"                     => AccessoryNumericStringType & StringType
 *  - otherwise non-empty "yes"         => AccessoryNonEmptyStringType & StringType
 *  - nothing proven                    => StringType
 *
 * @param TreeNode $group AST node of the capturing group
 */
private function createGroupType(TreeNode $group): Type
{
    // Both facts start out as "maybe"; the walk updates them by reference.
    $nonEmpty = TrinaryLogic::createMaybe();
    $numeric = TrinaryLogic::createMaybe();
    $insideOptional = false;

    $this->walkGroupAst($group, $nonEmpty, $numeric, $insideOptional);

    // Numeric wins over non-empty: only one accessory is ever attached.
    if ($numeric->yes()) {
        return new IntersectionType([
            new AccessoryNumericStringType(),
            new StringType(),
        ]);
    }

    if ($nonEmpty->yes()) {
        return new IntersectionType([
            new AccessoryNonEmptyStringType(),
            new StringType(),
        ]);
    }

    return new StringType();
}
565+
566+
/**
 * Recursively inspects the AST of a capturing group and records what can be
 * said about the captured string.
 *
 * The three by-reference arguments are accumulators shared by the whole
 * recursion; the caller reads them after the walk has finished.
 *
 * @param TreeNode     $ast                       node to inspect; all of its children are visited afterwards
 * @param TrinaryLogic $isNonEmpty                switched to "yes" when a construct that must consume input is seen
 * @param TrinaryLogic $isNumeric                 switched to "yes" for digit-only constructs while still "maybe",
 *                                                and to "no" as soon as a non-digit literal or character type is seen
 * @param bool         $inOptionalQuantification  true after a quantifier with a minimum of 0 was entered,
 *                                                false again after a quantifier with a minimum >= 1
 */
private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification): void
{
    $children = $ast->getChildren();

    if (
        $ast->getId() === '#concatenation'
        && count($children) > 0
    ) {
        $isNonEmpty = TrinaryLogic::createYes();
    }

    if ($ast->getId() === '#quantification') {
        [$min] = $this->getQuantificationRange($ast);

        // NOTE(review): this flag is only ever changed in this branch and is not
        // restored when the walk leaves the quantified subtree, so it also affects
        // siblings visited later - confirm this is intended.
        if ($min === 0) {
            $inOptionalQuantification = true;
        }
        if ($min >= 1) {
            $isNonEmpty = TrinaryLogic::createYes();
            $inOptionalQuantification = false;
        }
    }

    if ($ast->getId() === 'token') {
        $literalValue = $this->getLiteralValue($ast);
        if ($literalValue !== null) {
            // A literal made of anything but digits rules out numeric-string for good:
            // every transition to "yes" below is guarded by maybe().
            if (Strings::match($literalValue, '/^\d+$/') === null) {
                $isNumeric = TrinaryLogic::createNo();
            }

            if (!$inOptionalQuantification) {
                $isNonEmpty = TrinaryLogic::createYes();
            }
        }

        if ($ast->getValueToken() === 'character_type') {
            if ($ast->getValueValue() === '\d') {
                if ($isNumeric->maybe()) {
                    $isNumeric = TrinaryLogic::createYes();
                }
            } else {
                $isNumeric = TrinaryLogic::createNo();
            }

            if (!$inOptionalQuantification) {
                $isNonEmpty = TrinaryLogic::createYes();
            }
        }
    }

    if ($ast->getId() === '#range' || $ast->getId() === '#class') {
        // NOTE(review): only '#range' and '#class' are checked here; if the parser
        // emits a separate node id for negated classes, their '#range' children
        // would still be treated as digit-only here - verify against the grammar.
        if ($isNumeric->maybe()) {
            // The class/range is digit-only when it has members and every member is
            // a literal consisting of digits. A member without a literal value
            // (nested node, escape sequence, ...) cannot be proven to be a digit, so
            // it must disprove the claim instead of keeping an earlier "true".
            // Digit-only nested nodes are still recognised by the recursion below.
            $allNumeric = $children !== [];
            foreach ($children as $child) {
                $literalValue = $this->getLiteralValue($child);

                if ($literalValue === null || Strings::match($literalValue, '/^\d+$/') === null) {
                    $allNumeric = false;
                    break;
                }
            }

            if ($allNumeric) {
                $isNumeric = TrinaryLogic::createYes();
            }
        }

        if (!$inOptionalQuantification) {
            $isNonEmpty = TrinaryLogic::createYes();
        }
    }

    foreach ($children as $child) {
        $this->walkGroupAst(
            $child,
            $isNonEmpty,
            $isNumeric,
            $inOptionalQuantification,
        );
    }
}
653+
654+
/**
 * Returns the raw text of a token node that stands for literal input.
 *
 * Two token kinds qualify: 'literal', and 'range', which is a literal "-"
 * outside of a character class like '~^((\\d{1,6})-)$~'.
 *
 * @return string|null the token's value, or null for any other node
 */
private function getLiteralValue(TreeNode $node): ?string
{
    if ($node->getId() !== 'token') {
        return null;
    }

    $tokenName = $node->getValueToken();
    if ($tokenName === 'literal' || $tokenName === 'range') {
        return $node->getValueValue();
    }

    return null;
}
667+
537668
private function getPatternType(Expr $patternExpr, Scope $scope): Type
538669
{
539670
if ($patternExpr instanceof Expr\BinaryOp\Concat) {

src/Type/Php/RegexCapturingGroup.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
namespace PHPStan\Type\Php;
44

5+
use PHPStan\Type\Type;
6+
57
class RegexCapturingGroup
68
{
79

@@ -13,6 +15,7 @@ public function __construct(
1315
private ?int $alternationId,
1416
private bool $inOptionalQuantification,
1517
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
18+
private Type $type,
1619
)
1720
{
1821
}
@@ -92,4 +95,9 @@ public function getName(): ?string
9295
return $this->name;
9396
}
9497

98+
/**
 * Returns the type this capturing group was constructed with
 * (the promoted constructor argument $type).
 */
public function getType(): Type
{
    return $this->type;
}
102+
95103
}

tests/PHPStan/Analyser/nsrt/bug-11311-php72.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,23 @@
88

99
function doFoo(string $s) {
1010
if (1 === preg_match('/(?<major>\d+)\.(?<minor>\d+)(?:\.(?<patch>\d+))?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
11-
assertType('array{0: string, major: string, 1: string, minor: string, 2: string, patch?: string, 3?: string}', $matches);
11+
assertType('array{0: string, major: numeric-string, 1: numeric-string, minor: numeric-string, 2: numeric-string, patch?: numeric-string, 3?: numeric-string}', $matches);
1212
}
1313
}
1414

1515
function doUnmatchedAsNull(string $s): void {
1616
if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
17-
assertType('array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
17+
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
1818
}
19-
assertType('array{}|array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
19+
assertType('array{}|array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
2020
}
2121

2222
// see https://3v4l.org/VeDob#veol
2323
function unmatchedAsNullWithOptionalGroup(string $s): void {
2424
if (preg_match('/Price: (£|€)?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
25-
assertType('array{0: string, 1?: string}', $matches);
25+
assertType('array{0: string, 1?: non-empty-string}', $matches);
2626
} else {
2727
assertType('array{}', $matches);
2828
}
29-
assertType('array{}|array{0: string, 1?: string}', $matches);
29+
assertType('array{}|array{0: string, 1?: non-empty-string}', $matches);
3030
}

0 commit comments

Comments
 (0)