Skip to content

Commit 86315f7

Browse files
authored
Fix #578: lexer wrongly interprets ".e[0-9]" as a number with scientific notation. (#579)
* Add invalid number on lexer to show it is wrongly detected as a number. * Fix the lexer about parsing invalid numbers with updated unit tests. * Remove forgotten debug function. * Fix forgotten statement on parseNumber to move from state 10 to state 4, causing wrong lexing process. * Fix linters and ignore new psalm issues. * Add test case with parser for issue #578. Fixes #578
1 parent a1c555a commit 86315f7

File tree

7 files changed

+583
-13
lines changed

7 files changed

+583
-13
lines changed

psalm-baseline.xml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@
671671
<code>$this-&gt;last</code>
672672
<code>$this-&gt;last</code>
673673
</LoopInvalidation>
674-
<MixedArrayAccess occurrences="41">
674+
<MixedArrayAccess occurrences="43">
675675
<code>$this-&gt;str[$this-&gt;last + 1]</code>
676676
<code>$this-&gt;str[$this-&gt;last++]</code>
677677
<code>$this-&gt;str[$this-&gt;last]</code>
@@ -713,6 +713,8 @@
713713
<code>$this-&gt;str[$this-&gt;last]</code>
714714
<code>$this-&gt;str[$this-&gt;last]</code>
715715
<code>$this-&gt;str[$this-&gt;last]</code>
716+
<code>$this-&gt;str[$this-&gt;last]</code>
717+
<code>$this-&gt;str[$this-&gt;last]</code>
716718
</MixedArrayAccess>
717719
<MixedAssignment occurrences="2">
718720
<code>$lastToken</code>

src/Lexer.php

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ public function parseNumber()
812812
// 1 --------------------[ + or - ]-------------------> 1
813813
// 1 -------------------[ 0x or 0X ]------------------> 2
814814
// 1 --------------------[ 0 to 9 ]-------------------> 3
815-
// 1 -----------------------[ . ]---------------------> 4
815+
// 1 -----------------------[ . ]---------------------> 10
816816
// 1 -----------------------[ b ]---------------------> 7
817817
//
818818
// 2 --------------------[ 0 to F ]-------------------> 2
@@ -831,11 +831,16 @@ public function parseNumber()
831831
// 8 --------------------[ 0 or 1 ]-------------------> 8
832832
// 8 -----------------------[ ' ]---------------------> 9
833833
//
834+
// 10 -------------------[ 0 to 9 ]-------------------> 4
835+
//
834836
// State 1 may be reached by negative numbers.
835837
// State 2 is reached only by hex numbers.
836838
// State 4 is reached only by float numbers.
837839
// State 5 is reached only by numbers in approximate form.
838840
// State 7 is reached only by numbers in bit representation.
841+
// State 10 is a forced proxy to state 4, ensuring that a leading dot (= "0.something") is followed by a digit and
842+
// not by "e" or "E", which would be wrongly interpreted as scientific notation (".e[0 to 9]" is invalid). Such
843+
// input could break the lexer when a database-qualified table name starts with "e[0-9]" (e.g. "erp.e1_table").
839844
//
840845
// Valid final states are: 2, 3, 4 and 6. Any parsing that finished in a
841846
// state other than these is invalid.
@@ -858,7 +863,7 @@ public function parseNumber()
858863
} elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
859864
$state = 3;
860865
} elseif ($this->str[$this->last] === '.') {
861-
$state = 4;
866+
$state = 10;
862867
} elseif ($this->str[$this->last] === 'b') {
863868
$state = 7;
864869
} elseif ($this->str[$this->last] !== '+') {
@@ -885,7 +890,7 @@ public function parseNumber()
885890
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
886891
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
887892
) {
888-
// A number can't be directly followed by a letter
893+
// A number can't be directly followed by a letter
889894
$state = -$state;
890895
} elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
891896
// Just digits and `.`, `e` and `E` are valid characters.
@@ -899,7 +904,7 @@ public function parseNumber()
899904
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
900905
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
901906
) {
902-
// A number can't be directly followed by a letter
907+
// A number can't be directly followed by a letter
903908
$state = -$state;
904909
} elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
905910
// Just digits, `e` and `E` are valid characters.
@@ -916,7 +921,7 @@ public function parseNumber()
916921
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
917922
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
918923
) {
919-
// A number can't be directly followed by a letter
924+
// A number can't be directly followed by a letter
920925
$state = -$state;
921926
} else {
922927
break;
@@ -941,6 +946,13 @@ public function parseNumber()
941946
}
942947
} elseif ($state === 9) {
943948
break;
949+
} elseif ($state === 10) {
950+
$flags |= Token::FLAG_NUMBER_FLOAT;
951+
if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
952+
break;
953+
}
954+
955+
$state = 4;
944956
}
945957

946958
$token .= $this->str[$this->last];

tests/Parser/LoadStatementTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public static function loadProvider(): array
3838
['parser/parseLoad5'],
3939
['parser/parseLoad6'],
4040
['parser/parseLoad7'],
41+
['parser/parseLoad8'],
4142
['parser/parseLoadErr1'],
4243
['parser/parseLoadErr2'],
4344
['parser/parseLoadErr3'],

tests/data/lexer/lexNumber.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';
22
-- invalid numbers
3-
SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;
3+
SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;

tests/data/lexer/lexNumber.out

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;",
2+
"query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n",
33
"lexer": {
44
"@type": "PhpMyAdmin\\SqlParser\\Lexer",
5-
"str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;",
6-
"len": 176,
7-
"last": 176,
5+
"str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n",
6+
"len": 182,
7+
"last": 182,
88
"list": {
99
"@type": "PhpMyAdmin\\SqlParser\\TokensList",
1010
"tokens": [
@@ -665,14 +665,59 @@
665665
"flags": 0,
666666
"position": 170
667667
},
668+
{
669+
"@type": "PhpMyAdmin\\SqlParser\\Token",
670+
"token": ",",
671+
"value": ",",
672+
"keyword": null,
673+
"type": 2,
674+
"flags": 16,
675+
"position": 175
676+
},
677+
{
678+
"@type": "PhpMyAdmin\\SqlParser\\Token",
679+
"token": " ",
680+
"value": " ",
681+
"keyword": null,
682+
"type": 3,
683+
"flags": 0,
684+
"position": 176
685+
},
686+
{
687+
"@type": "PhpMyAdmin\\SqlParser\\Token",
688+
"token": ".",
689+
"value": ".",
690+
"keyword": null,
691+
"type": 2,
692+
"flags": 16,
693+
"position": 177
694+
},
695+
{
696+
"@type": "PhpMyAdmin\\SqlParser\\Token",
697+
"token": "e4",
698+
"value": "e4",
699+
"keyword": null,
700+
"type": 0,
701+
"flags": 0,
702+
"position": 178
703+
},
668704
{
669705
"@type": "PhpMyAdmin\\SqlParser\\Token",
670706
"token": ";",
671707
"value": ";",
672708
"keyword": null,
673709
"type": 9,
674710
"flags": 0,
675-
"position": 175
711+
"position": 180
712+
},
713+
{
714+
"@type": "PhpMyAdmin\\SqlParser\\Token",
715+
"token": "\n",
716+
"value": " ",
717+
"keyword": null,
718+
"type": 3,
719+
"flags": 0,
720+
"position": 181
676721
},
677722
{
678723
"@type": "PhpMyAdmin\\SqlParser\\Token",
@@ -684,7 +729,7 @@
684729
"position": null
685730
}
686731
],
687-
"count": 75,
732+
"count": 80,
688733
"idx": 0
689734
},
690735
"delimiter": ";",

tests/data/parser/parseLoad8.in

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Query from https://github.com/phpmyadmin/sql-parser/issues/578
2+
-- Issue was that Lexer detected ".e1" as number token.
3+
4+
LOAD DATA LOCAL INFILE '/home/user/myloadfile.csv'
5+
IGNORE INTO TABLE erp.e1_table
6+
FIELDS TERMINATED BY '\t'
7+
LINES TERMINATED BY '\n'
8+
IGNORE 0 LINES;

0 commit comments

Comments
 (0)