Skip to content

Commit bc51e91

Browse files
committed
Ban numeric escapes in custom character classes
PCRE treats escapes such as `[\7]` as octal, but we require a `0` prefix (e.g. `[\07]`).
1 parent 5b30c5b commit bc51e91

File tree

2 files changed

+15
-15
lines changed

2 files changed

+15
-15
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,9 +1674,10 @@ extension Source {
16741674
break
16751675
}
16761676

1677-
// We only allow unknown escape sequences for non-letter ASCII, and
1678-
// non-ASCII whitespace.
1679-
guard (char.isASCII && !char.isLetter) ||
1677+
// We only allow unknown escape sequences for non-letter non-number ASCII,
1678+
// and non-ASCII whitespace.
1679+
// TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.
1680+
guard (char.isASCII && !char.isLetter && !char.isNumber) ||
16801681
(!char.isASCII && char.isWhitespace)
16811682
else {
16821683
throw ParseError.invalidEscape(char)

Tests/RegexTests/ParseTests.swift

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -466,14 +466,6 @@ extension RegexTests {
466466
parseTest(#"[\08]"#, charClass(scalar_m("\u{0}"), "8"))
467467
parseTest(#"[\0707]"#, charClass(scalar_m("\u{1C7}")))
468468

469-
// TODO: These are treated as octal sequences by PCRE, we should warn and
470-
// suggest user prefix with 0.
471-
parseTest(#"[\1]"#, charClass("1"))
472-
parseTest(#"[\123]"#, charClass("1", "2", "3"))
473-
parseTest(#"[\101]"#, charClass("1", "0", "1"))
474-
parseTest(#"[\7777]"#, charClass("7", "7", "7", "7"))
475-
parseTest(#"[\181]"#, charClass("1", "8", "1"))
476-
477469
// We take *up to* the first two valid digits for \x. With no valid digits, the value is 0.
478470
parseTest(#"\x"#, scalar("\u{0}"))
479471
parseTest(#"\x5"#, scalar("\u{5}"))
@@ -1267,10 +1259,6 @@ extension RegexTests {
12671259
parseTest(#"\g'+30'"#, subpattern(.relative(30)), throwsError: .unsupported)
12681260
parseTest(#"\g'abc'"#, subpattern(.named("abc")), throwsError: .unsupported)
12691261

1270-
// Backreferences are not valid in custom character classes.
1271-
parseTest(#"[\8]"#, charClass("8"))
1272-
parseTest(#"[\9]"#, charClass("9"))
1273-
12741262
// These are valid references.
12751263
parseTest(#"()\1"#, concat(
12761264
capture(empty()), backreference(.absolute(1))
@@ -2547,6 +2535,17 @@ extension RegexTests {
25472535
// TODO: Custom diagnostic for missing '\Q'
25482536
diagnosticTest(#"\E"#, .invalidEscape("E"))
25492537

2538+
// PCRE treats these as octal, but we require a `0` prefix.
2539+
diagnosticTest(#"[\1]"#, .invalidEscape("1"))
2540+
diagnosticTest(#"[\123]"#, .invalidEscape("1"))
2541+
diagnosticTest(#"[\101]"#, .invalidEscape("1"))
2542+
diagnosticTest(#"[\7777]"#, .invalidEscape("7"))
2543+
diagnosticTest(#"[\181]"#, .invalidEscape("1"))
2544+
2545+
// Backreferences are not valid in custom character classes.
2546+
diagnosticTest(#"[\8]"#, .invalidEscape("8"))
2547+
diagnosticTest(#"[\9]"#, .invalidEscape("9"))
2548+
25502549
// Non-ASCII non-whitespace cases.
25512550
diagnosticTest(#"\🔥"#, .invalidEscape("🔥"))
25522551
diagnosticTest(#"\🇩🇰"#, .invalidEscape("🇩🇰"))

0 commit comments

Comments
 (0)