Skip to content

Commit b563ce4

Browse files
committed
Ban numeric escapes in custom character classes
PCRE treats them as octal, but we require a `0` prefix.
1 parent cda88e4 commit b563ce4

File tree

2 files changed: 15 additions and 15 deletions.

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1674,9 +1674,10 @@ extension Source {
1674 1674   break
1675 1675   }
1676 1676
1677      - // We only allow unknown escape sequences for non-letter ASCII, and
1678      - // non-ASCII whitespace.
1679      - guard (char.isASCII && !char.isLetter) ||
     1677 + // We only allow unknown escape sequences for non-letter non-number ASCII,
     1678 + // and non-ASCII whitespace.
     1679 + // TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.
     1680 + guard (char.isASCII && !char.isLetter && !char.isNumber) ||
1680 1681   (!char.isASCII && char.isWhitespace)
1681 1682   else {
1682 1683   throw ParseError.invalidEscape(char)

Tests/RegexTests/ParseTests.swift

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -466,14 +466,6 @@ extension RegexTests {
466 466   parseTest(#"[\08]"#, charClass(scalar_m("\u{0}"), "8"))
467 467   parseTest(#"[\0707]"#, charClass(scalar_m("\u{1C7}")))
468 468
469     - // TODO: These are treated as octal sequences by PCRE, we should warn and
470     - // suggest user prefix with 0.
471     - parseTest(#"[\1]"#, charClass("1"))
472     - parseTest(#"[\123]"#, charClass("1", "2", "3"))
473     - parseTest(#"[\101]"#, charClass("1", "0", "1"))
474     - parseTest(#"[\7777]"#, charClass("7", "7", "7", "7"))
475     - parseTest(#"[\181]"#, charClass("1", "8", "1"))
476     -
477 469   // We take *up to* the first two valid digits for \x. No valid digits is 0.
478 470   parseTest(#"\x"#, scalar("\u{0}"))
479 471   parseTest(#"\x5"#, scalar("\u{5}"))
@@ -1251,10 +1243,6 @@ extension RegexTests {
1251 1243   parseTest(#"\g'+30'"#, subpattern(.relative(30)), throwsError: .unsupported)
1252 1244   parseTest(#"\g'abc'"#, subpattern(.named("abc")), throwsError: .unsupported)
1253 1245
1254      - // Backreferences are not valid in custom character classes.
1255      - parseTest(#"[\8]"#, charClass("8"))
1256      - parseTest(#"[\9]"#, charClass("9"))
1257      -
1258 1246   // These are valid references.
1259 1247   parseTest(#"()\1"#, concat(
1260 1248   capture(empty()), backreference(.absolute(1))
@@ -2536,6 +2524,17 @@ extension RegexTests {
2536 2524   // TODO: Custom diagnostic for missing '\Q'
2537 2525   diagnosticTest(#"\E"#, .invalidEscape("E"))
2538 2526
     2527 + // PCRE treats these as octal, but we require a `0` prefix.
     2528 + diagnosticTest(#"[\1]"#, .invalidEscape("1"))
     2529 + diagnosticTest(#"[\123]"#, .invalidEscape("1"))
     2530 + diagnosticTest(#"[\101]"#, .invalidEscape("1"))
     2531 + diagnosticTest(#"[\7777]"#, .invalidEscape("7"))
     2532 + diagnosticTest(#"[\181]"#, .invalidEscape("1"))
     2533 +
     2534 + // Backreferences are not valid in custom character classes.
     2535 + diagnosticTest(#"[\8]"#, .invalidEscape("8"))
     2536 + diagnosticTest(#"[\9]"#, .invalidEscape("9"))
     2537 +
2539 2538   // Non-ASCII non-whitespace cases.
2540 2539   diagnosticTest(#"\🔥"#, .invalidEscape("🔥"))
2541 2540   diagnosticTest(#"\🇩🇰"#, .invalidEscape("🇩🇰"))

0 commit comments

Comments
 (0)