Skip to content

Commit cdf98c5

Browse files
committed
Forbid empty character classes
Following PCRE, Oniguruma, and ICU, a `]` that appears as the first character of a custom character class is treated as a literal rather than as the closing bracket.
1 parent 692f0fd commit cdf98c5

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,12 @@ extension Parser {
425425
try source.expectNonEmpty()
426426

427427
var members: Array<Member> = []
428+
429+
// Consume an initial ']' as a literal member: PCRE, Oniguruma, and ICU all
430+
// forbid empty character classes, so a leading ']' cannot close the class.
431+
if let loc = source.tryEatWithLoc("]") {
432+
members.append(.atom(.init(.char("]"), loc)))
433+
}
428434
try parseCCCMembers(into: &members)
429435

430436
// If we have a binary set operator, parse it and the next members. Note

Tests/RegexTests/ParseTests.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,10 @@ extension RegexTests {
428428

429429
parseTest("[-]", charClass("-"))
430430

431+
// Empty character classes are forbidden, therefore this is a character
432+
// class of literal ']'.
433+
parseTest("[]]", charClass("]"))
434+
431435
// These are metacharacters in certain contexts, but normal characters
432436
// otherwise.
433437
parseTest(
@@ -1901,6 +1905,10 @@ extension RegexTests {
19011905
diagnosticTest("(?<a-b", .expected(">"))
19021906
diagnosticTest("(?<a-b>", .expected(")"))
19031907

1908+
// The first ']' of a custom character class is literal, so this is missing
1909+
// the closing bracket.
1910+
diagnosticTest("[]", .expected("]"))
1911+
19041912
// MARK: Bad escapes
19051913

19061914
diagnosticTest("\\", .expectedEscape)

0 commit comments

Comments
 (0)