Skip to content

Commit 7cc4556

Browse files
committed
Forbid empty character classes
As per PCRE, Oniguruma, and ICU, a `]` that appears as the first character of a custom character class is treated as a literal.
1 parent f205b6e commit 7cc4556

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

Sources/_MatchingEngine/Regex/Parse/Parse.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,12 @@ extension Parser {
434434
try source.expectNonEmpty()
435435

436436
var members: Array<Member> = []
437+
438+
// We can eat an initial ']', as PCRE, Oniguruma, and ICU forbid empty
439+
// character classes, and assume an initial ']' is literal.
440+
if let loc = source.tryEatWithLoc("]") {
441+
members.append(.atom(.init(.char("]"), loc)))
442+
}
437443
try parseCCCMembers(into: &members)
438444

439445
// If we have a binary set operator, parse it and the next members. Note

Tests/RegexTests/ParseTests.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,10 @@ extension RegexTests {
428428

429429
parseTest("[-]", charClass("-"))
430430

431+
// Empty character classes are forbidden, so this is a character
432+
// class of literal ']'.
433+
parseTest("[]]", charClass("]"))
434+
431435
// These are metacharacters in certain contexts, but normal characters
432436
// otherwise.
433437
parseTest(
@@ -1901,6 +1905,10 @@ extension RegexTests {
19011905
diagnosticTest("(?<a-b", .expected(">"))
19021906
diagnosticTest("(?<a-b>", .expected(")"))
19031907

1908+
// The first ']' of a custom character class is literal, so this is missing
1909+
// the closing bracket.
1910+
diagnosticTest("[]", .expected("]"))
1911+
19041912
// MARK: Bad escapes
19051913

19061914
diagnosticTest("\\", .expectedEscape)

0 commit comments

Comments
 (0)