Skip to content

Commit a6bf9b8

Browse files
committed
Parse end-of-line comments in custom character classes
Previously we would only parse non-semantic whitespace in custom character classes. Now we also parse end-of-line comments there, which are supported by ICU.
1 parent 7fe243e commit a6bf9b8

File tree

3 files changed

+20
-11
lines changed

3 files changed

+20
-11
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ extension Source {
639639
///
640640
mutating func lexComment(context: ParsingContext) throws -> AST.Trivia? {
641641
let trivia: Located<String>? = try recordLoc { src in
642-
if src.tryEat(sequence: "(?#") {
642+
if !context.isInCustomCharacterClass && src.tryEat(sequence: "(?#") {
643643
return try src.lexUntil(eating: ")").value
644644
}
645645
if context.experimentalComments, src.tryEat(sequence: "/*") {

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -515,10 +515,8 @@ extension Parser {
515515
continue
516516
}
517517

518-
// Lex non-semantic whitespace if we're allowed.
519-
// TODO: ICU allows end-of-line comments in custom character classes,
520-
// which we ought to support if we want to support multi-line regex.
521-
if let trivia = source.lexNonSemanticWhitespace(context: context) {
518+
// Lex trivia if we're allowed.
519+
if let trivia = try source.lexTrivia(context: context) {
522520
members.append(.trivia(trivia))
523521
continue
524522
}

Tests/RegexTests/ParseTests.swift

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,12 +1677,8 @@ extension RegexTests {
16771677
)
16781678
)
16791679

1680-
// End of line comments aren't applicable in custom char classes.
1681-
// TODO: ICU supports this.
1682-
parseTest("(?x)[ # abc]", concat(
1683-
changeMatchingOptions(matchingOptions(adding: .extended)),
1684-
charClass("#", "a", "b", "c")
1685-
))
1680+
parseTest("[ # abc]", charClass(" ", "#", " ", "a", "b", "c"))
1681+
parseTest("[#]", charClass("#"))
16861682

16871683
parseTest("(?x)a b c[d e f]", concat(
16881684
changeMatchingOptions(matchingOptions(adding: .extended)),
@@ -2115,6 +2111,15 @@ extension RegexTests {
21152111
/#
21162112
"""#, scalarSeq("\u{AB}", "\u{B}", "\u{C}"))
21172113

2114+
parseWithDelimitersTest(#"""
2115+
#/
2116+
[
2117+
a # interesting
2118+
b-c #a
2119+
d]
2120+
/#
2121+
"""#, charClass("a", range_m("b", "c"), "d"))
2122+
21182123
// MARK: Delimiter skipping: Make sure we can skip over the ending delimiter
21192124
// if it's clear that it's part of the regex syntax.
21202125

@@ -2544,6 +2549,12 @@ extension RegexTests {
25442549
diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b"))
25452550
diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}"))
25462551

2552+
diagnosticTest("(?x)[(?#)]", .expected("]"))
2553+
diagnosticTest("(?x)[(?#abc)]", .expected("]"))
2554+
2555+
diagnosticTest("(?x)[#]", .expectedCustomCharacterClassMembers)
2556+
diagnosticTest("(?x)[ # abc]", .expectedCustomCharacterClassMembers)
2557+
25472558
// MARK: Bad escapes
25482559

25492560
diagnosticTest("\\", .expectedEscape)

0 commit comments

Comments
 (0)