Skip to content

Commit dc90b1f

Browse files
committed
Fix scalar mode for quoted sequences in character class
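Previously, a quoted sequence inside a custom character class would only match entire characters (grapheme clusters), even under Unicode scalar semantics. Update it to use the generic Character consumer logic, which can handle scalar semantic mode. rdar://97209131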
1 parent 180c36b commit dc90b1f

File tree

2 files changed

+56
-14
lines changed

2 files changed

+56
-14
lines changed

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ extension DSLTree._AST.Atom {
6363
extension Character {
6464
func generateConsumer(
6565
_ opts: MatchingOptions
66-
) throws -> MEProgram.ConsumeFunction? {
66+
) throws -> MEProgram.ConsumeFunction {
6767
let isCaseInsensitive = opts.isCaseInsensitive
6868
switch opts.semanticLevel {
6969
case .graphemeCluster:
@@ -437,21 +437,17 @@ extension DSLTree.CustomCharacterClass.Member {
437437
}
438438
return rhs(input, bounds)
439439
}
440-
case .quotedLiteral(let s):
441-
if opts.isCaseInsensitive {
442-
return { input, bounds in
443-
guard s.lowercased()._contains(input[bounds.lowerBound].lowercased()) else {
444-
return nil
445-
}
446-
return input.index(after: bounds.lowerBound)
447-
}
448-
} else {
449-
return { input, bounds in
450-
guard s.contains(input[bounds.lowerBound]) else {
451-
return nil
440+
case .quotedLiteral(let str):
441+
let consumers = try str.map {
442+
try $0.generateConsumer(opts)
443+
}
444+
return { input, bounds in
445+
for fn in consumers {
446+
if let idx = fn(input, bounds) {
447+
return idx
452448
}
453-
return input.index(after: bounds.lowerBound)
454449
}
450+
return nil
455451
}
456452
case .trivia:
457453
// TODO: Should probably strip this earlier...

Tests/RegexTests/MatchTests.swift

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ func firstMatchTests(
191191
enableTracing: Bool = false,
192192
dumpAST: Bool = false,
193193
xfail: Bool = false,
194+
semanticLevel: RegexSemanticLevel = .graphemeCluster,
194195
file: StaticString = #filePath,
195196
line: UInt = #line
196197
) {
@@ -203,6 +204,7 @@ func firstMatchTests(
203204
enableTracing: enableTracing,
204205
dumpAST: dumpAST,
205206
xfail: xfail,
207+
semanticLevel: semanticLevel,
206208
file: file,
207209
line: line)
208210
}
@@ -685,6 +687,50 @@ extension RegexTests {
685687
("a\u{301}", true),
686688
semanticLevel: .unicodeScalar)
687689

690+
// Scalar matching in quoted sequences.
691+
firstMatchTests(
692+
"[\\Qe\u{301}\\E]",
693+
("e", nil),
694+
("E", nil),
695+
("\u{301}", nil),
696+
(eDecomposed, eDecomposed),
697+
(eComposed, eComposed),
698+
("E\u{301}", nil),
699+
("\u{C9}", nil)
700+
)
701+
firstMatchTests(
702+
"[\\Qe\u{301}\\E]",
703+
("e", "e"),
704+
("E", nil),
705+
("\u{301}", "\u{301}"),
706+
(eDecomposed, "e"),
707+
(eComposed, nil),
708+
("E\u{301}", "\u{301}"),
709+
("\u{C9}", nil),
710+
semanticLevel: .unicodeScalar
711+
)
712+
firstMatchTests(
713+
"(?i)[\\Qe\u{301}\\E]",
714+
("e", nil),
715+
("E", nil),
716+
("\u{301}", nil),
717+
(eDecomposed, eDecomposed),
718+
(eComposed, eComposed),
719+
("E\u{301}", "E\u{301}"),
720+
("\u{C9}", "\u{C9}")
721+
)
722+
firstMatchTests(
723+
"(?i)[\\Qe\u{301}\\E]",
724+
("e", "e"),
725+
("E", "E"),
726+
("\u{301}", "\u{301}"),
727+
(eDecomposed, "e"),
728+
(eComposed, nil),
729+
("E\u{301}", "E"),
730+
("\u{C9}", nil),
731+
semanticLevel: .unicodeScalar
732+
)
733+
688734
firstMatchTest("[-]", input: "123-abcxyz", match: "-")
689735

690736
// These are metacharacters in certain contexts, but normal characters

0 commit comments

Comments
 (0)