Skip to content

Commit 5e2b77c

Browse files
authored
Option APIs for RegexProtocol (#171)
This adds an API to any conforming type that lets the user control whether a match uses case-sensitive comparison from within a DSL declaration. Adding successive modifiers is equivalent to nesting an expression in a new Regex declaration. For example, these two regex declarations are equivalent:

```
let regexDSL = Regex {
    oneOrMore { "a" }
        .caseSensitive(false)
        .caseSensitive(true)
}

let regexNested = Regex {
    Regex {
        oneOrMore { "a" }.caseSensitive(false)
    }.caseSensitive(true)
}
```
1 parent c35c30a commit 5e2b77c

File tree

4 files changed

+101
-22
lines changed

4 files changed

+101
-22
lines changed

Sources/_MatchingEngine/Regex/AST/MatchingOptions.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,16 @@ extension AST {
110110
}
111111
}
112112

113+
// Convenience initializers that build an option sequence from just a list of
// options to add or to remove, passing `nil` for both source locations.
extension AST.MatchingOptionSequence {
114+
/// Creates a sequence that adds the given options and removes none.
///
/// - Parameter adding: The matching options to add. The caret and minus
///   locations are both passed as `nil`.
public init(adding: [AST.MatchingOption]) {
115+
self.init(caretLoc: nil, adding: adding, minusLoc: nil, removing: [])
116+
}
117+
118+
/// Creates a sequence that removes the given options and adds none.
///
/// - Parameter removing: The matching options to remove. The caret and minus
///   locations are both passed as `nil`.
public init(removing: [AST.MatchingOption]) {
119+
self.init(caretLoc: nil, adding: [], minusLoc: nil, removing: removing)
120+
}
121+
}
122+
113123
extension AST.MatchingOption: _ASTPrintable {
114124
public var _dumpBase: String { "\(kind)" }
115125
}

Sources/_StringProcessing/MatchingOptions.swift

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,19 @@ extension MatchingOptions {
189189
contains(.init(kind))
190190
}
191191

192+
/// Inserts `opt` into this set of options.
///
/// When `opt` is a member of `semanticMatchingLevels` or `textSegmentOptions`,
/// every member of that group is removed first, so at most one option from
/// the group remains after the insert.
///
/// - Parameter opt: The option whose `representation` is inserted.
mutating func add(_ opt: Option) {
193+
// If opt is in one of the mutually exclusive groups, clear out the
194+
// group before inserting.
195+
if Self.semanticMatchingLevels.contains(opt.representation) {
196+
remove(.semanticMatchingLevels)
197+
}
198+
if Self.textSegmentOptions.contains(opt.representation) {
199+
remove(.textSegmentOptions)
200+
}
201+
202+
insert(opt.representation)
203+
}
204+
192205
/// Applies the changes described by `sequence` to this set of options.
193206
mutating func apply(_ sequence: AST.MatchingOptionSequence) {
194207
// Replace entirely if the sequence includes a caret, e.g. `(?^is)`.
@@ -197,28 +210,17 @@ extension MatchingOptions {
197210
}
198211

199212
for opt in sequence.adding {
200-
guard let opt = Option(opt.kind)?.representation else {
213+
guard let opt = Option(opt.kind) else {
201214
continue
202215
}
203-
204-
// If opt is in one of the mutually exclusive groups, clear out the
205-
// group before inserting.
206-
if Self.semanticMatchingLevels.contains(opt) {
207-
remove(.semanticMatchingLevels)
208-
}
209-
if Self.textSegmentOptions.contains(opt) {
210-
remove(.textSegmentOptions)
211-
}
212-
213-
insert(opt)
216+
add(opt)
214217
}
215218

216219
for opt in sequence.removing {
217-
guard let opt = Option(opt.kind)?.representation else {
220+
guard let opt = Option(opt.kind) else {
218221
continue
219222
}
220-
221-
remove(opt)
223+
remove(opt.representation)
222224
}
223225
}
224226
}
@@ -229,6 +231,9 @@ extension MatchingOptions.Representation {
229231
self.rawValue = 1 << kind.rawValue
230232
}
231233

234+
// Case insensitivity
235+
static var caseInsensitive: Self { .init(.caseInsensitive) }
236+
232237
// Text segmentation options
233238
static var textSegmentGraphemeMode: Self { .init(.textSegmentGraphemeMode) }
234239
static var textSegmentWordMode: Self { .init(.textSegmentWordMode) }
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import _MatchingEngine
13+
14+
extension RegexProtocol {
15+
/// Returns a regex that wraps this regex in a non-isolated
/// matching-options group toggling the `.caseInsensitive` option.
///
/// - Parameter isCaseSensitive: When `true`, the group removes
///   `.caseInsensitive`; when `false`, the group adds it.
/// - Returns: A new `Regex` whose root is the options group around
///   `regex.root`.
public func caseSensitive(_ isCaseSensitive: Bool) -> Regex<Match> {
16+
// The API is "case sensitive = true or false", so as to avoid the
17+
// double negatives inherent in setting "case insensitive" to a Boolean
18+
// value. The internal version of this option, on the other hand, is
19+
// `.caseInsensitive`, derived from the `(?i)` regex literal option.
20+
let sequence = isCaseSensitive
21+
? AST.MatchingOptionSequence(removing: [.init(.caseInsensitive, location: .fake)])
22+
: AST.MatchingOptionSequence(adding: [.init(.caseInsensitive, location: .fake)])
// The option has no source text behind it (hence `.fake` locations above);
// the change is expressed purely as a group node around the existing root.
23+
return Regex(node: .group(.changeMatchingOptions(sequence, isIsolated: false), regex.root))
24+
}
25+
}
26+

Tests/RegexTests/RegexDSLTests.swift

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,51 @@ class RegexDSLTests: XCTestCase {
152152
}
153153
}
154154

155+
/// Exercises `caseSensitive(_:)` in the DSL in three shapes: a single
/// modifier, two modifiers stacked on one component, and a modifier on an
/// inner component beneath a modifier on the outer component.
func testOptions() throws {
156+
// A single `caseSensitive(false)` on the quantified component.
// NOTE(review): each tuple looks like (input, expected capture) as consumed
// by `_testDSLCaptures` - confirm against that helper.
try _testDSLCaptures(
157+
("abc", "abc"),
158+
("ABC", "ABC"),
159+
("abcabc", "abcabc"),
160+
("abcABCaBc", "abcABCaBc"),
161+
captureType: Substring.self, ==) {
162+
oneOrMore {
163+
"abc"
164+
}.caseSensitive(false)
165+
}
166+
167+
// Multiple options on one component wrap successively, but do not
168+
// override - equivalent to each option attached to a wrapping `Regex`.
169+
try _testDSLCaptures(
170+
("abc", "abc"),
171+
("ABC", "ABC"),
172+
("abcabc", "abcabc"),
173+
("abcABCaBc", "abcABCaBc"),
174+
captureType: Substring.self, ==) {
175+
oneOrMore {
176+
"abc"
177+
}
178+
.caseSensitive(false)
179+
.caseSensitive(true)
180+
}
181+
182+
// An option on an outer component doesn't override an option set on an
183+
// inner component.
184+
try _testDSLCaptures(
185+
("abc", "abc"),
186+
("ABC", "ABC"),
187+
("ABCde", "ABCde"),
188+
("ABCDE", nil),
189+
("abcabc", "abcabc"),
190+
("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
191+
captureType: Substring.self, ==) {
192+
oneOrMore {
193+
"abc".caseSensitive(false)
194+
optionally("de")
195+
}
196+
.caseSensitive(true)
197+
}
198+
}
199+
155200
func testQuantificationBehavior() throws {
156201
try _testDSLCaptures(
157202
("abc1def2", ("abc1def2", "2")),
@@ -525,13 +570,6 @@ extension Unicode.Scalar {
525570

526571
// MARK: Extra == functions
527572

528-
// (Substring, [(Substring, Substring, [Substring])])
529-
typealias S_AS = (Substring, [(Substring, Substring, [Substring])])
530-
531-
func ==(lhs: S_AS, rhs: S_AS) -> Bool {
532-
lhs.0 == rhs.0 && lhs.1.elementsEqual(rhs.1, by: ==)
533-
}
534-
535573
func == <T0: Equatable, T1: Equatable, T2: Equatable, T3: Equatable, T4: Equatable, T5: Equatable, T6: Equatable>(
536574
l: (T0, T1, T2, T3, T4, T5, T6), r: (T0, T1, T2, T3, T4, T5, T6)
537575
) -> Bool {

0 commit comments

Comments
 (0)