Skip to content

Commit 0d41bb2

Browse files
natecook1000 authored and Azoy committed
Nominalize option methods (#295)
1 parent 3c43286 commit 0d41bb2

File tree

3 files changed

+85
-20
lines changed

3 files changed

+85
-20
lines changed

Sources/_StringProcessing/Regex/Options.swift

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,36 @@ import _RegexParser
1414
@available(SwiftStdlib 5.7, *)
1515
extension RegexComponent {
1616
/// Returns a regular expression that ignores casing when matching.
17-
public func ignoringCase(_ ignoreCase: Bool = true) -> Regex<RegexOutput> {
18-
wrapInOption(.caseInsensitive, addingIf: ignoreCase)
17+
public func ignoresCase(_ ignoresCase: Bool = true) -> Regex<RegexOutput> {
18+
wrapInOption(.caseInsensitive, addingIf: ignoresCase)
1919
}
2020

2121
/// Returns a regular expression that only matches ASCII characters as "word
2222
/// characters".
23-
public func usingASCIIWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
24-
wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
23+
public func asciiOnlyWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
24+
wrapInOption(.asciiOnlyWord, addingIf: useASCII)
2525
}
2626

2727
/// Returns a regular expression that only matches ASCII characters as digits.
28-
public func usingASCIIDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
28+
public func asciiOnlyDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
2929
wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
3030
}
3131

3232
/// Returns a regular expression that only matches ASCII characters as space
3333
/// characters.
34-
public func usingASCIISpaces(_ useASCII: Bool = true) -> Regex<RegexOutput> {
34+
public func asciiOnlyWhitespace(_ useASCII: Bool = true) -> Regex<RegexOutput> {
3535
wrapInOption(.asciiOnlySpace, addingIf: useASCII)
3636
}
3737

3838
/// Returns a regular expression that only matches ASCII characters when
3939
/// matching character classes.
40-
public func usingASCIICharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
40+
public func asciiOnlyCharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
4141
wrapInOption(.asciiOnlyPOSIXProps, addingIf: useASCII)
4242
}
4343

44-
/// Returns a regular expression that uses the Unicode word boundary
45-
/// algorithm.
46-
///
47-
/// This option is enabled by default; pass `false` to disable use of
48-
/// Unicode's word boundary algorithm.
49-
public func usingUnicodeWordBoundaries(_ useUnicodeWordBoundaries: Bool = true) -> Regex<RegexOutput> {
50-
wrapInOption(.unicodeWordBoundaries, addingIf: useUnicodeWordBoundaries)
44+
/// Returns a regular expression that uses the specified word boundary algorithm.
45+
public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
46+
wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
5147
}
5248

5349
/// Returns a regular expression where the start and end of input
@@ -133,6 +129,7 @@ extension RegexComponent {
133129
}
134130

135131
@available(SwiftStdlib 5.7, *)
132+
/// A semantic level to use during regex matching.
136133
public struct RegexSemanticLevel: Hashable {
137134
internal enum Representation {
138135
case graphemeCluster
@@ -154,6 +151,38 @@ public struct RegexSemanticLevel: Hashable {
154151
}
155152
}
156153

154+
@available(SwiftStdlib 5.7, *)
155+
/// A word boundary algorithm to use during regex matching.
156+
public struct RegexWordBoundaryKind: Hashable {
157+
internal enum Representation {
158+
case unicodeLevel1
159+
case unicodeLevel2
160+
}
161+
162+
internal var base: Representation
163+
164+
/// A word boundary algorithm that implements the "simple word boundary"
165+
/// Unicode recommendation.
166+
///
167+
/// A simple word boundary is a position in the input between two characters
168+
/// that match `/\w\W/` or `/\W\w/`, or between the start or end of the input
169+
/// and a `\w` character. Word boundaries therefore depend on the option-
170+
/// defined behavior of `\w`.
171+
public static var unicodeLevel1: Self {
172+
.init(base: .unicodeLevel1)
173+
}
174+
175+
/// A word boundary algorithm that implements the "default word boundary"
176+
/// Unicode recommendation.
177+
///
178+
/// Default word boundaries use a Unicode algorithm that handles some cases
179+
/// better than simple word boundaries, such as words with internal
180+
/// punctuation, changes in script, and Emoji.
181+
public static var unicodeLevel2: Self {
182+
.init(base: .unicodeLevel2)
183+
}
184+
}
185+
157186
// MARK: - Helper method
158187

159188
@available(SwiftStdlib 5.7, *)

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ class RegexDSLTests: XCTestCase {
228228
matchType: Substring.self, ==) {
229229
OneOrMore {
230230
"abc"
231-
}.ignoringCase(true)
231+
}.ignoresCase(true)
232232
}
233233

234234
// Multiple options on one component wrap successively, but do not
@@ -242,8 +242,8 @@ class RegexDSLTests: XCTestCase {
242242
OneOrMore {
243243
"abc"
244244
}
245-
.ignoringCase(true)
246-
.ignoringCase(false)
245+
.ignoresCase(true)
246+
.ignoresCase(false)
247247
}
248248

249249
// An option on an outer component doesn't override an option set on an
@@ -257,12 +257,36 @@ class RegexDSLTests: XCTestCase {
257257
("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
258258
matchType: Substring.self, ==) {
259259
OneOrMore {
260-
"abc".ignoringCase(true)
260+
"abc".ignoresCase(true)
261261
Optionally("de")
262262
}
263-
.ignoringCase(false)
263+
.ignoresCase(false)
264264
}
265265

266+
#if os(macOS)
267+
try XCTExpectFailure("Implement level 2 word boundaries") {
268+
try _testDSLCaptures(
269+
("can't stop won't stop", ("can't stop won't stop", "can't", "won")),
270+
matchType: (Substring, Substring, Substring).self, ==) {
271+
Capture {
272+
OneOrMore(.word)
273+
Anchor.wordBoundary
274+
}
275+
OneOrMore(.any, .reluctantly)
276+
"stop"
277+
" "
278+
279+
Capture {
280+
OneOrMore(.word)
281+
Anchor.wordBoundary
282+
}
283+
.wordBoundaryKind(.unicodeLevel1)
284+
OneOrMore(.any, .reluctantly)
285+
"stop"
286+
}
287+
}
288+
#endif
289+
266290
try _testDSLCaptures(
267291
("abcdef123", ("abcdef123", "a", "123")),
268292
matchType: (Substring, Substring, Substring).self, ==) {
@@ -280,6 +304,18 @@ class RegexDSLTests: XCTestCase {
280304
}
281305
ZeroOrMore(.digit)
282306
}
307+
308+
try _testDSLCaptures(
309+
("abcdefg", ("abcdefg", "abcdefg")),
310+
("abcdéfg", ("abcdéfg", "abcd")),
311+
matchType: (Substring, Substring).self, ==) {
312+
Capture {
313+
OneOrMore(.word)
314+
}
315+
.asciiOnlyWordCharacters()
316+
317+
ZeroOrMore(.any)
318+
}
283319
}
284320

285321
func testQuantificationBehavior() throws {

Tests/RegexTests/MatchTests.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1308,7 +1308,7 @@ extension RegexTests {
13081308
XCTAssertTrue ("cafe".contains(regex))
13091309
XCTAssertFalse("CaFe".contains(regex))
13101310

1311-
let caseInsensitiveRegex = regex.ignoringCase()
1311+
let caseInsensitiveRegex = regex.ignoresCase()
13121312
XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
13131313
XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
13141314
}

0 commit comments

Comments
 (0)