Skip to content

Commit b8a1a81

Browse files
committed
Adds RegexBuilder.CharacterClass.anyUnicodeScalar (swiftlang#315)
This provides a RegexBuilder API that is equivalent to `\O` in regex syntax.
1 parent 115a937 commit b8a1a81

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ extension RegexComponent where Self == CharacterClass {
6060
public static var anyGrapheme: CharacterClass {
6161
.init(unconverted: .anyGrapheme)
6262
}
63+
64+
public static var anyUnicodeScalar: CharacterClass {
65+
.init(unconverted: .anyUnicodeScalar)
66+
}
6367

6468
public static var whitespace: CharacterClass {
6569
.init(unconverted: .whitespace)

Sources/_StringProcessing/_CharacterClassModel.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public struct _CharacterClassModel: Hashable {
3333
case any
3434
/// Any grapheme cluster
3535
case anyGrapheme
36+
/// Any Unicode scalar
37+
case anyScalar
3638
/// Character.isDigit
3739
case digit
3840
/// Character.isHexDigit
@@ -155,8 +157,12 @@ public struct _CharacterClassModel: Hashable {
155157
case .graphemeCluster:
156158
let c = str[i]
157159
var matched: Bool
160+
var next = str.index(after: i)
158161
switch cc {
159162
case .any, .anyGrapheme: matched = true
163+
case .anyScalar:
164+
matched = true
165+
next = str.unicodeScalars.index(after: i)
160166
case .digit:
161167
matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits)
162168
case .hexDigit:
@@ -174,12 +180,13 @@ public struct _CharacterClassModel: Hashable {
174180
if isInverted {
175181
matched.toggle()
176182
}
177-
return matched ? str.index(after: i) : nil
183+
return matched ? next : nil
178184
case .unicodeScalar:
179185
let c = str.unicodeScalars[i]
180186
var matched: Bool
181187
switch cc {
182188
case .any: matched = true
189+
case .anyScalar: matched = true
183190
case .anyGrapheme: fatalError("Not matched in this mode")
184191
case .digit:
185192
matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits)
@@ -224,6 +231,10 @@ extension _CharacterClassModel {
224231
.init(cc: .anyGrapheme, matchLevel: .graphemeCluster)
225232
}
226233

234+
public static var anyUnicodeScalar: _CharacterClassModel {
235+
.init(cc: .any, matchLevel: .unicodeScalar)
236+
}
237+
227238
public static var whitespace: _CharacterClassModel {
228239
.init(cc: .whitespace, matchLevel: .graphemeCluster)
229240
}
@@ -275,6 +286,7 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
275286
switch self {
276287
case .any: return "<any>"
277288
case .anyGrapheme: return "<any grapheme>"
289+
case .anyScalar: return "<any scalar>"
278290
case .digit: return "<digit>"
279291
case .hexDigit: return "<hex digit>"
280292
case .horizontalWhitespace: return "<horizontal whitespace>"
@@ -431,6 +443,7 @@ extension AST.Atom.EscapedBuiltin {
431443
case .notWordCharacter: return .word.inverted
432444

433445
case .graphemeCluster: return .anyGrapheme
446+
case .trueAnychar: return .anyUnicodeScalar
434447

435448
default:
436449
return nil

Tests/RegexTests/MatchTests.swift

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,13 +1483,11 @@ extension RegexTests {
14831483
(eDecomposed, false))
14841484

14851485
// FIXME: \O is only partially supported; the cases marked `xfail: true` below still fail
1486-
firstMatchTest(#"\O\u{301}"#, input: eDecomposed, match: eDecomposed,
1487-
xfail: true)
1488-
firstMatchTest(#"e\O"#, input: eDecomposed, match: eDecomposed,
1489-
xfail: true)
1490-
firstMatchTest(#"\O\u{301}"#, input: eComposed, match: nil,
1491-
xfail: true)
1492-
firstMatchTest(#"e\O"#, input: eComposed, match: nil,
1486+
firstMatchTest(#"(?u)\O\u{301}"#, input: eDecomposed, match: eDecomposed)
1487+
firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed,
1488+
xfail: true)
1489+
firstMatchTest(#"\O"#, input: eComposed, match: eComposed)
1490+
firstMatchTest(#"\O"#, input: eDecomposed, match: nil,
14931491
xfail: true)
14941492

14951493
matchTest(

0 commit comments

Comments
 (0)