Skip to content

Commit f9a4675

Browse files
authored
Adds RegexBuilder.CharacterClass.anyUnicodeScalar (#315)
This provides a RegexBuilder API that is equivalent to `\O` in regex syntax.
1 parent b24d3ea commit f9a4675

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ extension RegexComponent where Self == CharacterClass {
5151
/// A character class created from the underlying model's `anyGrapheme` class.
public static var anyGrapheme: CharacterClass {
5252
.init(unconverted: .anyGrapheme)
5353
}
54+
55+
/// A character class created from the underlying model's `anyUnicodeScalar`
/// class.
///
/// Intended as the RegexBuilder equivalent of `\O` in regex syntax.
public static var anyUnicodeScalar: CharacterClass {
56+
.init(unconverted: .anyUnicodeScalar)
57+
}
5458

5559
public static var whitespace: CharacterClass {
5660
.init(unconverted: .whitespace)

Sources/_StringProcessing/_CharacterClassModel.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public struct _CharacterClassModel: Hashable {
3333
case any
3434
/// Any grapheme cluster
3535
case anyGrapheme
36+
/// Any Unicode scalar
37+
case anyScalar
3638
/// Character.isDigit
3739
case digit
3840
/// Character.isHexDigit
@@ -159,8 +161,12 @@ public struct _CharacterClassModel: Hashable {
159161
case .graphemeCluster:
160162
let c = str[i]
161163
var matched: Bool
164+
var next = str.index(after: i)
162165
switch cc {
163166
case .any, .anyGrapheme: matched = true
167+
case .anyScalar:
168+
matched = true
169+
next = str.unicodeScalars.index(after: i)
164170
case .digit:
165171
matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits)
166172
case .hexDigit:
@@ -178,12 +184,13 @@ public struct _CharacterClassModel: Hashable {
178184
if isInverted {
179185
matched.toggle()
180186
}
181-
return matched ? str.index(after: i) : nil
187+
return matched ? next : nil
182188
case .unicodeScalar:
183189
let c = str.unicodeScalars[i]
184190
var matched: Bool
185191
switch cc {
186192
case .any: matched = true
193+
case .anyScalar: matched = true
187194
case .anyGrapheme: fatalError("Not matched in this mode")
188195
case .digit:
189196
matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits)
@@ -228,6 +235,10 @@ extension _CharacterClassModel {
228235
.init(cc: .anyGrapheme, matchLevel: .graphemeCluster)
229236
}
230237

238+
/// A model that uses the `.any` representation at the `.unicodeScalar` match
/// level.
// NOTE(review): this is built from `.any`, not the `.anyScalar` representation
// case — confirm that `.any` is the intended representation here.
public static var anyUnicodeScalar: _CharacterClassModel {
239+
.init(cc: .any, matchLevel: .unicodeScalar)
240+
}
241+
231242
/// A model that uses the `.whitespace` representation at the
/// `.graphemeCluster` match level.
public static var whitespace: _CharacterClassModel {
232243
.init(cc: .whitespace, matchLevel: .graphemeCluster)
233244
}
@@ -279,6 +290,7 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
279290
switch self {
280291
case .any: return "<any>"
281292
case .anyGrapheme: return "<any grapheme>"
293+
case .anyScalar: return "<any scalar>"
282294
case .digit: return "<digit>"
283295
case .hexDigit: return "<hex digit>"
284296
case .horizontalWhitespace: return "<horizontal whitespace>"
@@ -445,6 +457,7 @@ extension AST.Atom.EscapedBuiltin {
445457
case .notWordCharacter: return .word.inverted
446458

447459
case .graphemeCluster: return .anyGrapheme
460+
case .trueAnychar: return .anyUnicodeScalar
448461

449462
default:
450463
return nil

Tests/RegexTests/MatchTests.swift

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,13 +1512,11 @@ extension RegexTests {
15121512
(eDecomposed, false))
15131513

15141514
// FIXME: \O is unsupported
1515-
firstMatchTest(#"\O\u{301}"#, input: eDecomposed, match: eDecomposed,
1516-
xfail: true)
1517-
firstMatchTest(#"e\O"#, input: eDecomposed, match: eDecomposed,
1518-
xfail: true)
1519-
firstMatchTest(#"\O\u{301}"#, input: eComposed, match: nil,
1520-
xfail: true)
1521-
firstMatchTest(#"e\O"#, input: eComposed, match: nil,
1515+
firstMatchTest(#"(?u)\O\u{301}"#, input: eDecomposed, match: eDecomposed)
1516+
firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed,
1517+
xfail: true)
1518+
firstMatchTest(#"\O"#, input: eComposed, match: eComposed)
1519+
firstMatchTest(#"\O"#, input: eDecomposed, match: nil,
15221520
xfail: true)
15231521

15241522
matchTest(

0 commit comments

Comments
 (0)