Skip to content

Commit 9456c54

Browse files
committed
Implement CharacterClass.anyNonNewline
rdar://97029702
1 parent 9a545a0 commit 9456c54

File tree

6 files changed

+101
-7
lines changed

6 files changed

+101
-7
lines changed

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ extension RegexComponent where Self == CharacterClass {
4545
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
4646
}
4747

48+
/// A character class that matches any element other than a newline.
///
/// Unlike `.` in a regex literal, this class is not affected by single-line
/// mode (`dotMatchesNewlines`).
public static var anyNonNewline: CharacterClass {
49+
// A custom character class whose only member is the `.anyNonNewline` atom.
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
50+
}
51+
4852
public static var anyGraphemeCluster: CharacterClass {
4953
.init(unconverted: ._anyGrapheme)
5054
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ fileprivate extension Compiler.ByteCodeGen {
5858
case .any:
5959
emitAny()
6060

61+
case .anyNonNewline:
62+
emitAnyNonNewline()
63+
6164
case .dot:
6265
emitDot()
6366

@@ -341,11 +344,7 @@ fileprivate extension Compiler.ByteCodeGen {
341344
}
342345
}
343346

344-
mutating func emitDot() {
345-
if options.dotMatchesNewline {
346-
emitAny()
347-
return
348-
}
347+
mutating func emitAnyNonNewline() {
349348
switch options.semanticLevel {
350349
case .graphemeCluster:
351350
builder.buildConsume { input, bounds in
@@ -362,6 +361,14 @@ fileprivate extension Compiler.ByteCodeGen {
362361
}
363362
}
364363

364+
/// Emits matching code for the regex-literal `.` atom.
///
/// Dispatches on `options.dotMatchesNewline`: when it is set, `.` is emitted
/// via `emitAny()`; otherwise it is emitted via `emitAnyNonNewline()`.
mutating func emitDot() {
365+
if options.dotMatchesNewline {
366+
emitAny()
367+
} else {
368+
emitAnyNonNewline()
369+
}
370+
}
371+
365372
mutating func emitAlternation(
366373
_ children: [DSLTree.Node]
367374
) throws {

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,22 @@ extension DSLTree.Atom {
140140
}
141141
}
142142

143+
case .anyNonNewline:
144+
switch opts.semanticLevel {
145+
case .graphemeCluster:
146+
return { input, bounds in
147+
input[bounds.lowerBound].isNewline
148+
? nil
149+
: input.index(after: bounds.lowerBound)
150+
}
151+
case .unicodeScalar:
152+
return { input, bounds in
153+
input[bounds.lowerBound].isNewline
154+
? nil
155+
: input.unicodeScalars.index(after: bounds.lowerBound)
156+
}
157+
}
158+
143159
case .dot:
144160
throw Unreachable(".atom(.dot) should be handled by emitDot")
145161

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,9 @@ extension DSLTree.Atom {
11331133
case .any:
11341134
return (".any", true)
11351135

1136+
case .anyNonNewline:
1137+
return (".anyNonNewline", true)
1138+
11361139
case .dot:
11371140
// The DSL does not have an equivalent to '.', print as a regex.
11381141
return ("/./", false)
@@ -1179,6 +1182,9 @@ extension DSLTree.Atom {
11791182
case .any:
11801183
return "(?s:.)"
11811184

1185+
case .anyNonNewline:
1186+
return "(?-s:.)"
1187+
11821188
case .dot:
11831189
return "."
11841190

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ extension DSLTree {
169169
/// Any character, including newlines.
170170
case any
171171

172+
/// Any character, excluding newlines. This differs from '.', as it is not
173+
/// affected by single line mode.
174+
case anyNonNewline
175+
172176
/// The DSL representation of '.' in a regex literal. This does not match
173177
/// newlines unless single line mode is enabled.
174178
case dot
@@ -795,8 +799,8 @@ extension DSLTree.Atom {
795799
switch self {
796800
case .changeMatchingOptions, .assertion:
797801
return false
798-
case .char, .scalar, .any, .dot, .backreference, .symbolicReference,
799-
.unconverted:
802+
case .char, .scalar, .any, .anyNonNewline, .dot, .backreference,
803+
.symbolicReference, .unconverted:
800804
return true
801805
}
802806
}

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,63 @@ class RegexDSLTests: XCTestCase {
313313
}
314314
}
315315

316+
/// Exercises `CharacterClass.anyNonNewline`, its inverse, and an intersection
/// under both semantic levels and both `dotMatchesNewlines` settings.
///
/// NOTE(review): an expected value of `nil` presumably means "no match" —
/// confirm against `_testDSLCaptures`.
func testAnyNonNewline() throws {
317+
// `.anyNonNewline` behaves like `.` with single-line mode off; the same
// expectations are used for both values of `dotMatchesNewline` below.
318+
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
319+
for dotMatchesNewline in [true, false] {
320+
// The class itself: only the newline-free input has a non-nil expectation.
try _testDSLCaptures(
321+
("abcdef", "abcdef"),
322+
("abcdef\n", nil),
323+
("\r\n", nil),
324+
("\r", nil),
325+
("\n", nil),
326+
matchType: Substring.self, ==)
327+
{
328+
Regex {
329+
OneOrMore(.anyNonNewline)
330+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
331+
}
332+
333+
// The inverted class: only the newline-only inputs have non-nil expectations.
try _testDSLCaptures(
334+
("abcdef", nil),
335+
("abcdef\n", nil),
336+
("\r\n", "\r\n"),
337+
("\r", "\r"),
338+
("\n", "\n"),
339+
matchType: Substring.self, ==)
340+
{
341+
Regex {
342+
OneOrMore(.anyNonNewline.inverted)
343+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
344+
}
345+
346+
// Intersection with a set that also lists "\n" and "\r": only "abc" has a
// non-nil expectation, so the newline members are expected to be excluded.
try _testDSLCaptures(
347+
("abc", "abc"),
348+
("abcd", nil),
349+
("\r\n", nil),
350+
("\r", nil),
351+
("\n", nil),
352+
matchType: Substring.self, ==)
353+
{
354+
Regex {
355+
OneOrMore(CharacterClass.anyNonNewline.intersection(.anyOf("\n\rabc")))
356+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
357+
}
358+
}
359+
}
360+
361+
// A single (unquantified) inverted class with no explicit semantic level:
// "\r\n" is expected to match in full.
try _testDSLCaptures(
362+
("\r\n", "\r\n"), matchType: Substring.self, ==) {
363+
CharacterClass.anyNonNewline.inverted
364+
}
365+
// Same pattern under `.unicodeScalar` semantics: expectation is nil.
// NOTE(review): presumably because "\r\n" is two scalars and a single class
// consumes only one — confirm.
try _testDSLCaptures(
366+
("\r\n", nil), matchType: Substring.self, ==) {
367+
Regex {
368+
CharacterClass.anyNonNewline.inverted
369+
}.matchingSemantics(.unicodeScalar)
370+
}
371+
}
372+
316373
func testMatchResultDotZeroWithoutCapture() throws {
317374
let match = try XCTUnwrap("aaa".wholeMatch { OneOrMore { "a" } })
318375
XCTAssertEqual(match.0, "aaa")

0 commit comments

Comments
 (0)