Skip to content

Commit 540ab8c

Browse files
committed
Fix Anchor.startOfLine and Anchor.endOfLine
Introduce `startOfLine` and `endOfLine` assertion kinds, and map the DSL to them such that they do not depend on matching options. rdar://97029630
1 parent 3f3d253 commit 540ab8c

File tree

6 files changed

+96
-43
lines changed

6 files changed

+96
-43
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ extension Anchor: RegexComponent {
5555
return isInverted ? .notTextSegment : .textSegment
5656
case .startOfLine:
5757
// FIXME: Inverted?
58-
return .caretAnchor
58+
return .startOfLine
5959
case .endOfLine:
6060
// FIXME: Inverted?
61-
return .dollarAnchor
61+
return .endOfLine
6262
case .wordBoundary:
6363
return isInverted ? .notWordBoundary : .wordBoundary
6464
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,32 @@ fileprivate extension Compiler.ByteCodeGen {
113113
}
114114
}
115115

116+
/// Emits an assertion that succeeds when the current position is at the
/// start of a line.
///
/// The assertion passes at the lower bound of the subject, or when the
/// element immediately before the position is a newline. "Element" is a
/// `Character` or a Unicode scalar, depending on the semantic level captured
/// from `options` when the assertion is built.
///
/// This helper does not itself consult `anchorsMatchNewlines`; callers decide
/// when line-based matching applies.
mutating func emitStartOfLine() {
117+
builder.buildAssert { [semanticLevel = options.semanticLevel]
118+
(_, _, input, pos, subjectBounds) in
119+
// The start of the subject always counts as the start of a line.
if pos == subjectBounds.lowerBound { return true }
120+
// Otherwise inspect the element just before `pos`, stepping back in the
// view that matches the semantic level.
switch semanticLevel {
121+
case .graphemeCluster:
122+
return input[input.index(before: pos)].isNewline
123+
case .unicodeScalar:
124+
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
125+
}
126+
}
127+
}
128+
129+
/// Emits an assertion that succeeds when the current position is at the
/// end of a line.
///
/// The assertion passes at the upper bound of the subject, or when the
/// element at the position is a newline. "Element" is a `Character` or a
/// Unicode scalar, depending on the semantic level captured from `options`
/// when the assertion is built.
///
/// This helper does not itself consult `anchorsMatchNewlines`; callers decide
/// when line-based matching applies.
mutating func emitEndOfLine() {
130+
builder.buildAssert { [semanticLevel = options.semanticLevel]
131+
(_, _, input, pos, subjectBounds) in
132+
// The end of the subject always counts as the end of a line; checking it
// first also avoids subscripting at the upper bound below.
if pos == subjectBounds.upperBound { return true }
133+
// Otherwise inspect the element at `pos` in the view that matches the
// semantic level.
switch semanticLevel {
134+
case .graphemeCluster:
135+
return input[pos].isNewline
136+
case .unicodeScalar:
137+
return input.unicodeScalars[pos].isNewline
138+
}
139+
}
140+
}
141+
116142
mutating func emitAssertion(
117143
_ kind: DSLTree.Atom.Assertion
118144
) throws {
@@ -170,44 +196,24 @@ fileprivate extension Compiler.ByteCodeGen {
170196
!input.isOnGraphemeClusterBoundary(pos)
171197
}
172198

199+
case .startOfLine:
200+
emitStartOfLine()
201+
202+
case .endOfLine:
203+
emitEndOfLine()
204+
173205
case .caretAnchor:
174-
// FIXME: Anchor.startOfLine must always use this first branch
175-
// The behavior of `^` should depend on `anchorsMatchNewlines`, but
176-
// the DSL-based `.startOfLine` anchor should always match the start
177-
// of a line. Right now we don't distinguish between those anchors.
178206
if options.anchorsMatchNewlines {
179-
builder.buildAssert { [semanticLevel = options.semanticLevel]
180-
(_, _, input, pos, subjectBounds) in
181-
if pos == subjectBounds.lowerBound { return true }
182-
switch semanticLevel {
183-
case .graphemeCluster:
184-
return input[input.index(before: pos)].isNewline
185-
case .unicodeScalar:
186-
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
187-
}
188-
}
207+
emitStartOfLine()
189208
} else {
190209
builder.buildAssert { (_, _, input, pos, subjectBounds) in
191210
pos == subjectBounds.lowerBound
192211
}
193212
}
194-
213+
195214
case .dollarAnchor:
196-
// FIXME: Anchor.endOfLine must always use this first branch
197-
// The behavior of `$` should depend on `anchorsMatchNewlines`, but
198-
// the DSL-based `.endOfLine` anchor should always match the end
199-
// of a line. Right now we don't distinguish between those anchors.
200215
if options.anchorsMatchNewlines {
201-
builder.buildAssert { [semanticLevel = options.semanticLevel]
202-
(_, _, input, pos, subjectBounds) in
203-
if pos == subjectBounds.upperBound { return true }
204-
switch semanticLevel {
205-
case .graphemeCluster:
206-
return input[pos].isNewline
207-
case .unicodeScalar:
208-
return input.unicodeScalars[pos].isNewline
209-
}
210-
}
216+
emitEndOfLine()
211217
} else {
212218
builder.buildAssert { (_, _, input, pos, subjectBounds) in
213219
pos == subjectBounds.upperBound

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -627,12 +627,16 @@ extension DSLTree.Atom.Assertion {
627627
// TODO: Some way to integrate this with conversion...
628628
var _patternBase: String {
629629
switch self {
630-
case .caretAnchor:
631-
// FIXME: The DSL doesn't have a way of representing this.
630+
case .startOfLine:
632631
return "Anchor.startOfLine"
633-
case .dollarAnchor:
634-
// FIXME: The DSL doesn't have a way of representing this.
632+
case .endOfLine:
635633
return "Anchor.endOfLine"
634+
case .caretAnchor:
635+
// The DSL doesn't have an equivalent to this, so print as regex.
636+
return "/^/"
637+
case .dollarAnchor:
638+
// The DSL doesn't have an equivalent to this, so print as regex.
639+
return "/$/"
636640
case .wordBoundary:
637641
return "Anchor.wordBoundary"
638642
case .notWordBoundary:

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,14 @@ extension DSLTree.Atom {
287287
/// \Y
288288
case notTextSegment
289289

290+
/// The DSL's Anchor.startOfLine, which matches the start of a line
291+
/// even if `anchorsMatchNewlines` is false.
292+
case startOfLine
293+
294+
/// The DSL's Anchor.endOfLine, which matches the end of a line
295+
/// even if `anchorsMatchNewlines` is false.
296+
case endOfLine
297+
290298
/// ^
291299
case caretAnchor
292300

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -798,19 +798,40 @@ class RegexDSLTests: XCTestCase {
798798
Anchor.endOfSubject
799799
}.anchorsMatchLineEndings()
800800
}
801-
802-
// FIXME: Anchor.start/endOfLine needs to always match line endings,
803-
// even when the `anchorsMatchLineEndings()` option is turned off.
801+
804802
try _testDSLCaptures(
805-
("\naaa", "aaa"),
806-
("aaa\n", "aaa"),
807-
("\naaa\n", "aaa"),
808-
matchType: Substring.self, ==, xfail: true)
803+
("\naaa", "\naaa"),
804+
("aaa\n", "aaa\n"),
805+
("\naaa\n", "\naaa\n"),
806+
matchType: Substring.self, ==)
809807
{
810808
Regex {
809+
Optionally { "\n" }
811810
Anchor.startOfLine
812811
Repeat("a", count: 3)
813812
Anchor.endOfLine
813+
Optionally { "\n" }
814+
}
815+
}
816+
817+
// startOfLine/endOfLine apply regardless of mode.
818+
for matchLineEndings in [true, false] {
819+
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
820+
let r = Regex {
821+
Anchor.startOfLine
822+
Repeat("a", count: 3)
823+
Anchor.endOfLine
824+
}.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode)
825+
826+
XCTAssertNotNil(try r.firstMatch(in: "\naaa"))
827+
XCTAssertNotNil(try r.firstMatch(in: "aaa\n"))
828+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\n"))
829+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n"))
830+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n"))
831+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n"))
832+
833+
XCTAssertNil(try r.firstMatch(in: "\nbaaa\n"))
834+
XCTAssertNil(try r.firstMatch(in: "\naaab\n"))
814835
}
815836
}
816837
}

Tests/RegexTests/RenderDSLTests.swift

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,21 @@ extension RenderDSLTests {
8585
}
8686
"""#)
8787
}
88-
88+
89+
/// Checks via `testConversion` that the regex `^(?:a|b|c)$` renders with its
/// `^` and `$` anchors printed as the regex literals `/^/` and `/$/`, rather
/// than as `Anchor.startOfLine` / `Anchor.endOfLine`.
func testAnchor() throws {
90+
try testConversion(#"^(?:a|b|c)$"#, #"""
91+
Regex {
92+
/^/
93+
ChoiceOf {
94+
"a"
95+
"b"
96+
"c"
97+
}
98+
/$/
99+
}
100+
"""#)
101+
}
102+
89103
func testOptions() throws {
90104
try XCTExpectFailure("Options like '(?i)' aren't converted") {
91105
try testConversion(#"(?i)abc"#, """

0 commit comments

Comments
 (0)