Skip to content

Commit 1b3ba2c

Browse files
committed
Fix Anchor.startOfLine and Anchor.endOfLine
Introduce `startOfLine` and `endOfLine` assertion kinds, and map the DSL's `Anchor.startOfLine` and `Anchor.endOfLine` to them so that they do not depend on matching options. rdar://97029630
1 parent dff47ff commit 1b3ba2c

File tree

6 files changed

+96
-43
lines changed

6 files changed

+96
-43
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ extension Anchor: RegexComponent {
5555
return isInverted ? .notTextSegment : .textSegment
5656
case .startOfLine:
5757
// FIXME: Inverted?
58-
return .caretAnchor
58+
return .startOfLine
5959
case .endOfLine:
6060
// FIXME: Inverted?
61-
return .dollarAnchor
61+
return .endOfLine
6262
case .wordBoundary:
6363
return isInverted ? .notWordBoundary : .wordBoundary
6464
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,32 @@ fileprivate extension Compiler.ByteCodeGen {
113113
}
114114
}
115115

116+
/// Emits an assertion that succeeds when the current position is at the
/// start of a line.
///
/// The assertion holds when `pos` equals the lower bound of the subject
/// bounds, or when the element immediately before `pos` reports
/// `isNewline`. Whether that element is read as a `Character` or as a
/// Unicode scalar is decided by `options.semanticLevel` at the time this
/// method is called.
mutating func emitStartOfLine() {
117+
// The capture list copies the semantic level by value when the closure is
// created, so the closure does not read `options` when it later runs.
builder.buildAssert { [semanticLevel = options.semanticLevel]
118+
(_, _, input, pos, subjectBounds) in
119+
// The start of the subject bounds always counts as a start of line; this
// check also guarantees there is a preceding element to inspect below.
if pos == subjectBounds.lowerBound { return true }
120+
switch semanticLevel {
121+
case .graphemeCluster:
122+
// Inspect the character that ends just before `pos`.
return input[input.index(before: pos)].isNewline
123+
case .unicodeScalar:
124+
// Inspect the single Unicode scalar just before `pos`.
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
125+
}
126+
}
127+
}
128+
129+
/// Emits an assertion that succeeds when the current position is at the
/// end of a line.
///
/// The assertion holds when `pos` equals the upper bound of the subject
/// bounds, or when the element located at `pos` reports `isNewline`.
/// Whether that element is read as a `Character` or as a Unicode scalar is
/// decided by `options.semanticLevel` at the time this method is called.
mutating func emitEndOfLine() {
130+
// The capture list copies the semantic level by value when the closure is
// created, so the closure does not read `options` when it later runs.
builder.buildAssert { [semanticLevel = options.semanticLevel]
131+
(_, _, input, pos, subjectBounds) in
132+
// The end of the subject bounds always counts as an end of line; this
// check also runs before `pos` is used as a subscript below.
if pos == subjectBounds.upperBound { return true }
133+
switch semanticLevel {
134+
case .graphemeCluster:
135+
// Inspect the character that starts at `pos`.
return input[pos].isNewline
136+
case .unicodeScalar:
137+
// Inspect the single Unicode scalar at `pos`.
return input.unicodeScalars[pos].isNewline
138+
}
139+
}
140+
}
141+
116142
mutating func emitAssertion(
117143
_ kind: DSLTree.Atom.Assertion
118144
) throws {
@@ -170,44 +196,24 @@ fileprivate extension Compiler.ByteCodeGen {
170196
!input.isOnGraphemeClusterBoundary(pos)
171197
}
172198

199+
case .startOfLine:
200+
emitStartOfLine()
201+
202+
case .endOfLine:
203+
emitEndOfLine()
204+
173205
case .caretAnchor:
174-
// FIXME: Anchor.startOfLine must always use this first branch
175-
// The behavior of `^` should depend on `anchorsMatchNewlines`, but
176-
// the DSL-based `.startOfLine` anchor should always match the start
177-
// of a line. Right now we don't distinguish between those anchors.
178206
if options.anchorsMatchNewlines {
179-
builder.buildAssert { [semanticLevel = options.semanticLevel]
180-
(_, _, input, pos, subjectBounds) in
181-
if pos == subjectBounds.lowerBound { return true }
182-
switch semanticLevel {
183-
case .graphemeCluster:
184-
return input[input.index(before: pos)].isNewline
185-
case .unicodeScalar:
186-
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
187-
}
188-
}
207+
emitStartOfLine()
189208
} else {
190209
builder.buildAssert { (_, _, input, pos, subjectBounds) in
191210
pos == subjectBounds.lowerBound
192211
}
193212
}
194-
213+
195214
case .dollarAnchor:
196-
// FIXME: Anchor.endOfLine must always use this first branch
197-
// The behavior of `$` should depend on `anchorsMatchNewlines`, but
198-
// the DSL-based `.endOfLine` anchor should always match the end
199-
// of a line. Right now we don't distinguish between those anchors.
200215
if options.anchorsMatchNewlines {
201-
builder.buildAssert { [semanticLevel = options.semanticLevel]
202-
(_, _, input, pos, subjectBounds) in
203-
if pos == subjectBounds.upperBound { return true }
204-
switch semanticLevel {
205-
case .graphemeCluster:
206-
return input[pos].isNewline
207-
case .unicodeScalar:
208-
return input.unicodeScalars[pos].isNewline
209-
}
210-
}
216+
emitEndOfLine()
211217
} else {
212218
builder.buildAssert { (_, _, input, pos, subjectBounds) in
213219
pos == subjectBounds.upperBound

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -627,12 +627,16 @@ extension DSLTree.Atom.Assertion {
627627
// TODO: Some way to integrate this with conversion...
628628
var _patternBase: String {
629629
switch self {
630-
case .caretAnchor:
631-
// FIXME: The DSL doesn't have a way of representing this.
630+
case .startOfLine:
632631
return "Anchor.startOfLine"
633-
case .dollarAnchor:
634-
// FIXME: The DSL doesn't have a way of representing this.
632+
case .endOfLine:
635633
return "Anchor.endOfLine"
634+
case .caretAnchor:
635+
// The DSL doesn't have an equivalent to this, so print as regex.
636+
return "/^/"
637+
case .dollarAnchor:
638+
// The DSL doesn't have an equivalent to this, so print as regex.
639+
return "/$/"
636640
case .wordBoundary:
637641
return "Anchor.wordBoundary"
638642
case .notWordBoundary:

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,14 @@ extension DSLTree.Atom {
287287
/// \Y
288288
case notTextSegment
289289

290+
/// The DSL's Anchor.startOfLine, which matches the start of a line
291+
/// even if `anchorsMatchNewlines` is false.
292+
case startOfLine
293+
294+
/// The DSL's Anchor.endOfLine, which matches the end of a line
295+
/// even if `anchorsMatchNewlines` is false.
296+
case endOfLine
297+
290298
/// ^
291299
case caretAnchor
292300

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -815,19 +815,40 @@ class RegexDSLTests: XCTestCase {
815815
Anchor.endOfSubject
816816
}.anchorsMatchLineEndings()
817817
}
818-
819-
// FIXME: Anchor.start/endOfLine needs to always match line endings,
820-
// even when the `anchorsMatchLineEndings()` option is turned off.
818+
821819
try _testDSLCaptures(
822-
("\naaa", "aaa"),
823-
("aaa\n", "aaa"),
824-
("\naaa\n", "aaa"),
825-
matchType: Substring.self, ==, xfail: true)
820+
("\naaa", "\naaa"),
821+
("aaa\n", "aaa\n"),
822+
("\naaa\n", "\naaa\n"),
823+
matchType: Substring.self, ==)
826824
{
827825
Regex {
826+
Optionally { "\n" }
828827
Anchor.startOfLine
829828
Repeat("a", count: 3)
830829
Anchor.endOfLine
830+
Optionally { "\n" }
831+
}
832+
}
833+
834+
// startOfLine/endOfLine apply regardless of mode.
835+
for matchLineEndings in [true, false] {
836+
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
837+
let r = Regex {
838+
Anchor.startOfLine
839+
Repeat("a", count: 3)
840+
Anchor.endOfLine
841+
}.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode)
842+
843+
XCTAssertNotNil(try r.firstMatch(in: "\naaa"))
844+
XCTAssertNotNil(try r.firstMatch(in: "aaa\n"))
845+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\n"))
846+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n"))
847+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n"))
848+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n"))
849+
850+
XCTAssertNil(try r.firstMatch(in: "\nbaaa\n"))
851+
XCTAssertNil(try r.firstMatch(in: "\naaab\n"))
831852
}
832853
}
833854
}

Tests/RegexTests/RenderDSLTests.swift

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,21 @@ extension RenderDSLTests {
8585
}
8686
"""#)
8787
}
88-
88+
89+
/// Passes the regex `^(?:a|b|c)$` and a DSL rendering to `testConversion`.
///
/// In the DSL text the `^` and `$` anchors are written as the regex
/// literals `/^/` and `/$/` rather than as `Anchor.startOfLine` /
/// `Anchor.endOfLine`.
/// NOTE(review): presumably `testConversion` asserts that the rendered DSL
/// equals the second argument — confirm against its definition.
func testAnchor() throws {
90+
try testConversion(#"^(?:a|b|c)$"#, #"""
91+
Regex {
92+
/^/
93+
ChoiceOf {
94+
"a"
95+
"b"
96+
"c"
97+
}
98+
/$/
99+
}
100+
"""#)
101+
}
102+
89103
func testOptions() throws {
90104
try XCTExpectFailure("Options like '(?i)' aren't converted") {
91105
try testConversion(#"(?i)abc"#, """

0 commit comments

Comments
 (0)