Skip to content

Commit 210bfa3

Browse files
committed
Fix Anchor.startOfLine and Anchor.endOfLine
Introduce `startOfLine` and `endOfLine` assertion kinds, and map the DSL's `Anchor.startOfLine` and `Anchor.endOfLine` to them so that they do not depend on matching options. rdar://97029630
1 parent 21ca2fb commit 210bfa3

File tree

6 files changed

+96
-43
lines changed

6 files changed

+96
-43
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ extension Anchor: RegexComponent {
5555
return isInverted ? .notTextSegment : .textSegment
5656
case .startOfLine:
5757
// FIXME: Inverted?
58-
return .caretAnchor
58+
return .startOfLine
5959
case .endOfLine:
6060
// FIXME: Inverted?
61-
return .dollarAnchor
61+
return .endOfLine
6262
case .wordBoundary:
6363
return isInverted ? .notWordBoundary : .wordBoundary
6464
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,32 @@ fileprivate extension Compiler.ByteCodeGen {
145145
}
146146
}
147147

148+
mutating func emitStartOfLine() {
149+
builder.buildAssert { [semanticLevel = options.semanticLevel]
150+
(_, _, input, pos, subjectBounds) in
151+
if pos == subjectBounds.lowerBound { return true }
152+
switch semanticLevel {
153+
case .graphemeCluster:
154+
return input[input.index(before: pos)].isNewline
155+
case .unicodeScalar:
156+
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
157+
}
158+
}
159+
}
160+
161+
mutating func emitEndOfLine() {
162+
builder.buildAssert { [semanticLevel = options.semanticLevel]
163+
(_, _, input, pos, subjectBounds) in
164+
if pos == subjectBounds.upperBound { return true }
165+
switch semanticLevel {
166+
case .graphemeCluster:
167+
return input[pos].isNewline
168+
case .unicodeScalar:
169+
return input.unicodeScalars[pos].isNewline
170+
}
171+
}
172+
}
173+
148174
mutating func emitAssertion(
149175
_ kind: DSLTree.Atom.Assertion
150176
) throws {
@@ -202,44 +228,24 @@ fileprivate extension Compiler.ByteCodeGen {
202228
!input.isOnGraphemeClusterBoundary(pos)
203229
}
204230

231+
case .startOfLine:
232+
emitStartOfLine()
233+
234+
case .endOfLine:
235+
emitEndOfLine()
236+
205237
case .caretAnchor:
206-
// FIXME: Anchor.startOfLine must always use this first branch
207-
// The behavior of `^` should depend on `anchorsMatchNewlines`, but
208-
// the DSL-based `.startOfLine` anchor should always match the start
209-
// of a line. Right now we don't distinguish between those anchors.
210238
if options.anchorsMatchNewlines {
211-
builder.buildAssert { [semanticLevel = options.semanticLevel]
212-
(_, _, input, pos, subjectBounds) in
213-
if pos == subjectBounds.lowerBound { return true }
214-
switch semanticLevel {
215-
case .graphemeCluster:
216-
return input[input.index(before: pos)].isNewline
217-
case .unicodeScalar:
218-
return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
219-
}
220-
}
239+
emitStartOfLine()
221240
} else {
222241
builder.buildAssert { (_, _, input, pos, subjectBounds) in
223242
pos == subjectBounds.lowerBound
224243
}
225244
}
226-
245+
227246
case .dollarAnchor:
228-
// FIXME: Anchor.endOfLine must always use this first branch
229-
// The behavior of `$` should depend on `anchorsMatchNewlines`, but
230-
// the DSL-based `.endOfLine` anchor should always match the end
231-
// of a line. Right now we don't distinguish between those anchors.
232247
if options.anchorsMatchNewlines {
233-
builder.buildAssert { [semanticLevel = options.semanticLevel]
234-
(_, _, input, pos, subjectBounds) in
235-
if pos == subjectBounds.upperBound { return true }
236-
switch semanticLevel {
237-
case .graphemeCluster:
238-
return input[pos].isNewline
239-
case .unicodeScalar:
240-
return input.unicodeScalars[pos].isNewline
241-
}
242-
}
248+
emitEndOfLine()
243249
} else {
244250
builder.buildAssert { (_, _, input, pos, subjectBounds) in
245251
pos == subjectBounds.upperBound

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -651,12 +651,16 @@ extension DSLTree.Atom.Assertion {
651651
// TODO: Some way to integrate this with conversion...
652652
var _patternBase: String {
653653
switch self {
654-
case .caretAnchor:
655-
// FIXME: The DSL doesn't have a way of representing this.
654+
case .startOfLine:
656655
return "Anchor.startOfLine"
657-
case .dollarAnchor:
658-
// FIXME: The DSL doesn't have a way of representing this.
656+
case .endOfLine:
659657
return "Anchor.endOfLine"
658+
case .caretAnchor:
659+
// The DSL doesn't have an equivalent to this, so print as regex.
660+
return "/^/"
661+
case .dollarAnchor:
662+
// The DSL doesn't have an equivalent to this, so print as regex.
663+
return "/$/"
660664
case .wordBoundary:
661665
return "Anchor.wordBoundary"
662666
case .notWordBoundary:

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,14 @@ extension DSLTree.Atom {
207207
/// \Y
208208
case notTextSegment
209209

210+
/// The DSL's Anchor.startOfLine, which matches the start of a line
211+
/// even if `anchorsMatchNewlines` is false.
212+
case startOfLine
213+
214+
/// The DSL's Anchor.endOfLine, which matches the end of a line
215+
/// even if `anchorsMatchNewlines` is false.
216+
case endOfLine
217+
210218
/// ^
211219
case caretAnchor
212220

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -802,19 +802,40 @@ class RegexDSLTests: XCTestCase {
802802
Anchor.endOfSubject
803803
}.anchorsMatchLineEndings()
804804
}
805-
806-
// FIXME: Anchor.start/endOfLine needs to always match line endings,
807-
// even when the `anchorsMatchLineEndings()` option is turned off.
805+
808806
try _testDSLCaptures(
809-
("\naaa", "aaa"),
810-
("aaa\n", "aaa"),
811-
("\naaa\n", "aaa"),
812-
matchType: Substring.self, ==, xfail: true)
807+
("\naaa", "\naaa"),
808+
("aaa\n", "aaa\n"),
809+
("\naaa\n", "\naaa\n"),
810+
matchType: Substring.self, ==)
813811
{
814812
Regex {
813+
Optionally { "\n" }
815814
Anchor.startOfLine
816815
Repeat("a", count: 3)
817816
Anchor.endOfLine
817+
Optionally { "\n" }
818+
}
819+
}
820+
821+
// startOfLine/endOfLine apply regardless of mode.
822+
for matchLineEndings in [true, false] {
823+
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
824+
let r = Regex {
825+
Anchor.startOfLine
826+
Repeat("a", count: 3)
827+
Anchor.endOfLine
828+
}.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode)
829+
830+
XCTAssertNotNil(try r.firstMatch(in: "\naaa"))
831+
XCTAssertNotNil(try r.firstMatch(in: "aaa\n"))
832+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\n"))
833+
XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n"))
834+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n"))
835+
XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n"))
836+
837+
XCTAssertNil(try r.firstMatch(in: "\nbaaa\n"))
838+
XCTAssertNil(try r.firstMatch(in: "\naaab\n"))
818839
}
819840
}
820841
}

Tests/RegexTests/RenderDSLTests.swift

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,21 @@ extension RenderDSLTests {
8585
}
8686
"""#)
8787
}
88-
88+
89+
func testAnchor() throws {
90+
try testConversion(#"^(?:a|b|c)$"#, #"""
91+
Regex {
92+
/^/
93+
ChoiceOf {
94+
"a"
95+
"b"
96+
"c"
97+
}
98+
/$/
99+
}
100+
"""#)
101+
}
102+
89103
func testOptions() throws {
90104
try XCTExpectFailure("Options like '(?i)' aren't converted") {
91105
try testConversion(#"(?i)abc"#, """

0 commit comments

Comments
 (0)