Skip to content

Commit f8e1dc2

Browse files
authored
Add anchors and assertions to the DSL (#154)
1 parent 43ae710 commit f8e1dc2

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import _MatchingEngine
13+
14+
/// An anchor for use in the regex DSL.
///
/// An `Anchor` pairs a kind of position or boundary with a flag recording
/// whether the assertion has been inverted.
public struct Anchor {
  /// The kinds of anchor that can be expressed. Each case is mapped to an
  /// `AST.Atom.AssertionKind` by `astAssertion`.
  enum Kind {
    case startOfSubject,
         endOfSubjectBeforeNewline,
         endOfSubject,
         firstMatchingPositionInSubject,
         textSegmentBoundary,
         startOfLine,
         endOfLine,
         wordBoundary
  }

  /// Which kind of anchor this value represents.
  var kind: Kind

  /// Whether this anchor is inverted; `false` unless set otherwise.
  var isInverted: Bool = false
}
29+
30+
extension Anchor: RegexProtocol {
  /// The AST assertion kind corresponding to this anchor's `kind` and
  /// `isInverted` flag.
  ///
  /// Only `textSegmentBoundary` and `wordBoundary` have an inverted form
  /// here; evaluating this property on any other inverted anchor traps with
  /// "Not yet supported".
  var astAssertion: AST.Atom.AssertionKind {
    switch (kind, isInverted) {
    // Non-inverted anchors map one-to-one onto an assertion kind.
    case (.startOfSubject, false):
      return .startOfSubject
    case (.endOfSubjectBeforeNewline, false):
      return .endOfSubjectBeforeNewline
    case (.endOfSubject, false):
      return .endOfSubject
    case (.firstMatchingPositionInSubject, false):
      return .firstMatchingPositionInSubject
    case (.textSegmentBoundary, false):
      return .textSegment
    case (.startOfLine, false):
      return .startOfLine
    case (.endOfLine, false):
      return .endOfLine
    case (.wordBoundary, false):
      return .wordBoundary

    // Inverted anchors that have a negated counterpart.
    case (.textSegmentBoundary, true):
      return .notTextSegment
    case (.wordBoundary, true):
      return .notWordBoundary

    // Every other inverted anchor is currently unsupported.
    case (.startOfSubject, true),
         (.endOfSubjectBeforeNewline, true),
         (.endOfSubject, true),
         (.firstMatchingPositionInSubject, true),
         (.startOfLine, true),
         (.endOfLine, true):
      fatalError("Not yet supported")
    }
  }

  /// A regex whose root node is a single assertion atom for this anchor.
  public var regex: Regex<Substring> {
    let assertion = astAssertion
    return Regex(node: .atom(.assertion(assertion)))
  }
}
61+
62+
// MARK: - Public API
63+
64+
extension Anchor {
  /// A non-inverted anchor of kind `startOfSubject`.
  public static var startOfSubject: Anchor {
    Self(kind: .startOfSubject)
  }

  /// A non-inverted anchor of kind `endOfSubjectBeforeNewline`.
  public static var endOfSubjectBeforeNewline: Anchor {
    Self(kind: .endOfSubjectBeforeNewline)
  }

  /// A non-inverted anchor of kind `endOfSubject`.
  public static var endOfSubject: Anchor {
    Self(kind: .endOfSubject)
  }

  // TODO: Are we supporting this? (`Kind` would need a matching
  // `resetStartOfMatch` case before this can be enabled.)
  // public static var resetStartOfMatch: Anchor {
  //   Anchor(kind: .resetStartOfMatch)
  // }

  /// A non-inverted anchor of kind `firstMatchingPositionInSubject`.
  public static var firstMatchingPositionInSubject: Anchor {
    Self(kind: .firstMatchingPositionInSubject)
  }

  /// A non-inverted anchor of kind `textSegmentBoundary`.
  public static var textSegmentBoundary: Anchor {
    Self(kind: .textSegmentBoundary)
  }

  /// A non-inverted anchor of kind `startOfLine`.
  public static var startOfLine: Anchor {
    Self(kind: .startOfLine)
  }

  /// A non-inverted anchor of kind `endOfLine`.
  public static var endOfLine: Anchor {
    Self(kind: .endOfLine)
  }

  /// A non-inverted anchor of kind `wordBoundary`.
  public static var wordBoundary: Anchor {
    Self(kind: .wordBoundary)
  }

  /// A copy of this anchor with its `isInverted` flag flipped.
  ///
  /// Not every kind can be lowered once inverted: `astAssertion` traps with
  /// "Not yet supported" for inverted anchors other than
  /// `textSegmentBoundary` and `wordBoundary`.
  public var inverted: Anchor {
    var flipped = self
    flipped.isInverted = !isInverted
    return flipped
  }
}
108+
109+
/// Builds a lookahead group around the regex produced by `content`.
///
/// - Parameters:
///   - negative: When `true`, a `.negativeLookahead` group is created instead
///     of a `.lookahead` group. Defaults to `false`.
///   - content: A regex-builder closure producing the component to wrap.
/// - Returns: A regex whose root node is the resulting group.
public func lookahead<R: RegexProtocol>(
  negative: Bool = false,
  @RegexBuilder _ content: () -> R
) -> Regex<R.Match> {
  let wrapped = content().regex.root
  if negative {
    return Regex(node: .group(.negativeLookahead, wrapped))
  }
  return Regex(node: .group(.lookahead, wrapped))
}
115+
116+
/// Builds a lookahead group around an existing regex component.
///
/// - Parameters:
///   - component: The component whose regex is wrapped in the group.
///   - negative: When `true`, a `.negativeLookahead` group is created instead
///     of a `.lookahead` group. Defaults to `false`.
/// - Returns: A regex whose root node is the resulting group.
public func lookahead<R: RegexProtocol>(
  _ component: R,
  negative: Bool = false
) -> Regex<R.Match> {
  let wrapped = component.regex.root
  if negative {
    return Regex(node: .group(.negativeLookahead, wrapped))
  }
  return Regex(node: .group(.lookahead, wrapped))
}

Tests/RegexTests/RegexDSLTests.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,33 @@ class RegexDSLTests: XCTestCase {
215215
repeating(0...) { "f" }
216216
}
217217
}
218+
219+
/// Exercises `Anchor` components and the `lookahead` functions in the DSL.
func testAssertions() throws {
  // Pattern bracketed by `Anchor.startOfLine` and `Anchor.endOfLine`.
  // Each tuple pairs an input with its expected capture; `nil` presumably
  // means "no match" — confirm against `_testDSLCaptures`.
  try _testDSLCaptures(
    ("aaaaab", "aaaaab"),
    ("caaaaab", nil),
    ("aaaaabc", nil),
    captureType: Substring.self,
    ==
  ) {
    Anchor.startOfLine
    "a".+
    "b"
    Anchor.endOfLine
  }

  // A positive lookahead for a digit combined with a negative lookahead
  // for "2", followed by a word character.
  try _testDSLCaptures(
    ("aaaaa1", "aaaaa1"),
    ("aaaaa2", nil),
    ("aaaaa", nil),
    ("aaaaab", nil),
    captureType: Substring.self,
    ==
  ) {
    "a".+
    lookahead(CharacterClass.digit)
    lookahead("2", negative: true)
    CharacterClass.word
  }
}
218245

219246
func testNestedGroups() throws {
220247
return;

0 commit comments

Comments
 (0)