Skip to content

Commit 3f3d253

Browse files
committed
Move AssertionKind onto the DSL
This enum will start including cases that only the DSL can use, so move it off the AST.
1 parent cb9c5fb commit 3f3d253

File tree

8 files changed, with 102 additions and 116 deletions.

Sources/RegexBuilder/Anchor.swift

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,30 @@ public struct Anchor {
3737

3838
@available(SwiftStdlib 5.7, *)
3939
extension Anchor: RegexComponent {
40-
var baseAssertion: DSLTree._AST.AssertionKind {
40+
var baseAssertion: DSLTree.Atom.Assertion {
4141
switch kind {
42-
case .startOfSubject: return .startOfSubject(isInverted)
43-
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
44-
case .endOfSubject: return .endOfSubject(isInverted)
45-
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
46-
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
47-
case .startOfLine: return .startOfLine(isInverted)
48-
case .endOfLine: return .endOfLine(isInverted)
49-
case .wordBoundary: return .wordBoundary(isInverted)
42+
case .startOfSubject:
43+
// FIXME: Inverted?
44+
return .startOfSubject
45+
case .endOfSubjectBeforeNewline:
46+
// FIXME: Inverted?
47+
return .endOfSubjectBeforeNewline
48+
case .endOfSubject:
49+
// FIXME: Inverted?
50+
return .endOfSubject
51+
case .firstMatchingPositionInSubject:
52+
// FIXME: Inverted?
53+
return .firstMatchingPositionInSubject
54+
case .textSegmentBoundary:
55+
return isInverted ? .notTextSegment : .textSegment
56+
case .startOfLine:
57+
// FIXME: Inverted?
58+
return .caretAnchor
59+
case .endOfLine:
60+
// FIXME: Inverted?
61+
return .dollarAnchor
62+
case .wordBoundary:
63+
return isInverted ? .notWordBoundary : .wordBoundary
5064
}
5165
}
5266

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
511511
}
512512
}
513513

514-
extension AST.Atom {
515-
/// Anchors and other built-in zero-width assertions.
516-
public enum AssertionKind: String, Hashable {
517-
/// \A
518-
case startOfSubject = #"\A"#
519-
520-
/// \Z
521-
case endOfSubjectBeforeNewline = #"\Z"#
522-
523-
/// \z
524-
case endOfSubject = #"\z"#
525-
526-
/// \K
527-
case resetStartOfMatch = #"\K"#
528-
529-
/// \G
530-
case firstMatchingPositionInSubject = #"\G"#
531-
532-
/// \y
533-
case textSegment = #"\y"#
534-
535-
/// \Y
536-
case notTextSegment = #"\Y"#
537-
538-
/// ^
539-
case caretAnchor = #"^"#
540-
541-
/// $
542-
case dollarAnchor = #"$"#
543-
544-
/// \b (from outside a custom character class)
545-
case wordBoundary = #"\b"#
546-
547-
/// \B
548-
case notWordBoundary = #"\B"#
549-
550-
}
551-
552-
public var assertionKind: AssertionKind? {
553-
switch kind {
554-
case .caretAnchor: return .caretAnchor
555-
case .dollarAnchor: return .dollarAnchor
556-
557-
case .escaped(.wordBoundary): return .wordBoundary
558-
case .escaped(.notWordBoundary): return .notWordBoundary
559-
case .escaped(.startOfSubject): return .startOfSubject
560-
case .escaped(.endOfSubject): return .endOfSubject
561-
case .escaped(.textSegment): return .textSegment
562-
case .escaped(.notTextSegment): return .notTextSegment
563-
case .escaped(.endOfSubjectBeforeNewline):
564-
return .endOfSubjectBeforeNewline
565-
case .escaped(.firstMatchingPositionInSubject):
566-
return .firstMatchingPositionInSubject
567-
568-
case .escaped(.resetStartOfMatch): return .resetStartOfMatch
569-
570-
default: return nil
571-
}
572-
}
573-
}
574-
575514
extension AST.Atom {
576515
public enum Callout: Hashable {
577516
/// A PCRE callout written `(?C...)`

Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,6 @@ extension AST.Atom.Number {
237237

238238
extension AST.Atom {
239239
var _canonicalBase: String {
240-
if let anchor = self.assertionKind {
241-
return anchor.rawValue
242-
}
243240
if let lit = self.literalStringValue {
244241
// FIXME: We may have to re-introduce escapes
245242
// For example, `\.` will come back as "." instead
@@ -248,6 +245,10 @@ extension AST.Atom {
248245
return lit
249246
}
250247
switch self.kind {
248+
case .caretAnchor:
249+
return "^"
250+
case .dollarAnchor:
251+
return "$"
251252
case .escaped(let e):
252253
return "\\\(e.character)"
253254
case .backreference(let br):

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ fileprivate extension Compiler.ByteCodeGen {
6868
try emitScalar(s)
6969

7070
case let .assertion(kind):
71-
try emitAssertion(kind.ast)
71+
try emitAssertion(kind)
7272

7373
case let .backreference(ref):
7474
try emitBackreference(ref.ast)
@@ -114,7 +114,7 @@ fileprivate extension Compiler.ByteCodeGen {
114114
}
115115

116116
mutating func emitAssertion(
117-
_ kind: AST.Atom.AssertionKind
117+
_ kind: DSLTree.Atom.Assertion
118118
) throws {
119119
// FIXME: Depends on API model we have... We may want to
120120
// think through some of these with API interactions in mind

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ extension String {
623623
}
624624
}
625625

626-
extension AST.Atom.AssertionKind {
626+
extension DSLTree.Atom.Assertion {
627627
// TODO: Some way to integrate this with conversion...
628628
var _patternBase: String {
629629
switch self {
@@ -811,7 +811,7 @@ extension AST.Atom {
811811
///
812812
/// TODO: Some way to integrate this with conversion...
813813
var _patternBase: (String, canBeWrapped: Bool) {
814-
if let anchor = self.assertionKind {
814+
if let anchor = self.dslAssertionKind {
815815
return (anchor._patternBase, false)
816816
}
817817

@@ -1124,7 +1124,7 @@ extension DSLTree.Atom {
11241124
}
11251125

11261126
case .assertion(let a):
1127-
return (a.ast._patternBase, false)
1127+
return (a._patternBase, false)
11281128

11291129
case .backreference(_):
11301130
return ("/* TOOD: backreferences */", false)

Sources/_StringProcessing/Regex/ASTConversion.swift

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,38 @@ extension AST.CustomCharacterClass {
208208
}
209209
}
210210

211+
extension AST.Atom.EscapedBuiltin {
212+
var dslAssertionKind: DSLTree.Atom.Assertion? {
213+
switch self {
214+
case .wordBoundary: return .wordBoundary
215+
case .notWordBoundary: return .notWordBoundary
216+
case .startOfSubject: return .startOfSubject
217+
case .endOfSubject: return .endOfSubject
218+
case .textSegment: return .textSegment
219+
case .notTextSegment: return .notTextSegment
220+
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline
221+
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject
222+
case .resetStartOfMatch: return .resetStartOfMatch
223+
default: return nil
224+
}
225+
}
226+
}
227+
228+
extension AST.Atom {
229+
var dslAssertionKind: DSLTree.Atom.Assertion? {
230+
switch kind {
231+
case .caretAnchor: return .caretAnchor
232+
case .dollarAnchor: return .dollarAnchor
233+
case .escaped(let b): return b.dslAssertionKind
234+
default: return nil
235+
}
236+
}
237+
}
238+
211239
extension AST.Atom {
212240
var dslTreeAtom: DSLTree.Atom {
213-
if let kind = assertionKind {
214-
return .assertion(.init(ast: kind))
241+
if let kind = dslAssertionKind {
242+
return .assertion(kind)
215243
}
216244

217245
switch self.kind {

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ extension DSLTree {
253253
/// newlines unless single line mode is enabled.
254254
case dot
255255

256-
case assertion(_AST.AssertionKind)
256+
case assertion(Assertion)
257257
case backreference(_AST.Reference)
258258
case symbolicReference(ReferenceID)
259259

@@ -263,6 +263,44 @@ extension DSLTree {
263263
}
264264
}
265265

266+
extension DSLTree.Atom {
267+
@_spi(RegexBuilder)
268+
public enum Assertion: Hashable {
269+
/// \A
270+
case startOfSubject
271+
272+
/// \Z
273+
case endOfSubjectBeforeNewline
274+
275+
/// \z
276+
case endOfSubject
277+
278+
/// \K
279+
case resetStartOfMatch
280+
281+
/// \G
282+
case firstMatchingPositionInSubject
283+
284+
/// \y
285+
case textSegment
286+
287+
/// \Y
288+
case notTextSegment
289+
290+
/// ^
291+
case caretAnchor
292+
293+
/// $
294+
case dollarAnchor
295+
296+
/// \b (from outside a custom character class)
297+
case wordBoundary
298+
299+
/// \B
300+
case notWordBoundary
301+
}
302+
}
303+
266304
extension Unicode.GeneralCategory {
267305
var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? {
268306
switch self {
@@ -779,40 +817,6 @@ extension DSLTree {
779817
internal var ast: AST.AbsentFunction
780818
}
781819

782-
@_spi(RegexBuilder)
783-
public struct AssertionKind {
784-
internal var ast: AST.Atom.AssertionKind
785-
786-
public static func startOfSubject(_ inverted: Bool = false) -> Self {
787-
.init(ast: .startOfSubject)
788-
}
789-
public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self {
790-
.init(ast: .endOfSubjectBeforeNewline)
791-
}
792-
public static func endOfSubject(_ inverted: Bool = false) -> Self {
793-
.init(ast: .endOfSubject)
794-
}
795-
public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self {
796-
.init(ast: .firstMatchingPositionInSubject)
797-
}
798-
public static func textSegmentBoundary(_ inverted: Bool = false) -> Self {
799-
inverted
800-
? .init(ast: .notTextSegment)
801-
: .init(ast: .textSegment)
802-
}
803-
public static func startOfLine(_ inverted: Bool = false) -> Self {
804-
.init(ast: .caretAnchor)
805-
}
806-
public static func endOfLine(_ inverted: Bool = false) -> Self {
807-
.init(ast: .dollarAnchor)
808-
}
809-
public static func wordBoundary(_ inverted: Bool = false) -> Self {
810-
inverted
811-
? .init(ast: .notWordBoundary)
812-
: .init(ast: .wordBoundary)
813-
}
814-
}
815-
816820
@_spi(RegexBuilder)
817821
public struct Reference {
818822
internal var ast: AST.Reference

Sources/_StringProcessing/Utility/RegexFactory.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public struct _RegexFactory {
4040
@_spi(RegexBuilder)
4141
@available(SwiftStdlib 5.7, *)
4242
public func assertion<Output>(
43-
_ kind: DSLTree._AST.AssertionKind
43+
_ kind: DSLTree.Atom.Assertion
4444
) -> Regex<Output> {
4545
.init(node: .atom(.assertion(kind)))
4646
}

0 commit comments

Comments
 (0)