Skip to content

Commit 21ca2fb

Browse files
committed
Move AssertionKind onto the DSL
This enum will start including cases that only the DSL can use, so move it off the AST.
1 parent 657351e commit 21ca2fb

File tree

8 files changed

+102
-116
lines changed

8 files changed

+102
-116
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,30 @@ public struct Anchor {
3737

3838
@available(SwiftStdlib 5.7, *)
3939
extension Anchor: RegexComponent {
40-
var baseAssertion: DSLTree._AST.AssertionKind {
40+
var baseAssertion: DSLTree.Atom.Assertion {
4141
switch kind {
42-
case .startOfSubject: return .startOfSubject(isInverted)
43-
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
44-
case .endOfSubject: return .endOfSubject(isInverted)
45-
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
46-
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
47-
case .startOfLine: return .startOfLine(isInverted)
48-
case .endOfLine: return .endOfLine(isInverted)
49-
case .wordBoundary: return .wordBoundary(isInverted)
42+
case .startOfSubject:
43+
// FIXME: Inverted?
44+
return .startOfSubject
45+
case .endOfSubjectBeforeNewline:
46+
// FIXME: Inverted?
47+
return .endOfSubjectBeforeNewline
48+
case .endOfSubject:
49+
// FIXME: Inverted?
50+
return .endOfSubject
51+
case .firstMatchingPositionInSubject:
52+
// FIXME: Inverted?
53+
return .firstMatchingPositionInSubject
54+
case .textSegmentBoundary:
55+
return isInverted ? .notTextSegment : .textSegment
56+
case .startOfLine:
57+
// FIXME: Inverted?
58+
return .caretAnchor
59+
case .endOfLine:
60+
// FIXME: Inverted?
61+
return .dollarAnchor
62+
case .wordBoundary:
63+
return isInverted ? .notWordBoundary : .wordBoundary
5064
}
5165
}
5266

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
511511
}
512512
}
513513

514-
extension AST.Atom {
515-
/// Anchors and other built-in zero-width assertions.
516-
public enum AssertionKind: String, Hashable {
517-
/// \A
518-
case startOfSubject = #"\A"#
519-
520-
/// \Z
521-
case endOfSubjectBeforeNewline = #"\Z"#
522-
523-
/// \z
524-
case endOfSubject = #"\z"#
525-
526-
/// \K
527-
case resetStartOfMatch = #"\K"#
528-
529-
/// \G
530-
case firstMatchingPositionInSubject = #"\G"#
531-
532-
/// \y
533-
case textSegment = #"\y"#
534-
535-
/// \Y
536-
case notTextSegment = #"\Y"#
537-
538-
/// ^
539-
case caretAnchor = #"^"#
540-
541-
/// $
542-
case dollarAnchor = #"$"#
543-
544-
/// \b (from outside a custom character class)
545-
case wordBoundary = #"\b"#
546-
547-
/// \B
548-
case notWordBoundary = #"\B"#
549-
550-
}
551-
552-
public var assertionKind: AssertionKind? {
553-
switch kind {
554-
case .caretAnchor: return .caretAnchor
555-
case .dollarAnchor: return .dollarAnchor
556-
557-
case .escaped(.wordBoundary): return .wordBoundary
558-
case .escaped(.notWordBoundary): return .notWordBoundary
559-
case .escaped(.startOfSubject): return .startOfSubject
560-
case .escaped(.endOfSubject): return .endOfSubject
561-
case .escaped(.textSegment): return .textSegment
562-
case .escaped(.notTextSegment): return .notTextSegment
563-
case .escaped(.endOfSubjectBeforeNewline):
564-
return .endOfSubjectBeforeNewline
565-
case .escaped(.firstMatchingPositionInSubject):
566-
return .firstMatchingPositionInSubject
567-
568-
case .escaped(.resetStartOfMatch): return .resetStartOfMatch
569-
570-
default: return nil
571-
}
572-
}
573-
}
574-
575514
extension AST.Atom {
576515
public enum Callout: Hashable {
577516
/// A PCRE callout written `(?C...)`

Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,6 @@ extension AST.Atom.Number {
237237

238238
extension AST.Atom {
239239
var _canonicalBase: String {
240-
if let anchor = self.assertionKind {
241-
return anchor.rawValue
242-
}
243240
if let lit = self.literalStringValue {
244241
// FIXME: We may have to re-introduce escapes
245242
// For example, `\.` will come back as "." instead
@@ -248,6 +245,10 @@ extension AST.Atom {
248245
return lit
249246
}
250247
switch self.kind {
248+
case .caretAnchor:
249+
return "^"
250+
case .dollarAnchor:
251+
return "$"
251252
case .escaped(let e):
252253
return "\\\(e.character)"
253254
case .backreference(let br):

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ fileprivate extension Compiler.ByteCodeGen {
7272
}
7373

7474
case let .assertion(kind):
75-
try emitAssertion(kind.ast)
75+
try emitAssertion(kind)
7676

7777
case let .backreference(ref):
7878
try emitBackreference(ref.ast)
@@ -146,7 +146,7 @@ fileprivate extension Compiler.ByteCodeGen {
146146
}
147147

148148
mutating func emitAssertion(
149-
_ kind: AST.Atom.AssertionKind
149+
_ kind: DSLTree.Atom.Assertion
150150
) throws {
151151
// FIXME: Depends on API model we have... We may want to
152152
// think through some of these with API interactions in mind

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ extension StringLiteralBuilder: CustomStringConvertible {
647647
var description: String { result }
648648
}
649649

650-
extension AST.Atom.AssertionKind {
650+
extension DSLTree.Atom.Assertion {
651651
// TODO: Some way to integrate this with conversion...
652652
var _patternBase: String {
653653
switch self {
@@ -835,7 +835,7 @@ extension AST.Atom {
835835
///
836836
/// TODO: Some way to integrate this with conversion...
837837
var _patternBase: (String, canBeWrapped: Bool) {
838-
if let anchor = self.assertionKind {
838+
if let anchor = self.dslAssertionKind {
839839
return (anchor._patternBase, false)
840840
}
841841

@@ -1148,7 +1148,7 @@ extension DSLTree.Atom {
11481148
}
11491149

11501150
case .assertion(let a):
1151-
return (a.ast._patternBase, false)
1151+
return (a._patternBase, false)
11521152

11531153
case .backreference(_):
11541154
return ("/* TOOD: backreferences */", false)

Sources/_StringProcessing/Regex/ASTConversion.swift

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,38 @@ extension AST.CustomCharacterClass {
208208
}
209209
}
210210

211+
extension AST.Atom.EscapedBuiltin {
212+
var dslAssertionKind: DSLTree.Atom.Assertion? {
213+
switch self {
214+
case .wordBoundary: return .wordBoundary
215+
case .notWordBoundary: return .notWordBoundary
216+
case .startOfSubject: return .startOfSubject
217+
case .endOfSubject: return .endOfSubject
218+
case .textSegment: return .textSegment
219+
case .notTextSegment: return .notTextSegment
220+
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline
221+
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject
222+
case .resetStartOfMatch: return .resetStartOfMatch
223+
default: return nil
224+
}
225+
}
226+
}
227+
228+
extension AST.Atom {
229+
var dslAssertionKind: DSLTree.Atom.Assertion? {
230+
switch kind {
231+
case .caretAnchor: return .caretAnchor
232+
case .dollarAnchor: return .dollarAnchor
233+
case .escaped(let b): return b.dslAssertionKind
234+
default: return nil
235+
}
236+
}
237+
}
238+
211239
extension AST.Atom {
212240
var dslTreeAtom: DSLTree.Atom {
213-
if let kind = assertionKind {
214-
return .assertion(.init(ast: kind))
241+
if let kind = dslAssertionKind {
242+
return .assertion(kind)
215243
}
216244

217245
switch self.kind {

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ extension DSLTree {
173173
/// newlines unless single line mode is enabled.
174174
case dot
175175

176-
case assertion(_AST.AssertionKind)
176+
case assertion(Assertion)
177177
case backreference(_AST.Reference)
178178
case symbolicReference(ReferenceID)
179179

@@ -183,6 +183,44 @@ extension DSLTree {
183183
}
184184
}
185185

186+
extension DSLTree.Atom {
187+
@_spi(RegexBuilder)
188+
public enum Assertion: Hashable {
189+
/// \A
190+
case startOfSubject
191+
192+
/// \Z
193+
case endOfSubjectBeforeNewline
194+
195+
/// \z
196+
case endOfSubject
197+
198+
/// \K
199+
case resetStartOfMatch
200+
201+
/// \G
202+
case firstMatchingPositionInSubject
203+
204+
/// \y
205+
case textSegment
206+
207+
/// \Y
208+
case notTextSegment
209+
210+
/// ^
211+
case caretAnchor
212+
213+
/// $
214+
case dollarAnchor
215+
216+
/// \b (from outside a custom character class)
217+
case wordBoundary
218+
219+
/// \B
220+
case notWordBoundary
221+
}
222+
}
223+
186224
extension Unicode.GeneralCategory {
187225
var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? {
188226
switch self {
@@ -699,40 +737,6 @@ extension DSLTree {
699737
internal var ast: AST.AbsentFunction
700738
}
701739

702-
@_spi(RegexBuilder)
703-
public struct AssertionKind {
704-
internal var ast: AST.Atom.AssertionKind
705-
706-
public static func startOfSubject(_ inverted: Bool = false) -> Self {
707-
.init(ast: .startOfSubject)
708-
}
709-
public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self {
710-
.init(ast: .endOfSubjectBeforeNewline)
711-
}
712-
public static func endOfSubject(_ inverted: Bool = false) -> Self {
713-
.init(ast: .endOfSubject)
714-
}
715-
public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self {
716-
.init(ast: .firstMatchingPositionInSubject)
717-
}
718-
public static func textSegmentBoundary(_ inverted: Bool = false) -> Self {
719-
inverted
720-
? .init(ast: .notTextSegment)
721-
: .init(ast: .textSegment)
722-
}
723-
public static func startOfLine(_ inverted: Bool = false) -> Self {
724-
.init(ast: .caretAnchor)
725-
}
726-
public static func endOfLine(_ inverted: Bool = false) -> Self {
727-
.init(ast: .dollarAnchor)
728-
}
729-
public static func wordBoundary(_ inverted: Bool = false) -> Self {
730-
inverted
731-
? .init(ast: .notWordBoundary)
732-
: .init(ast: .wordBoundary)
733-
}
734-
}
735-
736740
@_spi(RegexBuilder)
737741
public struct Reference {
738742
internal var ast: AST.Reference

Sources/_StringProcessing/Utility/RegexFactory.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public struct _RegexFactory {
4040
@_spi(RegexBuilder)
4141
@available(SwiftStdlib 5.7, *)
4242
public func assertion<Output>(
43-
_ kind: DSLTree._AST.AssertionKind
43+
_ kind: DSLTree.Atom.Assertion
4444
) -> Regex<Output> {
4545
.init(node: .atom(.assertion(kind)))
4646
}

0 commit comments

Comments
 (0)