Skip to content

Commit 23ff648

Browse files
authored
Track source locations through the AST (#57)
* Track more source locations: Group and Quantification now track source locations, though many places in the parser still fake them.
1 parent 53d1ce2 commit 23ff648

26 files changed

+816
-778
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 110 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -1,118 +1,158 @@
11
/// A regex abstract syntax tree
2-
public enum AST: ASTValue/*, ASTAction*/ {
3-
public typealias Product = Self
2+
public indirect enum AST:
3+
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
4+
{
5+
/// Located value: a value wrapped with a source range
6+
public typealias Loc = Source.Value
47

58
/// ... | ... | ...
6-
indirect case alternation([AST])
9+
case alternation(Alternation)
710

811
/// ... ...
9-
indirect case concatenation([AST])
12+
case concatenation(Concatenation)
1013

1114
/// (...)
12-
indirect case group(Group, AST)
15+
case group(Group)
1316

14-
indirect case quantification(Quantifier, AST)
17+
case quantification(Quantification)
1518

16-
case quote(String)
19+
/// \Q...\E
20+
case quote(Quote)
1721

18-
case trivia // TODO: track comments
22+
///
23+
case trivia(Trivia) // TODO: track comments
1924

2025
case atom(Atom)
2126

2227
case customCharacterClass(CustomCharacterClass)
2328

24-
case empty
29+
case empty(Empty)
2530

2631

2732
// FIXME: Move off the regex literal AST
28-
indirect case groupTransform(
29-
Group, AST, transform: CaptureTransform)
33+
case groupTransform(
34+
Group, transform: CaptureTransform)
3035
}
3136

32-
extension AST {
33-
public static var any: AST {
34-
.atom(.any)
35-
}
36-
}
37-
38-
// Note that we're not yet an ASTEntity, would need to be a struct.
39-
// We might end up with ASTStorage which projects the nice AST type.
40-
// Values and projected entities can still refer to positions.
41-
// ASTStorage might end up becoming the ASTAction conformer
37+
// TODO: This is currently unused, but it's likely we'll want
38+
// to host things like global options, more source info, etc.
4239
private struct ASTStorage {
4340
let ast: AST
4441
let sourceRange: SourceRange?
4542
}
4643

4744
extension AST {
48-
public var isSemantic: Bool {
45+
// :-(
46+
var _associatedValue: _ASTNode {
4947
switch self {
50-
case .trivia: return false
51-
default: return true
48+
case let .alternation(v): return v
49+
case let .concatenation(v): return v
50+
case let .group(v): return v
51+
case let .quantification(v): return v
52+
case let .quote(v): return v
53+
case let .trivia(v): return v
54+
case let .atom(v): return v
55+
case let .customCharacterClass(v): return v
56+
case let .empty(v): return v
57+
58+
case let .groupTransform(g, _):
59+
return g // FIXME: get this out of here
5260
}
5361
}
5462

55-
func filter(_ f: (AST) -> Bool) -> AST? {
56-
func filt(_ children: [AST]) -> [AST] {
57-
children.compactMap {
58-
guard f($0) else { return nil }
59-
return $0.filter(f)
60-
}
63+
/// If this node is a parent node, access its children
64+
public var children: [AST]? {
65+
return (_associatedValue as? _ASTParent)?.children
66+
}
67+
68+
public var sourceRange: SourceRange {
69+
_associatedValue.sourceRange
70+
}
71+
72+
/// Whether this node is "trivia" or non-semantic, like comments
73+
public var isTrivia: Bool {
74+
switch self {
75+
case .trivia: return true
76+
default: return false
6177
}
62-
func filt(_ cc: CustomCharacterClass) -> CustomCharacterClass {
63-
CustomCharacterClass(cc.start, filt(cc.members))
78+
}
79+
80+
/// Whether this node has nested somewhere inside it a capture
81+
public var hasCapture: Bool {
82+
if case let .group(g) = self, g.kind.value.isCapturing {
83+
return true
84+
}
85+
86+
return self.children?.any(\.hasCapture) ?? false
87+
}
88+
}
89+
90+
// MARK: - AST types
91+
92+
extension AST {
93+
94+
public struct Alternation: Hashable, _ASTNode {
95+
public let children: [AST]
96+
public let sourceRange: SourceRange
97+
98+
public init(_ mems: [AST], _ sourceRange: SourceRange) {
99+
self.children = mems
100+
self.sourceRange = sourceRange
64101
}
65-
typealias CCCMember = CustomCharacterClass.Member
66-
func filt(_ children: [CCCMember]) -> [CCCMember] {
67-
children.compactMap {
68-
switch $0 {
69-
case let .custom(cc):
70-
return .custom(filt(cc))
71-
case .range(let lhs, let rhs):
72-
guard let filtLHS = f(.atom(lhs)) ? lhs : nil else { return nil }
73-
guard let filtRHS = f(.atom(rhs)) ? rhs : nil else { return nil }
74-
return .range(filtLHS, filtRHS)
75-
case let .atom(atom):
76-
return f(.atom(atom)) ? .atom(atom) : nil
77-
case let .setOperation(lhsMembers, op, rhsMembers):
78-
return .setOperation(filt(lhsMembers), op, filt(rhsMembers))
79-
}
80-
}
102+
103+
public var _dumpBase: String { "alternation" }
104+
}
105+
106+
public struct Concatenation: Hashable, _ASTNode {
107+
public let children: [AST]
108+
public let sourceRange: SourceRange
109+
110+
public init(_ mems: [AST], _ sourceRange: SourceRange) {
111+
self.children = mems
112+
self.sourceRange = sourceRange
81113
}
82-
switch self {
83-
case let .alternation(children):
84-
return .alternation(filt(children))
85114

86-
case let .concatenation(children):
87-
return .concatenation(filt(children))
115+
public var _dumpBase: String { "" }
116+
}
88117

89-
case let .customCharacterClass(cc):
90-
return .customCharacterClass(filt(cc))
118+
public struct Quote: Hashable, _ASTNode {
119+
public let literal: String
120+
public let sourceRange: SourceRange
91121

92-
case let .group(g, child):
93-
guard let c = child.filter(f) else { return nil }
94-
return .group(g, c)
122+
public init(_ s: String, _ sourceRange: SourceRange) {
123+
self.literal = s
124+
self.sourceRange = sourceRange
125+
}
126+
127+
public var _dumpBase: String { "quote" }
128+
}
95129

96-
case let .groupTransform(g, child, transform):
97-
guard let c = child.filter(f) else { return nil }
98-
return .groupTransform(g, c, transform: transform)
130+
public struct Trivia: Hashable, _ASTNode {
131+
// TODO: Contents of trivia, kinds, etc
132+
public let sourceRange: SourceRange
99133

100-
case let .quantification(q, child):
101-
guard let c = child.filter(f) else { return nil }
102-
return .quantification(q, c)
134+
public init(_ sourceRange: SourceRange) {
135+
self.sourceRange = sourceRange
136+
}
103137

104-
case .any, .trivia, .quote, .atom, .empty:
105-
return f(self) ? self : nil
138+
public var _dumpBase: String {
139+
// TODO: comments, non-semantic whitespace, etc.
140+
""
106141
}
107142
}
108143

109-
public var strippingTrivia: AST? {
110-
filter(\.isSemantic)
144+
public struct Empty: Hashable, _ASTNode {
145+
public let sourceRange: SourceRange
146+
147+
public init(_ sourceRange: SourceRange) {
148+
self.sourceRange = sourceRange
149+
}
150+
151+
public var _dumpBase: String { "" }
111152
}
112153
}
113154

114-
// FIXME: Probably remove this from the AST
115-
155+
// FIXME: Get this out of here
116156
public struct CaptureTransform: Equatable, Hashable, CustomStringConvertible {
117157
public let closure: (Substring) -> Any
118158

Sources/_MatchingEngine/Regex/AST/ASTBuilder.swift

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -16,25 +16,28 @@ AST.
1616

1717
public let _fakeLoc = "".startIndex
1818
public let _fakeRange = _fakeLoc ..< _fakeLoc
19+
public func _fake<T: Hashable>(_ t: T) -> AST.Loc<T> {
20+
.init(t, _fakeRange)
21+
}
1922

2023
public func alt(_ asts: [AST]) -> AST {
21-
.alternation(asts)
24+
.alternation(.init(asts, _fakeRange))
2225
}
2326
public func alt(_ asts: AST...) -> AST {
2427
alt(asts)
2528
}
2629

2730
public func concat(_ asts: [AST]) -> AST {
28-
.concatenation(asts)
31+
.concatenation(.init(asts, _fakeRange))
2932
}
3033
public func concat(_ asts: AST...) -> AST {
3134
concat(asts)
3235
}
3336

3437
public func group(
35-
_ kind: Group.Kind, _ child: AST
38+
_ kind: AST.Group.Kind, _ child: AST
3639
) -> AST {
37-
.group(Group(kind, _fakeRange), child)
40+
.group(.init(_fake(kind), child, _fakeRange))
3841
}
3942
public func capture(
4043
_ child: AST
@@ -50,7 +53,7 @@ public func namedCapture(
5053
_ name: String,
5154
_ child: AST
5255
) -> AST {
53-
group(.namedCapture(name), child)
56+
group(.namedCapture(_fake(name)), child)
5457
}
5558
public func nonCaptureReset(
5659
_ child: AST
@@ -75,68 +78,71 @@ public func negativeLookbehind(_ child: AST) -> AST {
7578
group(.negativeLookbehind, child)
7679
}
7780

81+
7882
public var any: AST { .atom(.any) }
7983

8084
public func quant(
81-
_ amount: Quantifier.Amount,
82-
_ kind: Quantifier.Kind = .greedy,
85+
_ amount: AST.Quantification.Amount,
86+
_ kind: AST.Quantification.Kind = .greedy,
8387
_ child: AST
8488
) -> AST {
85-
.quantification(Quantifier(amount, kind, _fakeRange), child)
89+
.quantification(.init(
90+
_fake(amount), _fake(kind), child, _fakeRange))
8691
}
8792
public func zeroOrMore(
88-
_ kind: Quantifier.Kind = .greedy,
93+
_ kind: AST.Quantification.Kind = .greedy,
8994
_ child: AST
9095
) -> AST {
9196
quant(.zeroOrMore, kind, child)
9297
}
9398
public func zeroOrOne(
94-
_ kind: Quantifier.Kind = .greedy,
99+
_ kind: AST.Quantification.Kind = .greedy,
95100
_ child: AST
96101
) -> AST {
97102
quant(.zeroOrOne, kind, child)
98103
}
99104
public func oneOrMore(
100-
_ kind: Quantifier.Kind = .greedy,
105+
_ kind: AST.Quantification.Kind = .greedy,
101106
_ child: AST
102107
) -> AST {
103108
quant(.oneOrMore, kind, child)
104109
}
105110
public func exactly(
106-
_ kind: Quantifier.Kind = .greedy,
111+
_ kind: AST.Quantification.Kind = .greedy,
107112
_ i: Int,
108-
child: AST
113+
_ child: AST
109114
) -> AST {
110-
quant(.exactly(i), kind, child)
115+
quant(.exactly(_fake(i)), kind, child)
111116
}
112117
public func nOrMore(
113-
_ kind: Quantifier.Kind = .greedy,
118+
_ kind: AST.Quantification.Kind = .greedy,
114119
_ i: Int,
115-
child: AST
120+
_ child: AST
116121
) -> AST {
117-
quant(.nOrMore(i), kind, child)
122+
quant(.nOrMore(_fake(i)), kind, child)
118123
}
119124
public func upToN(
120-
_ kind: Quantifier.Kind = .greedy,
125+
_ kind: AST.Quantification.Kind = .greedy,
121126
_ i: Int,
122-
child: AST
127+
_ child: AST
123128
) -> AST {
124-
quant(.upToN(i), kind, child)
129+
quant(.upToN(_fake(i)), kind, child)
125130
}
126131
public func quantRange(
127-
_ kind: Quantifier.Kind = .greedy,
132+
_ kind: AST.Quantification.Kind = .greedy,
128133
_ r: ClosedRange<Int>,
129-
child: AST
134+
_ child: AST
130135
) -> AST {
131-
quant(.range(r), kind, child)
136+
let range = _fake(r.lowerBound) ... _fake(r.upperBound)
137+
return quant(.range(range), kind, child)
132138
}
133139

134140
public func charClass(
135141
_ members: CustomCharacterClass.Member...,
136142
inverted: Bool = false
137143
) -> AST {
138144
let cc = CustomCharacterClass(
139-
inverted ? .inverted : .normal, members
145+
inverted ? .inverted : .normal, members, _fakeRange
140146
)
141147
return .customCharacterClass(cc)
142148
}
@@ -145,16 +151,21 @@ public func charClass(
145151
inverted: Bool = false
146152
) -> CustomCharacterClass.Member {
147153
let cc = CustomCharacterClass(
148-
inverted ? .inverted : .normal, members
154+
inverted ? .inverted : .normal, members, _fakeRange
149155
)
150156
return .custom(cc)
151157
}
152158
public func posixSet(
153159
_ set: Unicode.POSIXCharacterSet, inverted: Bool = false
154160
) -> Atom {
155-
return .namedSet(.init(inverted: inverted, set: set))
161+
.namedSet(.init(inverted: inverted, set: set))
162+
}
163+
164+
public func quote(_ s: String) -> AST {
165+
.quote(.init(s, _fakeRange))
156166
}
157-
func prop(
167+
168+
public func prop(
158169
_ kind: Atom.CharacterProperty.Kind, inverted: Bool = false
159170
) -> Atom {
160171
return .property(.init(kind, isInverted: inverted))

0 commit comments

Comments
 (0)