Skip to content

Commit 4984d75

Browse files
committed
WIP
1 parent 3da9083 commit 4984d75

24 files changed

+830
-714
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 96 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
/// A regex abstract syntax tree
2-
public enum AST: ASTValue/*, ASTAction*/ {
3-
public typealias Product = Self
2+
public indirect enum AST:
3+
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
4+
{
5+
/// Located value: a value wrapped with a source range
6+
public typealias Loc = Source.Value
47

58
/// ... | ... | ...
6-
indirect case alternation([AST])
9+
case alternation(Alternation)
710

811
/// ... ...
9-
indirect case concatenation([AST])
12+
case concatenation(Concatenation)
1013

1114
/// (...)
12-
indirect case group(Group, AST)
15+
case group(Group)
1316

14-
indirect case quantification(Quantifier, AST)
17+
case quantification(Quantification)
1518

16-
case quote(String)
19+
/// \Q...\E
20+
case quote(Quote)
1721

18-
case trivia // TODO: track comments
22+
/// Non-semantic content, like comments
23+
case trivia(Trivia) // TODO: track comments
1924

2025
case atom(Atom)
2126

@@ -25,94 +30,116 @@ public enum AST: ASTValue/*, ASTAction*/ {
2530

2631

2732
// FIXME: Move off the regex literal AST
28-
indirect case groupTransform(
29-
Group, AST, transform: CaptureTransform)
33+
case groupTransform(
34+
Group, transform: CaptureTransform)
3035
}
3136

32-
extension AST {
33-
public static var any: AST {
34-
.atom(.any)
35-
}
36-
}
37-
38-
// Note that we're not yet an ASTEntity, would need to be a struct.
39-
// We might end up with ASTStorage which projects the nice AST type.
40-
// Values and projected entities can still refer to positions.
41-
// ASTStorage might end up becoming the ASTAction conformer
37+
// TODO: This is currently unused, but it's likely we'll want
38+
// to host things like global options, more source info, etc.
4239
private struct ASTStorage {
4340
let ast: AST
4441
let sourceRange: SourceRange?
4542
}
4643

4744
extension AST {
48-
public var isSemantic: Bool {
45+
// :-( Each case's associated value has to be unwrapped by hand here
46+
var _associatedValue: _ASTNode? {
4947
switch self {
50-
case .trivia: return false
51-
default: return true
48+
case .empty: return nil
49+
case let .alternation(v): return v
50+
case let .concatenation(v): return v
51+
case let .group(v): return v
52+
case let .quantification(v): return v
53+
case let .quote(v): return v
54+
case let .trivia(v): return v
55+
case let .atom(v): return v
56+
case let .customCharacterClass(v): return v
57+
58+
case let .groupTransform(g, _):
59+
return g // FIXME: get this out of here
5260
}
5361
}
5462

55-
func filter(_ f: (AST) -> Bool) -> AST? {
56-
func filt(_ children: [AST]) -> [AST] {
57-
children.compactMap {
58-
guard f($0) else { return nil }
59-
return $0.filter(f)
60-
}
63+
/// If this node is a parent node, access its children
64+
public var children: [AST]? {
65+
guard let av = _associatedValue else { return nil }
66+
return (av as? _ASTParent)?.children
67+
}
68+
69+
/// Whether this node is "trivia" or non-semantic, like comments
70+
public var isTrivia: Bool {
71+
switch self {
72+
case .trivia: return true
73+
default: return false
6174
}
62-
func filt(_ cc: CustomCharacterClass) -> CustomCharacterClass {
63-
CustomCharacterClass(cc.start, filt(cc.members))
75+
}
76+
77+
/// Whether this node is a capturing group, or has one nested somewhere inside it
78+
public var hasCapture: Bool {
79+
if case let .group(g) = self, g.kind.value.isCapturing {
80+
return true
6481
}
65-
typealias CCCMember = CustomCharacterClass.Member
66-
func filt(_ children: [CCCMember]) -> [CCCMember] {
67-
children.compactMap {
68-
switch $0 {
69-
case let .custom(cc):
70-
return .custom(filt(cc))
71-
case .range(let lhs, let rhs):
72-
guard let filtLHS = f(.atom(lhs)) ? lhs : nil else { return nil }
73-
guard let filtRHS = f(.atom(rhs)) ? rhs : nil else { return nil }
74-
return .range(filtLHS, filtRHS)
75-
case let .atom(atom):
76-
return f(.atom(atom)) ? .atom(atom) : nil
77-
case let .setOperation(lhsMembers, op, rhsMembers):
78-
return .setOperation(filt(lhsMembers), op, filt(rhsMembers))
79-
}
80-
}
82+
83+
return self.children?.any(\.hasCapture) ?? false
84+
}
85+
}
86+
87+
// MARK: - AST types
88+
89+
extension AST {
90+
91+
public struct Alternation: Hashable, _ASTNode {
92+
public let children: [AST]
93+
public let sourceRange: SourceRange
94+
95+
public init(_ mems: [AST], _ sourceRange: SourceRange) {
96+
self.children = mems
97+
self.sourceRange = sourceRange
8198
}
82-
switch self {
83-
case let .alternation(children):
84-
return .alternation(filt(children))
8599

86-
case let .concatenation(children):
87-
return .concatenation(filt(children))
100+
public var _dumpBase: String { "alternation" }
101+
}
88102

89-
case let .customCharacterClass(cc):
90-
return .customCharacterClass(filt(cc))
103+
public struct Concatenation: Hashable, _ASTNode {
104+
public let children: [AST]
105+
public let sourceRange: SourceRange
91106

92-
case let .group(g, child):
93-
guard let c = child.filter(f) else { return nil }
94-
return .group(g, c)
107+
public init(_ mems: [AST], _ sourceRange: SourceRange) {
108+
self.children = mems
109+
self.sourceRange = sourceRange
110+
}
95111

96-
case let .groupTransform(g, child, transform):
97-
guard let c = child.filter(f) else { return nil }
98-
return .groupTransform(g, c, transform: transform)
112+
public var _dumpBase: String { "" }
113+
}
99114

100-
case let .quantification(q, child):
101-
guard let c = child.filter(f) else { return nil }
102-
return .quantification(q, c)
115+
public struct Quote: Hashable, _ASTNode {
116+
public let literal: String
117+
public let sourceRange: SourceRange
103118

104-
case .any, .trivia, .quote, .atom, .empty:
105-
return f(self) ? self : nil
119+
public init(_ s: String, _ sourceRange: SourceRange) {
120+
self.literal = s
121+
self.sourceRange = sourceRange
106122
}
123+
124+
public var _dumpBase: String { "quote" }
107125
}
108126

109-
public var strippingTrivia: AST? {
110-
filter(\.isSemantic)
127+
public struct Trivia: Hashable, _ASTNode {
128+
// TODO: Contents of trivia, kinds, etc
129+
public let sourceRange: SourceRange
130+
131+
public init(_ sourceRange: SourceRange) {
132+
self.sourceRange = sourceRange
133+
}
134+
135+
public var _dumpBase: String {
136+
// TODO: comments, non-semantic whitespace, etc.
137+
""
138+
}
111139
}
112140
}
113141

114-
// FIXME: Probably remove this from the AST
115-
142+
// FIXME: Get this out of here
116143
public struct CaptureTransform: Equatable, Hashable, CustomStringConvertible {
117144
public let closure: (Substring) -> Any
118145

Sources/_MatchingEngine/Regex/AST/ASTBuilder.swift

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,28 @@ AST.
1616

1717
public let _fakeLoc = "".startIndex
1818
public let _fakeRange = _fakeLoc ..< _fakeLoc
19+
public func _fake<T: Hashable>(_ t: T) -> AST.Loc<T> {
20+
.init(t, _fakeRange)
21+
}
1922

2023
public func alt(_ asts: [AST]) -> AST {
21-
.alternation(asts)
24+
.alternation(.init(asts, _fakeRange))
2225
}
2326
public func alt(_ asts: AST...) -> AST {
2427
alt(asts)
2528
}
2629

2730
public func concat(_ asts: [AST]) -> AST {
28-
.concatenation(asts)
31+
.concatenation(.init(asts, _fakeRange))
2932
}
3033
public func concat(_ asts: AST...) -> AST {
3134
concat(asts)
3235
}
3336

3437
public func group(
35-
_ kind: Group.Kind, _ child: AST
38+
_ kind: AST.Group.Kind, _ child: AST
3639
) -> AST {
37-
.group(Group(kind, _fakeRange), child)
40+
.group(.init(_fake(kind), child, _fakeRange))
3841
}
3942
public func capture(
4043
_ child: AST
@@ -50,7 +53,7 @@ public func namedCapture(
5053
_ name: String,
5154
_ child: AST
5255
) -> AST {
53-
group(.namedCapture(name), child)
56+
group(.namedCapture(_fake(name)), child)
5457
}
5558
public func nonCaptureReset(
5659
_ child: AST
@@ -75,60 +78,63 @@ public func negativeLookbehind(_ child: AST) -> AST {
7578
group(.negativeLookbehind, child)
7679
}
7780

81+
7882
public var any: AST { .atom(.any) }
7983

8084
public func quant(
81-
_ amount: Quantifier.Amount,
82-
_ kind: Quantifier.Kind = .greedy,
85+
_ amount: AST.Quantification.Amount,
86+
_ kind: AST.Quantification.Kind = .greedy,
8387
_ child: AST
8488
) -> AST {
85-
.quantification(Quantifier(amount, kind, _fakeRange), child)
89+
.quantification(.init(
90+
_fake(amount), _fake(kind), child, _fakeRange))
8691
}
8792
public func zeroOrMore(
88-
_ kind: Quantifier.Kind = .greedy,
93+
_ kind: AST.Quantification.Kind = .greedy,
8994
_ child: AST
9095
) -> AST {
9196
quant(.zeroOrMore, kind, child)
9297
}
9398
public func zeroOrOne(
94-
_ kind: Quantifier.Kind = .greedy,
99+
_ kind: AST.Quantification.Kind = .greedy,
95100
_ child: AST
96101
) -> AST {
97102
quant(.zeroOrOne, kind, child)
98103
}
99104
public func oneOrMore(
100-
_ kind: Quantifier.Kind = .greedy,
105+
_ kind: AST.Quantification.Kind = .greedy,
101106
_ child: AST
102107
) -> AST {
103108
quant(.oneOrMore, kind, child)
104109
}
105110
public func exactly(
106-
_ kind: Quantifier.Kind = .greedy,
111+
_ kind: AST.Quantification.Kind = .greedy,
107112
_ i: Int,
108-
child: AST
113+
_ child: AST
109114
) -> AST {
110-
quant(.exactly(i), kind, child)
115+
quant(.exactly(_fake(i)), kind, child)
111116
}
112117
public func nOrMore(
113-
_ kind: Quantifier.Kind = .greedy,
118+
_ kind: AST.Quantification.Kind = .greedy,
114119
_ i: Int,
115-
child: AST
120+
_ child: AST
116121
) -> AST {
117-
quant(.nOrMore(i), kind, child)
122+
quant(.nOrMore(_fake(i)), kind, child)
118123
}
119124
public func upToN(
120-
_ kind: Quantifier.Kind = .greedy,
125+
_ kind: AST.Quantification.Kind = .greedy,
121126
_ i: Int,
122-
child: AST
127+
_ child: AST
123128
) -> AST {
124-
quant(.upToN(i), kind, child)
129+
quant(.upToN(_fake(i)), kind, child)
125130
}
126131
public func quantRange(
127-
_ kind: Quantifier.Kind = .greedy,
132+
_ kind: AST.Quantification.Kind = .greedy,
128133
_ r: ClosedRange<Int>,
129-
child: AST
134+
_ child: AST
130135
) -> AST {
131-
quant(.range(r), kind, child)
136+
let range = _fake(r.lowerBound) ... _fake(r.upperBound)
137+
return quant(.range(range), kind, child)
132138
}
133139

134140
public func charClass(
@@ -152,9 +158,14 @@ public func charClass(
152158
public func posixSet(
153159
_ set: Unicode.POSIXCharacterSet, inverted: Bool = false
154160
) -> Atom {
155-
return .namedSet(.init(inverted: inverted, set: set))
161+
.namedSet(.init(inverted: inverted, set: set))
162+
}
163+
164+
public func quote(_ s: String) -> AST {
165+
.quote(.init(s, _fakeRange))
156166
}
157-
func prop(
167+
168+
public func prop(
158169
_ kind: Atom.CharacterProperty.Kind, inverted: Bool = false
159170
) -> Atom {
160171
return .property(.init(kind, isInverted: inverted))
Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
2-
/// TODO: Describe
3-
///
4-
/// Values don't need to track locations. They're often trivially recomputable or irrelevant
5-
public protocol ASTValue: _ASTPrintable, Hashable {
6-
7-
}
8-
9-
/// TODO: Describe
10-
///
11-
/// Tracks source location information
12-
public protocol ASTEntity: ASTValue {
13-
var sourceRange: SourceRange { get }
14-
}
15-
16-
public protocol ASTParentEntity: ASTEntity, _ASTPrintableNested {
17-
// TODO: variadic access to children?
18-
}
19-
1+
//
2+
///// TODO: Describe
3+
/////
4+
///// Values don't need to track locations. They're often trivially recomputable or irrelevant
5+
//public protocol ASTValue: _ASTPrintable, Hashable {
6+
//
7+
//}
8+
//
9+
///// TODO: Describe
10+
/////
11+
///// Tracks source location information
12+
//public protocol ASTEntity: ASTValue {
13+
// var sourceRange: SourceRange { get }
14+
//}
15+
//
16+
//public protocol ASTParentEntity: ASTEntity, _ASTPrintableNested {
17+
// // TODO: variadic access to children?
18+
//}
19+
//

0 commit comments

Comments
 (0)