Skip to content

Commit c487971

Browse files
committed
WIP
1 parent 2a19722 commit c487971

File tree

3 files changed

+185
-141
lines changed

3 files changed

+185
-141
lines changed

Sources/Regex/AST/ASTBuilder.swift

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,49 @@ public func alt(_ asts: [AST]) -> AST {
4646
/// Variadic convenience form of `alt`.
///
/// Collects the arguments and forwards them, unchanged and in order, to the
/// array-taking `alt(_:)` overload.
///
/// - Parameter asts: The alternatives, in source order.
/// - Returns: Whatever the array-taking overload builds for `asts`.
public func alt(_ asts: AST...) -> AST {
  // Inside the body a variadic parameter is an array, so this call selects
  // the `[AST]` overload rather than recursing.
  let branches: [AST] = asts
  return alt(branches)
}
49+
4950
/// Wraps `asts` in a `.concatenation` node.
///
/// - Parameter asts: The child nodes, in the order they should appear.
/// - Returns: An `AST.concatenation` whose payload is built from `asts`.
public func concat(_ asts: [AST]) -> AST {
  return AST.concatenation(.init(asts))
}
5253
/// Variadic convenience form of `concat`.
///
/// Forwards the arguments, unchanged and in order, to the array-taking
/// `concat(_:)` overload.
///
/// - Parameter asts: The child nodes, in the order they should appear.
/// - Returns: Whatever the array-taking overload builds for `asts`.
public func concat(_ asts: AST...) -> AST {
  // Inside the body a variadic parameter is an array, so this call selects
  // the `[AST]` overload rather than recursing.
  let children: [AST] = asts
  return concat(children)
}
56+
5557
/// Builds a `.group` node of the given kind around `child`.
///
/// - Parameters:
///   - kind: Which kind of group to create (for example `.capture`).
///   - child: The node the group encloses.
/// - Returns: An `AST.group` wrapping `child`.
///
/// NOTE(review): `_fake` and `_fakeRange` are defined outside this view;
/// presumably they supply placeholder source-location data for hand-built
/// nodes. Confirm against their definitions.
public func group(_ kind: AST.Group.Kind, _ child: AST) -> AST {
  return AST.group(
    .init(_fake(kind), child, _fakeRange)
  )
}
62+
/// Builds a `.capture` group around `child`.
///
/// Shorthand for `group(.capture, child)`.
///
/// - Parameter child: The node the group encloses.
/// - Returns: The resulting group node.
public func capture(_ child: AST) -> AST {
  return group(.capture, child)
}
67+
/// Builds a `.nonCapture` group around `child`.
///
/// Shorthand for `group(.nonCapture, child)`.
///
/// - Parameter child: The node the group encloses.
/// - Returns: The resulting group node.
public func nonCapture(_ child: AST) -> AST {
  return group(.nonCapture, child)
}
72+
/// Builds a `.namedCapture` group called `name` around `child`.
///
/// Shorthand for `group(.namedCapture(_fake(name)), child)`.
///
/// - Parameters:
///   - name: The capture's name.
///   - child: The node the group encloses.
/// - Returns: The resulting group node.
public func namedCapture(_ name: String, _ child: AST) -> AST {
  return group(
    .namedCapture(_fake(name)),
    child
  )
}
78+
/// Builds a `.nonCaptureReset` group around `child`.
///
/// Shorthand for `group(.nonCaptureReset, child)`.
///
/// - Parameter child: The node the group encloses.
/// - Returns: The resulting group node.
public func nonCaptureReset(_ child: AST) -> AST {
  return group(.nonCaptureReset, child)
}
83+
/// Builds an `.atomicNonCapturing` group around `child`.
///
/// Shorthand for `group(.atomicNonCapturing, child)`.
///
/// - Parameter child: The node the group encloses.
/// - Returns: The resulting group node.
public func atomicNonCapturing(_ child: AST) -> AST {
  return group(.atomicNonCapturing, child)
}
88+
89+
90+
/// An `.atom(.any)` node, for use when assembling ASTs by hand.
public var any: AST {
  return AST.atom(.any)
}
91+
6092
public func quant(
6193
_ amount: AST.Quantification.Amount,
6294
_ kind: AST.Quantification.Kind = .greedy,
@@ -65,6 +97,25 @@ public func quant(
6597
.quantification(.init(
6698
_fake(amount), _fake(kind), child, _fakeRange))
6799
}
100+
/// Builds a `.zeroOrMore` quantification of `child`.
///
/// Shorthand for `quant(.zeroOrMore, kind, child)`.
///
/// - Parameters:
///   - kind: The quantification kind; defaults to `.greedy`.
///   - child: The node being quantified.
/// - Returns: The resulting quantification node.
public func zeroOrMore(_ kind: AST.Quantification.Kind = .greedy, _ child: AST) -> AST {
  return quant(.zeroOrMore, kind, child)
}
106+
/// Builds a `.zeroOrOne` quantification of `child`.
///
/// Shorthand for `quant(.zeroOrOne, kind, child)`.
///
/// - Parameters:
///   - kind: The quantification kind; defaults to `.greedy`.
///   - child: The node being quantified.
/// - Returns: The resulting quantification node.
public func zeroOrOne(_ kind: AST.Quantification.Kind = .greedy, _ child: AST) -> AST {
  return quant(.zeroOrOne, kind, child)
}
112+
/// Builds a `.oneOrMore` quantification of `child`.
///
/// Shorthand for `quant(.oneOrMore, kind, child)`.
///
/// - Parameters:
///   - kind: The quantification kind; defaults to `.greedy`.
///   - child: The node being quantified.
/// - Returns: The resulting quantification node.
public func oneOrMore(_ kind: AST.Quantification.Kind = .greedy, _ child: AST) -> AST {
  return quant(.oneOrMore, kind, child)
}
118+
68119
public func charClass(
69120
_ members: CustomCharacterClass.Member...,
70121
inverted: Bool = false

Tests/RegexTests/ParseTests.swift

Lines changed: 90 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -138,43 +138,42 @@ extension RegexTests {
138138
139139
parseTest(
140140
"abc", concat("a", "b", "c"))
141-
// parseTest(
142-
// #"abc\+d*"#,
143-
// concat("a", "b", "c", "+", .zeroOrMore(.greedy, "d")))
144-
// parseTest(
145-
// "a(b)", concat("a", .group(.capture(), "b")))
146-
// parseTest(
147-
// "abc(?:de)+fghi*k|j",
148-
// alt(
149-
// concat(
150-
// "a", "b", "c",
151-
// .oneOrMore(
152-
// .greedy, .group(.nonCapture(), concat("d", "e"))),
153-
// "f", "g", "h", .zeroOrMore(.greedy, "i"), "k"),
154-
// "j"))
155-
// parseTest(
156-
// "a(?:b|c)?d",
157-
// concat("a", .zeroOrOne(
158-
// .greedy, .group(.nonCapture(), alt("b", "c"))), "d"))
159-
// parseTest(
160-
// "a?b??c+d+?e*f*?",
161-
// concat(
162-
// .zeroOrOne(.greedy, "a"), .zeroOrOne(.reluctant, "b"),
163-
// .oneOrMore(.greedy, "c"), .oneOrMore(.reluctant, "d"),
164-
// .zeroOrMore(.greedy, "e"), .zeroOrMore(.reluctant, "f")))
165-
// parseTest(
166-
// "a|b?c",
167-
// alt("a", concat(.zeroOrOne(.greedy, "b"), "c")))
168-
// parseTest(
169-
// "(a|b)c",
170-
// concat(.group(.capture(), alt("a", "b")), "c"))
171-
// parseTest(
172-
// "(.)*(.*)",
173-
// concat(
174-
// .zeroOrMore(
175-
// .greedy, .group(.capture(), .any)),
176-
// .group(
177-
// .capture(), .zeroOrMore(.greedy, .any))))
141+
parseTest(
142+
#"abc\+d*"#,
143+
concat("a", "b", "c", "+", zeroOrMore(.greedy, "d")))
144+
parseTest(
145+
"a(b)", concat("a", group(.capture, "b")))
146+
parseTest(
147+
"abc(?:de)+fghi*k|j",
148+
alt(
149+
concat(
150+
"a", "b", "c",
151+
oneOrMore(
152+
.greedy, nonCapture(concat("d", "e"))),
153+
"f", "g", "h", zeroOrMore(.greedy, "i"), "k"),
154+
"j"))
155+
parseTest(
156+
"a(?:b|c)?d",
157+
concat("a", zeroOrOne(
158+
.greedy, nonCapture(alt("b", "c"))), "d"))
159+
parseTest(
160+
"a?b??c+d+?e*f*?",
161+
concat(
162+
zeroOrOne(.greedy, "a"), zeroOrOne(.reluctant, "b"),
163+
oneOrMore(.greedy, "c"), oneOrMore(.reluctant, "d"),
164+
zeroOrMore(.greedy, "e"), zeroOrMore(.reluctant, "f")))
165+
parseTest(
166+
"a|b?c",
167+
alt("a", concat(zeroOrOne(.greedy, "b"), "c")))
168+
parseTest(
169+
"(a|b)c",
170+
concat(capture(alt("a", "b")), "c"))
171+
parseTest(
172+
"(.)*(.*)",
173+
concat(
174+
zeroOrMore(
175+
.greedy, capture(any)),
176+
capture(zeroOrMore(.greedy, any))))
178177
parseTest(
179178
#"abc\d"#,
180179
concat("a", "b", "c", .atom(.escaped(.decimalDigit))))
@@ -244,14 +243,14 @@ extension RegexTests {
244243
parseTest("[[[:space:]]]",
245244
charClass(charClass(.atom(posixSet(.space)))))
246245
247-
// parseTest(
248-
// #"[a[bc]de&&[^bc]\d]+"#,
249-
// .oneOrMore(.greedy, charClass(
250-
// .setOperation(
251-
// ["a", charClass("b", "c"), "d", "e"],
252-
// .intersection,
253-
// [charClass("b", "c", inverted: true), .atom(.escaped(.decimalDigit))]
254-
// ))))
246+
parseTest(
247+
#"[a[bc]de&&[^bc]\d]+"#,
248+
oneOrMore(.greedy, charClass(
249+
.setOperation(
250+
["a", charClass("b", "c"), "d", "e"],
251+
.intersection,
252+
[charClass("b", "c", inverted: true), .atom(.escaped(.decimalDigit))]
253+
))))
255254
256255
parseTest(
257256
"[a&&b]",
@@ -274,14 +273,14 @@ extension RegexTests {
274273
// Operators are only valid in custom character classes.
275274
parseTest(
276275
"a&&b", concat("a", "&", "&", "b"))
277-
// parseTest(
278-
// "&?", .zeroOrOne(.greedy, "&"))
279-
// parseTest(
280-
// "&&?", concat("&", .zeroOrOne(.greedy, "&")))
281-
// parseTest(
282-
// "--+", concat("-", .oneOrMore(.greedy, "-")))
283-
// parseTest(
284-
// "~~*", concat("~", .zeroOrMore(.greedy, "~")))
276+
parseTest(
277+
"&?", zeroOrOne(.greedy, "&"))
278+
parseTest(
279+
"&&?", concat("&", zeroOrOne(.greedy, "&")))
280+
parseTest(
281+
"--+", concat("-", oneOrMore(.greedy, "-")))
282+
parseTest(
283+
"~~*", concat("~", zeroOrMore(.greedy, "~")))
285284
286285
parseTest(
287286
#"a\Q .\Eb"#,
@@ -294,48 +293,46 @@ extension RegexTests {
294293
#"a(?#. comment)b"#,
295294
concat("a", "b"))
296295
297-
// parseTest(
298-
// #"a{1,2}"#,
299-
// .quantification(.range(.greedy, 1...2), "a"))
300-
// parseTest(
301-
// #"a{,2}"#,
302-
// .quantification(.upToN(.greedy, 2), "a"))
303-
// parseTest(
304-
// #"a{1,}"#,
305-
// .quantification(.nOrMore(.greedy, 1), "a"))
306-
// parseTest(
307-
// #"a{1}"#,
308-
// .quantification(.exactly(.greedy, 1), "a"))
309-
// parseTest(
310-
// #"a{1,2}?"#,
311-
// .quantification(.range(.reluctant, 1...2), "a"))
312-
313-
// // Named captures
314-
// parseTest(
315-
// #"a(?<label>b)c"#,
316-
// concat("a", .namedCapture("label", "b"), "c"))
317-
// parseTest(
318-
// #"a(?'label'b)c"#,
319-
// concat("a", .namedCapture("label", "b"), "c"))
320-
// parseTest(
321-
// #"a(?P<label>b)c"#,
322-
// concat("a", .namedCapture("label", "b"), "c"))
323-
// parseTest(
324-
// #"a(?P<label>b)c"#,
325-
// concat("a", .namedCapture("label", "b"), "c"))
326-
//
327-
// // Other groups
328-
// parseTest(
329-
// #"a(?:b)c"#,
330-
// concat("a", .nonCapture("b"), "c"))
331-
// parseTest(
332-
// #"a(?|b)c"#,
333-
// concat("a", .nonCaptureReset("b"), "c"))
334-
// parseTest(
335-
// #"a(?>b)c"#,
336-
// concat("a", .atomicNonCapturing("b"), "c"))
296+
parseTest(
297+
#"a{1,2}"#,
298+
quant(.range(_fake(1)..._fake(2)), .greedy, "a"))
299+
parseTest(
300+
#"a{,2}"#,
301+
quant(.upToN(_fake(2)), .greedy, "a"))
302+
parseTest(
303+
#"a{1,}"#,
304+
quant(.nOrMore(_fake(1)), .greedy, "a"))
305+
parseTest(
306+
#"a{1}"#,
307+
quant(.exactly(_fake(1)), .greedy, "a"))
308+
parseTest(
309+
#"a{1,2}?"#,
310+
quant(.range(_fake(1)..._fake(2)), .reluctant, "a"))
337311
312+
// Named captures
313+
parseTest(
314+
#"a(?<label>b)c"#,
315+
concat("a", namedCapture("label", "b"), "c"))
316+
parseTest(
317+
#"a(?'label'b)c"#,
318+
concat("a", namedCapture("label", "b"), "c"))
319+
parseTest(
320+
#"a(?P<label>b)c"#,
321+
concat("a", namedCapture("label", "b"), "c"))
322+
parseTest(
323+
#"a(?P<label>b)c"#,
324+
concat("a", namedCapture("label", "b"), "c"))
338325
326+
// Other groups
327+
parseTest(
328+
#"a(?:b)c"#,
329+
concat("a", nonCapture("b"), "c"))
330+
parseTest(
331+
#"a(?|b)c"#,
332+
concat("a", nonCaptureReset("b"), "c"))
333+
parseTest(
334+
#"a(?>b)c"#,
335+
concat("a", atomicNonCapturing("b"), "c"))
339336
340337
// TODO: failure tests
341338
}

Tests/RegexTests/SyntaxOptionsTests.swift

Lines changed: 44 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,11 @@ private func esc(_ c: Character) -> Token {
55
.character(c, isEscaped: true)
66
}
77

8-
private let dplus = AST.quantification(.init(
9-
_fake(.oneOrMore),
10-
_fake(.greedy),
11-
.atom(.escaped(.decimalDigit)),
12-
_fakeRange))
13-
private let dotAST = AST.concatenation(
14-
AST.Concatenation([
15-
dplus, ".", dplus, ".", dplus, ".", dplus]))
8+
// Expected AST for a greedy one-or-more run of the escaped decimal-digit atom.
private let dplus = oneOrMore(.greedy, .atom(.escaped(.decimalDigit)))
10+
11+
// Expected AST for four `dplus` runs separated by literal "." characters.
private let dotAST = concat(
  dplus, ".",
  dplus, ".",
  dplus, ".",
  dplus
)
1613

1714
let trivia = AST.trivia(AST.Trivia())
1815

@@ -67,49 +64,48 @@ extension RegexTests {
6764
parseTest(
6865
#" \d+ "." \d+ "." \d+ "." \d+ "#,
6966
dotAST , syntax: .modern)
70-
//
71-
// parseTest(
72-
// #"a{1,2}"#,
73-
// .quantification(.range(.greedy, 1...2), "a"))
74-
// parseTest(
75-
// #"a{1...2}"#,
76-
// .quantification(.range(.greedy, 1...2), "a"),
77-
// syntax: .modernRanges)
78-
// parseTest(
79-
// #"a{1..<3}"#,
80-
// .quantification(.range(.greedy, 1...2), "a"),
81-
// syntax: .modernRanges)
82-
//
83-
// parseTest(
84-
// #"a{,2}"#,
85-
// .quantification(.upToN(.greedy, 2), "a"))
86-
// parseTest(
87-
// #"a{...2}"#,
88-
// .quantification(.upToN(.greedy, 2), "a"),
89-
// syntax: .modern)
90-
// parseTest(
91-
// #"a{..<3}"#,
92-
// .quantification(.upToN(.greedy, 2), "a"),
93-
// syntax: .modern)
94-
//
95-
// parseTest(
96-
// #"a{1,}"#,
97-
// .quantification(.nOrMore(.greedy, 1), "a"))
98-
// parseTest(
99-
// #"a{1...}"#,
100-
// .quantification(.nOrMore(.greedy, 1), "a"),
101-
// syntax: .modern)
67+
68+
parseTest(
69+
#"a{1,2}"#,
70+
quant(.range(_fake(1)..._fake(2)), .greedy, "a"))
71+
parseTest(
72+
#"a{1...2}"#,
73+
quant(.range(_fake(1)..._fake(2)), .greedy, "a"),
74+
syntax: .modernRanges)
75+
parseTest(
76+
#"a{1..<3}"#,
77+
quant(.range(_fake(1)..._fake(2)), .greedy, "a"),
78+
syntax: .modernRanges)
79+
80+
parseTest(
81+
#"a{,2}"#,
82+
quant(.upToN(_fake(2)), .greedy, "a"))
83+
parseTest(
84+
#"a{...2}"#,
85+
quant(.upToN(_fake(2)), .greedy, "a"),
86+
syntax: .modern)
87+
parseTest(
88+
#"a{..<3}"#,
89+
quant(.upToN(_fake(2)), .greedy, "a"),
90+
syntax: .modern)
91+
92+
parseTest(
93+
#"a{1,}"#,
94+
quant(.nOrMore(_fake(1)), .greedy, "a"))
95+
parseTest(
96+
#"a{1...}"#,
97+
quant(.nOrMore(_fake(1)), .greedy, "a"),
98+
syntax: .modern)
10299
}
103100

104101
/// Checks that `(?:b)` under the default syntax and `(_:b)` under
/// `.modernCaptures` both parse to the same non-capturing-group AST.
func testModernCaptures() {
  // Both spellings are expected to yield this tree.
  let expected = concat("a", nonCapture("b"), "c")

  parseTest(#"a(?:b)c"#, expected)
  parseTest(#"a(_:b)c"#, expected, syntax: .modernCaptures)

  // TODO: `(name: .*)`
}

0 commit comments

Comments
 (0)