Skip to content

Commit b78683f

Browse files
committed
RegexBuilder module
Move the regex builder DSL (except `RegexComponent`) to a new module named `RegexBuilder`. The DSL depends on `DSLTree` and a few other supporting types, so those types have been made `@_spi(RegexBuilder) public`. The SPI establishes an ABI between `_StringProcessing` and `RegexBuilder`, but I don't think it's a concern because the two modules will co-evolve and both will be rebuilt for every release.
1 parent e087320 commit b78683f

21 files changed

+237
-141
lines changed

Package.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,22 @@ let package = Package(
4242
.target(
4343
name: "_StringProcessing",
4444
dependencies: ["_MatchingEngine", "_CUnicode"],
45+
swiftSettings: [
46+
.unsafeFlags(["-enable-library-evolution"]),
47+
]),
48+
.target(
49+
name: "RegexBuilder",
50+
dependencies: ["_StringProcessing", "_MatchingEngine"],
4551
swiftSettings: [
4652
.unsafeFlags(["-enable-library-evolution"]),
4753
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
4854
]),
4955
.testTarget(
5056
name: "RegexTests",
51-
dependencies: ["_StringProcessing"],
57+
dependencies: ["_StringProcessing"]),
58+
.testTarget(
59+
name: "RegexBuilderTests",
60+
dependencies: ["_StringProcessing", "RegexBuilder"],
5261
swiftSettings: [
5362
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
5463
]),
@@ -73,7 +82,7 @@ let package = Package(
7382
// MARK: Exercises
7483
.target(
7584
name: "Exercises",
76-
dependencies: ["_MatchingEngine", "Prototypes", "_StringProcessing"],
85+
dependencies: ["_MatchingEngine", "Prototypes", "_StringProcessing", "RegexBuilder"],
7786
swiftSettings: [
7887
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
7988
]),

Sources/Exercises/Participants/RegexParticipant.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//===----------------------------------------------------------------------===//
1111

1212
import _StringProcessing
13+
import RegexBuilder
1314

1415
/*
1516

Sources/_StringProcessing/RegexDSL/Anchor.swift renamed to Sources/RegexBuilder/Anchor.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//===----------------------------------------------------------------------===//
1111

1212
import _MatchingEngine
13+
@_spi(RegexBuilder) import _StringProcessing
1314

1415
public struct Anchor {
1516
internal enum Kind {

Sources/_StringProcessing/RegexDSL/Builder.swift renamed to Sources/RegexBuilder/Builder.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12+
@_spi(RegexBuilder) import _StringProcessing
13+
1214
@resultBuilder
1315
public enum RegexComponentBuilder {
1416
public static func buildBlock() -> Regex<Substring> {

Sources/_StringProcessing/RegexDSL/DSL.swift renamed to Sources/RegexBuilder/DSL.swift

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@
1010
//===----------------------------------------------------------------------===//
1111

1212
import _MatchingEngine
13+
@_spi(RegexBuilder) import _StringProcessing
14+
import _StringProcessing
15+
16+
extension Regex {
17+
public init<Content: RegexComponent>(
18+
@RegexComponentBuilder _ content: () -> Content
19+
) where Content.Output == Output {
20+
self.init(content())
21+
}
22+
}
1323

1424
// A convenience protocol for builtin regex components that are initialized with
1525
// a `DSLTree` node.
@@ -57,17 +67,6 @@ extension UnicodeScalar: RegexComponent {
5767
}
5868
}
5969

60-
extension CharacterClass: RegexComponent {
61-
public typealias Output = Substring
62-
63-
public var regex: Regex<Output> {
64-
guard let ast = self.makeAST() else {
65-
fatalError("FIXME: extended AST?")
66-
}
67-
return Regex(ast: ast)
68-
}
69-
}
70-
7170
// MARK: - Combinators
7271

7372
// MARK: Concatenation
@@ -96,9 +95,9 @@ public struct QuantificationBehavior {
9695
case reluctantly
9796
case possessively
9897
}
99-
98+
10099
var kind: Kind
101-
100+
102101
internal var astKind: AST.Quantification.Kind {
103102
switch kind {
104103
case .eagerly: return .eager
@@ -108,19 +107,49 @@ public struct QuantificationBehavior {
108107
}
109108
}
110109

110+
extension DSLTree.Node {
111+
/// Generates a DSLTree node for a repeated range of the given DSLTree node.
112+
/// Individual public API functions are in the generated Variadics.swift file.
113+
static func repeating(
114+
_ range: Range<Int>,
115+
_ behavior: QuantificationBehavior,
116+
_ node: DSLTree.Node
117+
) -> DSLTree.Node {
118+
// TODO: Throw these as errors
119+
assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
120+
assert(!range.isEmpty, "Cannot specify an empty range")
121+
122+
switch (range.lowerBound, range.upperBound) {
123+
case (0, Int.max): // 0...
124+
return .quantification(.zeroOrMore, behavior.astKind, node)
125+
case (1, Int.max): // 1...
126+
return .quantification(.oneOrMore, behavior.astKind, node)
127+
case _ where range.count == 1: // ..<1 or ...0 or any range with count == 1
128+
// Note: `behavior` is ignored in this case
129+
return .quantification(.exactly(.init(faking: range.lowerBound)), .eager, node)
130+
case (0, _): // 0..<n or 0...n or ..<n or ...n
131+
return .quantification(.upToN(.init(faking: range.upperBound)), behavior.astKind, node)
132+
case (_, Int.max): // n...
133+
return .quantification(.nOrMore(.init(faking: range.lowerBound)), behavior.astKind, node)
134+
default: // any other range
135+
return .quantification(.range(.init(faking: range.lowerBound), .init(faking: range.upperBound)), behavior.astKind, node)
136+
}
137+
}
138+
}
139+
111140
extension QuantificationBehavior {
112141
/// Match as much of the input string as possible, backtracking when
113142
/// necessary.
114143
public static var eagerly: QuantificationBehavior {
115144
.init(kind: .eagerly)
116145
}
117-
146+
118147
/// Match as little of the input string as possible, expanding the matched
119148
/// region as necessary to complete a match.
120149
public static var reluctantly: QuantificationBehavior {
121150
.init(kind: .reluctantly)
122151
}
123-
152+
124153
/// Match as much of the input string as possible, performing no backtracking.
125154
public static var possessively: QuantificationBehavior {
126155
.init(kind: .possessively)
@@ -247,22 +276,18 @@ public struct TryCapture<Output>: _BuiltinRegexComponent {
247276

248277
// MARK: - Backreference
249278

250-
struct ReferenceID: Hashable, Equatable {
251-
private static var counter: Int = 0
252-
var base: Int
253-
254-
init() {
255-
base = Self.counter
256-
Self.counter += 1
257-
}
258-
}
259-
260279
public struct Reference<Capture>: RegexComponent {
261280
let id = ReferenceID()
262-
281+
263282
public init(_ captureType: Capture.Type = Capture.self) {}
264283

265284
public var regex: Regex<Capture> {
266285
.init(node: .atom(.symbolicReference(id)))
267286
}
268287
}
288+
289+
extension Regex.Match {
290+
public subscript<Capture>(_ reference: Reference<Capture>) -> Capture {
291+
self[reference.id]
292+
}
293+
}

Sources/RegexBuilder/Match.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import _StringProcessing
13+
14+
extension String {
15+
public func match<R: RegexComponent>(
16+
@RegexComponentBuilder _ content: () -> R
17+
) -> Regex<R.Output>.Match? {
18+
match(content())
19+
}
20+
}
21+
22+
extension Substring {
23+
public func match<R: RegexComponent>(
24+
@RegexComponentBuilder _ content: () -> R
25+
) -> Regex<R.Output>.Match? {
26+
match(content())
27+
}
28+
}

Sources/_StringProcessing/RegexDSL/Variadics.swift renamed to Sources/RegexBuilder/Variadics.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// BEGIN AUTO-GENERATED CONTENT
1313

1414
import _MatchingEngine
15+
@_spi(RegexBuilder) import _StringProcessing
1516

1617
extension RegexComponentBuilder {
1718
public static func buildPartialBlock<W0, W1, C0, R0: RegexComponent, R1: RegexComponent>(

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
// swift run VariadicsGenerator --max-arity 10 > Sources/_StringProcessing/RegexDSL/Variadics.swift
12+
// swift run VariadicsGenerator --max-arity 10 > Sources/RegexBuilder/Variadics.swift
1313

1414
import ArgumentParser
1515
#if os(macOS)
@@ -121,6 +121,7 @@ struct VariadicsGenerator: ParsableCommand {
121121
// BEGIN AUTO-GENERATED CONTENT
122122
123123
import _MatchingEngine
124+
@_spi(RegexBuilder) import _StringProcessing
124125
125126
126127
""")

Sources/_StringProcessing/CharacterClass.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,17 @@ public struct CharacterClass: Hashable {
178178
}
179179
}
180180

181+
extension CharacterClass: RegexComponent {
182+
public typealias Output = Substring
183+
184+
public var regex: Regex<Output> {
185+
guard let ast = self.makeAST() else {
186+
fatalError("FIXME: extended AST?")
187+
}
188+
return Regex(ast: ast)
189+
}
190+
}
191+
181192
extension RegexComponent where Self == CharacterClass {
182193
public static var any: CharacterClass {
183194
.init(cc: .any, matchLevel: .graphemeCluster)

Sources/_StringProcessing/RegexDSL/Core.swift renamed to Sources/_StringProcessing/Regex/Core.swift

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public struct Regex<Output>: RegexComponent {
3636
init(ast: AST) {
3737
self.tree = ast.dslTree
3838
}
39+
3940
init(tree: DSLTree) {
4041
self.tree = tree
4142
}
@@ -44,7 +45,8 @@ public struct Regex<Output>: RegexComponent {
4445
let program: Program
4546
// var ast: AST { program.ast }
4647

47-
var root: DSLTree.Node {
48+
@_spi(RegexBuilder)
49+
public var root: DSLTree.Node {
4850
program.tree.root
4951
}
5052

@@ -59,7 +61,8 @@ public struct Regex<Output>: RegexComponent {
5961
self.program = Program(ast: .init(ast, globalOptions: nil))
6062
}
6163

62-
init(node: DSLTree.Node) {
64+
@_spi(RegexBuilder)
65+
public init(node: DSLTree.Node) {
6366
self.program = Program(tree: .init(node, options: nil))
6467
}
6568

@@ -84,12 +87,6 @@ public struct Regex<Output>: RegexComponent {
8487
self = content.regex
8588
}
8689

87-
public init<Content: RegexComponent>(
88-
@RegexComponentBuilder _ content: () -> Content
89-
) where Content.Output == Output {
90-
self.init(content())
91-
}
92-
9390
public var regex: Regex<Output> {
9491
self
9592
}

0 commit comments

Comments
 (0)