Skip to content

Commit 58c6428

Browse files
committed
Allow subscripting a match result on a Reference.
To simplify the call site, I chose to precondition instead of returning optionals. Examples:
```
let a = Reference()
let result = regex.match {
  tryCapture(as: a) {
    oneOrMore(.digit)
  } transform: { Int($0) }
}
result[a] // => Any
result[a, as: Int.self] // => Int
```
1 parent ed2ed90 commit 58c6428

File tree

11 files changed

+133
-60
lines changed

11 files changed

+133
-60
lines changed

Sources/_StringProcessing/Capture.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@ extension Capture {
3131
}
3232
}
3333

34+
extension Capture {
35+
subscript(_ index: Int) -> Capture {
36+
if case .tuple(let elements) = self {
37+
assert(
38+
(elements.startIndex..<elements.endIndex).contains(index),
39+
"Capture index out of bounds"
40+
)
41+
return elements[index]
42+
}
43+
return self
44+
}
45+
}
46+
3447
extension Capture {
3548
static func tupleOrAtom(_ elements: [Capture]) -> Self {
3649
elements.count == 1 ? elements[0] : .tuple(elements)

Sources/_StringProcessing/Engine/Consume.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ extension Engine where Input == String {
6363
guard let result = result else { return nil }
6464

6565
let capList = cpu.storedCaptures
66-
return (result, CaptureList(caps: capList))
66+
return (result, CaptureList(
67+
values: capList, referencedCaptureOffsets: program.referencedCaptureOffsets))
6768
}
6869
}
6970

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ extension MEProgram where Input.Element: Hashable {
4444

4545
// Symbolic reference resolution
4646
var unresolvedReferences: [Reference.ID: [InstructionAddress]] = [:]
47-
var captureOffsets: [Reference.ID: Int] = [:]
47+
var referencedCaptureOffsets: [Reference.ID: Int] = [:]
4848
var captureCount: Int {
4949
// We currently deduce the capture count from the capture register number.
5050
nextCaptureRegister.rawValue
@@ -352,7 +352,8 @@ extension MEProgram.Builder {
352352
staticTransformFunctions: transformFunctions,
353353
staticMatcherFunctions: matcherFunctions,
354354
registerInfo: regInfo,
355-
captureStructure: captureStructure)
355+
captureStructure: captureStructure,
356+
referencedCaptureOffsets: referencedCaptureOffsets)
356357
}
357358

358359
public mutating func reset() { self = Self() }
@@ -424,7 +425,7 @@ extension MEProgram.Builder {
424425
fileprivate extension MEProgram.Builder {
425426
mutating func resolveReferences() throws {
426427
for (id, uses) in unresolvedReferences {
427-
guard let offset = captureOffsets[id] else {
428+
guard let offset = referencedCaptureOffsets[id] else {
428429
throw RegexCompilationError.uncapturedReference
429430
}
430431
for use in uses {
@@ -441,7 +442,7 @@ extension MEProgram.Builder {
441442
defer { nextCaptureRegister.rawValue += 1 }
442443
// Register the capture for later lookup via symbolic references.
443444
if let id = id {
444-
let preexistingValue = captureOffsets.updateValue(
445+
let preexistingValue = referencedCaptureOffsets.updateValue(
445446
captureCount, forKey: id)
446447
assert(preexistingValue == nil)
447448
}

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,15 @@ extension Processor._StoredCapture: CustomStringConvertible {
143143
}
144144

145145
public struct CaptureList {
146-
var caps: Array<Processor<String>._StoredCapture>
146+
var values: Array<Processor<String>._StoredCapture>
147+
var referencedCaptureOffsets: [Reference.ID: Int]
147148

148149
// func extract(from s: String) -> Array<Array<Substring>> {
149150
// caps.map { $0.map { s[$0] } }
150151
// }
151152
//
152153
func latestUntyped(from s: String) -> Array<Substring?> {
153-
caps.map {
154+
values.map {
154155
guard let last = $0.latest else {
155156
return nil
156157
}

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ public struct MEProgram<Input: Collection> where Input.Element: Equatable {
3535
var enableTracing: Bool = false
3636

3737
let captureStructure: CaptureStructure
38+
let referencedCaptureOffsets: [Reference.ID: Int]
3839
}
3940

4041
extension MEProgram: CustomStringConvertible {

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ public enum MatchMode {
1414
case partialFromFront
1515
}
1616

17+
typealias Program = MEProgram<String>
18+
1719
/// A concrete CU. Somehow will run the concrete logic and
1820
/// feed stuff back to generic code
1921
struct Controller {

Sources/_StringProcessing/Engine/StringProcessor.swift

Lines changed: 0 additions & 31 deletions
This file was deleted.

Sources/_StringProcessing/Engine/Structuralize.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,21 @@ private struct Fabricator {
2929

3030
mutating func next(
3131
) throws -> Processor<String>._StoredCapture {
32-
guard curIdx < list.caps.endIndex else {
32+
guard curIdx < list.values.endIndex else {
3333
// TODO: Is `throws` a bit much here?
3434
// Maybe just precondition or hard trap
3535
throw Unreachable("Capture count mismatch")
3636
}
37-
defer { list.caps.formIndex(after: &curIdx) }
38-
return list.caps[curIdx]
37+
defer { list.values.formIndex(after: &curIdx) }
38+
return list.values[curIdx]
3939
}
4040

4141
var currentIsEmpty: Bool {
42-
guard curIdx < list.caps.endIndex else {
42+
guard curIdx < list.values.endIndex else {
4343
fatalError("Capture count mismatch")
4444
}
4545

46-
return list.caps[curIdx].isEmpty
46+
return list.values[curIdx].isEmpty
4747
}
4848

4949
mutating func formValue(

Sources/_StringProcessing/Executor.swift

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import _MatchingEngine
1313

14+
// FIXME: Public for prototype
1415
public struct Executor {
1516
// TODO: consider let, for now lets us toggle tracing
1617
var engine: Engine<String>
@@ -19,11 +20,35 @@ public struct Executor {
1920
self.engine = Engine(program, enableTracing: enablesTracing)
2021
}
2122

23+
// FIXME: Public for prototype
24+
public struct Result {
25+
public var range: Range<String.Index>
26+
var captures: Capture
27+
var referencedCaptureOffsets: [Reference.ID: Int]
28+
29+
var destructure: (
30+
matched: Range<String.Index>,
31+
captures: Capture,
32+
referencedCaptureOffsets: [Reference.ID: Int]
33+
) {
34+
(range, captures, referencedCaptureOffsets)
35+
}
36+
37+
init(
38+
_ matched: Range<String.Index>, _ captures: Capture,
39+
_ referencedCaptureOffsets: [Reference.ID: Int]
40+
) {
41+
self.range = matched
42+
self.captures = captures
43+
self.referencedCaptureOffsets = referencedCaptureOffsets
44+
}
45+
}
46+
2247
public func execute(
2348
input: String,
2449
in range: Range<String.Index>,
2550
mode: MatchMode = .wholeString
26-
) -> MatchResult? {
51+
) -> Result? {
2752
guard let (endIdx, capList) = engine.consume(
2853
input, in: range, matchMode: mode
2954
) else {
@@ -32,15 +57,16 @@ public struct Executor {
3257
let capStruct = engine.program.captureStructure
3358
do {
3459
let caps = try capStruct.structuralize(capList, input)
35-
return MatchResult(range.lowerBound..<endIdx, caps)
60+
return Result(
61+
range.lowerBound..<endIdx, caps, capList.referencedCaptureOffsets)
3662
} catch {
3763
fatalError(String(describing: error))
3864
}
3965
}
4066
public func execute(
4167
input: Substring,
4268
mode: MatchMode = .wholeString
43-
) -> MatchResult? {
69+
) -> Result? {
4470
self.execute(
4571
input: input.base,
4672
in: input.startIndex..<input.endIndex,

Sources/_StringProcessing/RegexDSL/Match.swift

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,24 @@
1111

1212
@dynamicMemberLookup
1313
public struct RegexMatch<Match> {
14+
let input: String
1415
public let range: Range<String.Index>
15-
public let match: Match
16+
let rawCaptures: Capture
17+
let referencedCaptureOffsets: [Reference.ID: Int]
18+
19+
public var match: Match {
20+
if Match.self == (Substring, DynamicCaptures).self {
21+
return (input[range], DynamicCaptures(rawCaptures)) as! Match
22+
} else if Match.self == Substring.self {
23+
// FIXME: Plumb whole match (`.0`) through the matching engine.
24+
return input[range] as! Match
25+
} else {
26+
let typeErasedMatch = rawCaptures.matchValue(
27+
withWholeMatch: input[range]
28+
)
29+
return typeErasedMatch as! Match
30+
}
31+
}
1632

1733
public subscript<T>(dynamicMember keyPath: KeyPath<Match, T>) -> T {
1834
match[keyPath: keyPath]
@@ -25,6 +41,26 @@ public struct RegexMatch<Match> {
2541
) -> Match {
2642
match
2743
}
44+
45+
public subscript(_ reference: Reference) -> Any {
46+
guard let offset = referencedCaptureOffsets[reference.id] else {
47+
preconditionFailure(
48+
"Reference did not capture any match in the regex")
49+
}
50+
return rawCaptures[offset].value
51+
}
52+
53+
public subscript<T>(_ reference: Reference, as _: T.Type) -> T {
54+
let typeErasedMatch = self[reference]
55+
guard let result = typeErasedMatch as? T else {
56+
preconditionFailure(
57+
"""
58+
The referenced capture has type '\(type(of: typeErasedMatch))', not
59+
'\(T.self)'.
60+
""")
61+
}
62+
return result
63+
}
2864
}
2965

3066
extension RegexProtocol {
@@ -43,23 +79,16 @@ extension RegexProtocol {
4379
mode: MatchMode = .wholeString
4480
) -> RegexMatch<Match>? {
4581
let executor = Executor(program: regex.program.loweredProgram)
46-
guard let (range, captures) = executor.execute(
82+
guard let (range, captures, captureOffsets) = executor.execute(
4783
input: input, in: inputRange, mode: mode
4884
)?.destructure else {
4985
return nil
5086
}
51-
let convertedMatch: Match
52-
if Match.self == (Substring, DynamicCaptures).self {
53-
convertedMatch = (input[range], DynamicCaptures(captures)) as! Match
54-
} else if Match.self == Substring.self {
55-
convertedMatch = input[range] as! Match
56-
} else {
57-
let typeErasedMatch = captures.matchValue(
58-
withWholeMatch: input[range]
59-
)
60-
convertedMatch = typeErasedMatch as! Match
61-
}
62-
return RegexMatch(range: range, match: convertedMatch)
87+
return RegexMatch(
88+
input: input,
89+
range: range,
90+
rawCaptures: captures,
91+
referencedCaptureOffsets: captureOffsets)
6392
}
6493
}
6594

Tests/RegexTests/RegexDSLTests.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,36 @@ class RegexDSLTests: XCTestCase {
582582
}
583583
}
584584

585+
// Match result referencing a `Reference`.
586+
do {
587+
let a = Reference()
588+
let b = Reference()
589+
let regex = Regex {
590+
capture("abc", as: a)
591+
zeroOrMore {
592+
tryCapture(as: b) {
593+
"#"
594+
oneOrMore(.digit)
595+
} transform: {
596+
Int($0.dropFirst())
597+
}
598+
}
599+
a
600+
zeroOrMore {
601+
capture(b)
602+
}
603+
optionally {
604+
capture(a)
605+
}
606+
}
607+
let input = "abc#41#42abc#42#42"
608+
let result = try XCTUnwrap(input.match(regex))
609+
XCTAssertEqual(result[a] as? Substring, "abc")
610+
XCTAssertEqual(result[b] as? Int, 42)
611+
XCTAssertEqual(result[a, as: Substring.self], "abc")
612+
XCTAssertEqual(result[b, as: Int.self], 42)
613+
}
614+
585615
// Post-hoc captured references
586616
// #"(?:\w\1|:(\w):)+"#
587617
try _testDSLCaptures(

0 commit comments

Comments
 (0)