Skip to content

Commit 3f2468f

Browse files
authored
Custom regex components support (#163)
Hook up custom matchers. This is only lightly-tested, more exhaustive testing TBD.
1 parent 8febede commit 3f2468f

18 files changed

+378
-22
lines changed

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,23 @@ extension CaptureStructure.Constructor {
9393
}
9494
}
9595

96+
/// Computes the capture structure of a group of the given `kind` wrapping
/// `child`, using a caller-supplied `type` for the group's own capture.
///
/// - Parameters:
///   - child: The node inside the group. Its capture structure is computed
///     first by calling `_captureStructure(&self)` on it.
///   - kind: The group kind. Only `.capture` and `.namedCapture` contribute
///     an atom of their own; every other kind falls through to `default`.
///   - type: The type recorded on the atom created for a capturing group.
///     It is not used for non-capturing kinds.
/// - Returns: For capturing kinds, an atom for the group combined (via `+`)
///   with the child's captures; otherwise just the child's captures.
public mutating func grouping<T: _TreeNode>(
97+
_ child: T,
98+
as kind: AST.Group.Kind,
99+
withType type: AnyType
100+
) -> CaptureStructure {
101+
let innerCaptures = child._captureStructure(&self)
102+
switch kind {
103+
case .capture:
104+
return .atom(type: type) + innerCaptures
105+
case .namedCapture(let name):
106+
// Named captures additionally record the capture's name on the atom.
107+
return .atom(name: name.value, type: type)
108+
+ innerCaptures
109+
default:
110+
// Non-capturing group kinds add nothing beyond the child's captures.
111+
return innerCaptures
112+
}
113+
}
112+
96113
// TODO: We'll likely want/need a generalization of
97114
// conditional's condition kind.
98115
public mutating func condition<T: _TreeNode>(

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,42 @@ extension Compiler.ByteCodeGen {
246246
builder.label(success)
247247
}
248248

249+
/// Emits the instructions that run a custom matcher and, when `capture` is
/// provided, store the value it produced into that capture register.
///
/// - Parameters:
///   - matcher: The custom matching closure to register with the builder.
///   - capture: Optional capture register that receives the matcher's value.
///     When `nil` (the default) the value is only written to a freshly
///     allocated value register.
mutating func emitMatcher(
250+
_ matcher: @escaping _MatcherInterface,
251+
into capture: CaptureRegister? = nil
252+
) {
253+
254+
// TODO: Consider emitting consumer interface if
255+
// not captured. This may mean we should store
256+
// an existential instead of a closure...
257+
258+
// Register the closure with the builder. From here on the local `matcher`
// shadows the parameter and names the register returned by the builder.
let matcher = builder.makeMatcherFunction(matcher)
259+
260+
// A value register is allocated unconditionally, even when not capturing.
let valReg = builder.makeValueRegister()
261+
builder.buildMatcher(matcher, into: valReg)
262+
263+
// TODO: Instruction to store directly
264+
if let cap = capture {
265+
builder.buildMove(valReg, into: cap)
266+
}
267+
}
268+
249269
mutating func emitGroup(
250270
_ kind: AST.Group.Kind, _ child: DSLTree.Node
251271
) throws -> CaptureRegister? {
252272
options.beginScope()
253273
defer { options.endScope() }
254274

275+
// If we have a strong type, write result directly into
276+
// the capture register.
277+
//
278+
// FIXME: Unify with .groupTransform
279+
if kind.isCapturing, case let .matcher(_, m) = child {
280+
let cap = builder.makeCapture()
281+
emitMatcher(m, into: cap)
282+
return cap
283+
}
284+
255285
if let lookaround = kind.lookaroundKind {
256286
try emitLookaround(lookaround, child)
257287
return nil
@@ -541,8 +571,10 @@ extension Compiler.ByteCodeGen {
541571
throw Unsupported("absent function")
542572
case .consumer:
543573
throw Unsupported("consumer")
544-
case .consumerValidator:
545-
throw Unsupported("consumer validator")
574+
575+
case let .matcher(_, f):
576+
emitMatcher(f)
577+
546578
case .characterPredicate:
547579
throw Unsupported("character predicates")
548580

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ extension DSLTree.Node {
4242

4343
case .consumer:
4444
fatalError("FIXME: Is this where we handle them?")
45-
case .consumerValidator:
45+
case .matcher:
4646
fatalError("FIXME: Is this where we handle them?")
4747
case .characterPredicate:
4848
fatalError("FIXME: Is this where we handle them?")

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ extension Instruction.Payload {
6262
case packedEltBool(ElementRegister, BoolRegister)
6363
case packedPosPos(PositionRegister, PositionRegister)
6464
case packedCapTran(CaptureRegister, TransformRegister)
65+
case packedMatchVal(MatcherRegister, ValueRegister)
66+
case packedValueCapture(ValueRegister, CaptureRegister)
6567
}
6668
}
6769

@@ -289,5 +291,23 @@ extension Instruction.Payload {
289291
) {
290292
interpretPair()
291293
}
294+
295+
/// Creates a payload from a value register and a capture register by
/// forwarding both, in that order, to the two-argument initializer.
init(value: ValueRegister, capture: CaptureRegister) {
296+
self.init(value, capture)
297+
}
298+
/// The payload read back as a `(ValueRegister, CaptureRegister)` pair.
/// Decoding is delegated to `interpretPair()`.
var pairedValueCapture: (
299+
ValueRegister, CaptureRegister
300+
) {
301+
interpretPair()
302+
}
303+
304+
/// Creates a payload from a matcher register and a value register by
/// forwarding both, in that order, to the two-argument initializer.
init(matcher: MatcherRegister, value: ValueRegister) {
305+
self.init(matcher, value)
306+
}
307+
/// The payload read back as a `(MatcherRegister, ValueRegister)` pair.
/// Decoding is delegated to `interpretPair()`.
var pairedMatcherValue: (
308+
MatcherRegister, ValueRegister
309+
) {
310+
interpretPair()
311+
}
292312
}
293313

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ extension Instruction {
167167
/// compare against.
168168
case assertion
169169

170+
// MARK: Extension points
171+
170172
/// Advance the input position based on the result by calling the consume
171173
/// function.
172174
///
@@ -176,16 +178,28 @@ extension Instruction {
176178
/// Custom lookaround assertion operation.
177179
/// Triggers a failure if customFunction returns false.
178180
///
179-
/// assert(
180-
/// _ customFunction: (
181-
/// input: Input,
182-
/// currentPos: Position,
183-
/// bounds: Range<Position>) -> Bool
184-
/// )
181+
/// assert(_ customFunction: (
182+
/// input: Input,
183+
/// currentPos: Position,
184+
/// bounds: Range<Position>
185+
/// ) -> Bool)
185186
///
186187
/// Operands: destination bool register, assert hook register
187188
case assertBy
188189

190+
/// Custom value-creating consume operation.
191+
///
192+
/// match(
193+
/// _ matchFunction: (
194+
/// input: Input,
195+
/// bounds: Range<Position>
196+
/// ) -> (Position, Any)?,
197+
/// into: ValueReg
198+
/// )
199+
///
200+
/// Operands: matcher function register, destination value register
201+
case matchBy
202+
189203
// MARK: Matching: Save points
190204

191205
/// Add a save point
@@ -246,6 +260,11 @@ extension Instruction {
246260
///
247261
case transformCapture
248262

263+
/// Save a value into a capture register
264+
///
265+
/// captureValue(_: ValReg, into _: CapReg)
266+
case captureValue
267+
249268
/// Match a previously captured value
250269
///
251270
/// backreference(_:CapReg)
@@ -273,6 +292,8 @@ extension Instruction {
273292

274293
// TODO: Fused assertions. It seems like we often want to
275294
// branch based on assertion fail or success.
295+
296+
276297
}
277298
}
278299

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ extension MEProgram where Input.Element: Hashable {
2222
var consumeFunctions: [ConsumeFunction] = []
2323
var assertionFunctions: [AssertionFunction] = []
2424
var transformFunctions: [TransformFunction] = []
25+
var matcherFunctions: [MatcherFunction] = []
2526

2627
// Map tokens to actual addresses
2728
var addressTokens: [InstructionAddress?] = []
@@ -32,6 +33,7 @@ extension MEProgram where Input.Element: Hashable {
3233
var nextIntRegister = IntRegister(0)
3334
var nextPositionRegister = PositionRegister(0)
3435
var nextCaptureRegister = CaptureRegister(0)
36+
var nextValueRegister = ValueRegister(0)
3537

3638
// Special addresses or instructions
3739
var failAddressToken: AddressToken? = nil
@@ -237,6 +239,22 @@ extension MEProgram.Builder {
237239
.init(capture: cap, transform: trans)))
238240
}
239241

242+
/// Appends a `.matchBy` instruction to the instruction list.
///
/// - Parameters:
///   - fun: Register identifying the matcher function to invoke.
///   - reg: Value register paired with `fun` in the instruction's payload.
public mutating func buildMatcher(
243+
_ fun: MatcherRegister, into reg: ValueRegister
244+
) {
245+
instructions.append(.init(
246+
.matchBy,
247+
.init(matcher: fun, value: reg)))
248+
}
249+
250+
/// Appends a `.captureValue` instruction to the instruction list.
///
/// Note that although this builder method is named "move", the opcode it
/// emits is `.captureValue`.
///
/// - Parameters:
///   - value: Value register paired into the instruction's payload.
///   - capture: Capture register paired into the instruction's payload.
public mutating func buildMove(
251+
_ value: ValueRegister, into capture: CaptureRegister
252+
) {
253+
instructions.append(.init(
254+
.captureValue,
255+
.init(value: value, capture: capture)))
256+
}
257+
240258
public mutating func buildBackreference(
241259
_ cap: CaptureRegister
242260
) {
@@ -298,9 +316,11 @@ extension MEProgram.Builder {
298316
regInfo.bools = nextBoolRegister.rawValue
299317
regInfo.ints = nextIntRegister.rawValue
300318
regInfo.positions = nextPositionRegister.rawValue
319+
regInfo.values = nextValueRegister.rawValue
301320
regInfo.consumeFunctions = consumeFunctions.count
302321
regInfo.assertionFunctions = assertionFunctions.count
303322
regInfo.transformFunctions = transformFunctions.count
323+
regInfo.matcherFunctions = matcherFunctions.count
304324
regInfo.captures = nextCaptureRegister.rawValue
305325

306326
return MEProgram(
@@ -311,6 +331,7 @@ extension MEProgram.Builder {
311331
staticConsumeFunctions: consumeFunctions,
312332
staticAssertionFunctions: assertionFunctions,
313333
staticTransformFunctions: transformFunctions,
334+
staticMatcherFunctions: matcherFunctions,
314335
registerInfo: regInfo,
315336
captureStructure: captureStructure)
316337
}
@@ -399,6 +420,10 @@ extension MEProgram.Builder {
399420
defer { nextPositionRegister.rawValue += 1 }
400421
return nextPositionRegister
401422
}
423+
/// Allocates a value register.
///
/// - Returns: The current `nextValueRegister`. The counter's `rawValue` is
///   incremented afterwards, because the `defer` runs once the return value
///   has been read.
public mutating func makeValueRegister() -> ValueRegister {
424+
defer { nextValueRegister.rawValue += 1 }
425+
return nextValueRegister
426+
}
402427

403428
// Allocate and initialize a register
404429
public mutating func makeIntRegister(
@@ -456,5 +481,11 @@ extension MEProgram.Builder {
456481
defer { transformFunctions.append(f) }
457482
return TransformRegister(transformFunctions.count)
458483
}
484+
/// Stores a matcher function in `matcherFunctions` and returns a register
/// for it.
///
/// - Parameter f: The matcher function to store.
/// - Returns: A `MatcherRegister` built from the array's count *before* `f`
///   is appended (the `defer` runs after the return expression is
///   evaluated), i.e. the position `f` occupies once appended.
public mutating func makeMatcherFunction(
485+
_ f: @escaping MEProgram.MatcherFunction
486+
) -> MatcherRegister {
487+
defer { matcherFunctions.append(f) }
488+
return MatcherRegister(matcherFunctions.count)
489+
}
459490
}
460491

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ extension Processor {
6060
assert(stack.isEmpty)
6161
assert(valueStack.isEmpty)
6262
assert(currentCaptureBegin == nil)
63-
} else {
64-
assert(!stack.isEmpty || currentCaptureBegin != nil)
63+
} else if currentCaptureBegin == nil {
64+
assert(!stack.isEmpty || !valueStack.isEmpty)
6565
}
6666
if hasValues {
6767
// FIXME: how?
@@ -109,10 +109,14 @@ extension Processor {
109109
}
110110

111111
mutating func registerValue(
112-
_ value: Any
112+
_ value: Any,
113+
overwriteInitial: SavePoint? = nil
113114
) {
114115
_invariantCheck()
115116
defer { _invariantCheck() }
117+
if let sp = overwriteInitial {
118+
self.startState = sp
119+
}
116120
valueStack.append(value)
117121
}
118122

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ public struct MEProgram<Input: Collection> where Input.Element: Equatable {
1717
(Input, Input.Index, Range<Input.Index>) -> Bool
1818
public typealias TransformFunction =
1919
(Input, Range<Input.Index>) -> Any?
20+
public typealias MatcherFunction =
21+
(Input, Range<Input.Index>) -> (Input.Index, Any)?
2022

2123
var instructions: InstructionList<Instruction>
2224

@@ -26,6 +28,7 @@ public struct MEProgram<Input: Collection> where Input.Element: Equatable {
2628
var staticConsumeFunctions: [ConsumeFunction]
2729
var staticAssertionFunctions: [AssertionFunction]
2830
var staticTransformFunctions: [TransformFunction]
31+
var staticMatcherFunctions: [MatcherFunction]
2932

3033
var registerInfo: RegisterInfo
3134

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,19 @@ extension Processor {
358358
}
359359
controller.step()
360360

361+
case .matchBy:
362+
let (matcherReg, valReg) = payload.pairedMatcherValue
363+
let matcher = registers[matcherReg]
364+
guard let (nextIdx, val) = matcher(
365+
input, currentPosition..<bounds.upperBound
366+
) else {
367+
signalFailure()
368+
return
369+
}
370+
registers[valReg] = val
371+
advance(to: nextIdx)
372+
controller.step()
373+
361374
case .print:
362375
// TODO: Debug stream
363376
doPrint(registers[payload.string])
@@ -423,6 +436,16 @@ extension Processor {
423436
storedCaptures[capNum].registerValue(value)
424437

425438
controller.step()
439+
440+
case .captureValue:
441+
let (val, cap) = payload.pairedValueCapture
442+
let value = registers[val]
443+
let capNum = Int(asserting: cap.rawValue)
444+
let sp = makeSavePoint(self.currentPC)
445+
storedCaptures[capNum].registerValue(
446+
value, overwriteInitial: sp)
447+
controller.step()
426448
}
449+
427450
}
428451
}

0 commit comments

Comments
 (0)