Skip to content

Commit 0fe786d

Browse files
committed
Support quantification in compiler and backend
Supports all numbers and kinds of quantification found in regexes. BUG: Reluctant restores are re-entrant, so they can fail to restore the loop trip count registers.
1 parent ed05306 commit 0fe786d

File tree

15 files changed

+1240
-547
lines changed

15 files changed

+1240
-547
lines changed

Sources/Prototypes/PEG/PEGTranspile.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ extension PEG.VM {
4040
case .comment(let s):
4141
builder.buildNop(s)
4242
case .consume(let n):
43-
builder.buildConsume(Distance(n))
43+
builder.buildAdvance(Distance(n))
4444
case .branch(_):
4545
builder.buildBranch(to: nextToken())
4646
case .condBranch(let condition, _):

Sources/_MatchingEngine/Engine/Builder.swift

Lines changed: 186 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,108 +8,213 @@ extension Program where Input.Element: Hashable {
88

99
// Map tokens to actual addresses
1010
var addressTokens: [InstructionAddress?] = []
11-
var addressFixups: [(InstructionAddress, AddressToken)] = []
11+
var addressFixups: [(InstructionAddress, AddressFixup)] = []
1212

1313
// Registers
1414
var nextBoolRegister = BoolRegister(0)
15+
var nextIntRegister = IntRegister(0)
16+
17+
// Special addresses or instructions
18+
var failAddressToken: AddressToken? = nil
1519

1620
public init() {}
1721
}
1822
}
1923

2024
extension Program.Builder {
25+
struct AddressFixup {
26+
var first: AddressToken
27+
var second: AddressToken? = nil
28+
29+
init(_ a: AddressToken) { self.first = a }
30+
init(_ a: AddressToken, _ b: AddressToken) {
31+
self.first = a
32+
self.second = b
33+
}
34+
}
35+
}
36+
37+
extension Program.Builder {
38+
// TODO: We want a better strategy for fixups, leaving
39+
// the operand in a different form isn't great...
40+
2141
public init<S: Sequence>(staticElements: S) where S.Element == Input.Element {
2242
staticElements.forEach { elements.store($0) }
2343
}
2444

2545
public mutating func buildNop(_ r: StringRegister? = nil) {
26-
instructions.append(.nop(r))
46+
instructions.append(.init(.nop, .init(optionalString: r)))
2747
}
2848
public mutating func buildNop(_ s: String) {
2949
buildNop(strings.store(s))
3050
}
3151

52+
public mutating func buildDecrement(
53+
_ i: IntRegister, nowZero: BoolRegister
54+
) {
55+
instructions.append(.init(
56+
.decrement, .init(bool: nowZero, int: i)))
57+
}
58+
59+
public mutating func buildMoveImmediate(
60+
_ value: UInt64, into: IntRegister
61+
) {
62+
instructions.append(.init(
63+
.moveImmediate, .init(immediate: value, int: into)))
64+
}
65+
66+
// TODO: generic
67+
public mutating func buildMoveImmediate(
68+
_ value: Int, into: IntRegister
69+
) {
70+
let uint = UInt64(truncatingIfNeeded: value)
71+
assert(uint == value)
72+
buildMoveImmediate(uint, into: into)
73+
}
74+
3275
public mutating func buildBranch(to t: AddressToken) {
33-
instructions.append(.branch())
76+
instructions.append(.init(.branch))
3477
fixup(to: t)
3578
}
3679
public mutating func buildCondBranch(
3780
_ condition: BoolRegister, to t: AddressToken
3881
) {
39-
instructions.append(.condBranch(condition: condition))
82+
instructions.append(
83+
.init(.condBranch, .init(bool: condition)))
84+
fixup(to: t)
85+
}
86+
87+
public mutating func buildCondBranch(
88+
to t: AddressToken, ifZeroElseDecrement i: IntRegister
89+
) {
90+
instructions.append(
91+
.init(.condBranchZeroElseDecrement, .init(int: i)))
4092
fixup(to: t)
4193
}
4294

4395
public mutating func buildSave(_ t: AddressToken) {
44-
instructions.append(.save())
96+
instructions.append(.init(.save))
4597
fixup(to: t)
4698
}
4799
public mutating func buildSaveAddress(_ t: AddressToken) {
48-
instructions.append(.saveAddress())
100+
instructions.append(.init(.saveAddress))
49101
fixup(to: t)
50102
}
103+
public mutating func buildSplit(
104+
to: AddressToken, saving: AddressToken
105+
) {
106+
instructions.append(.init(.splitSaving))
107+
fixup(to: (to, saving))
108+
}
51109

52110
public mutating func buildClear() {
53-
instructions.append(.clear())
111+
instructions.append(.init(.clear))
54112
}
55113
public mutating func buildRestore() {
56-
instructions.append(.restore())
114+
instructions.append(.init(.restore))
57115
}
58116
public mutating func buildFail() {
59-
instructions.append(.fail())
117+
instructions.append(.init(.fail))
60118
}
61119
public mutating func buildCall(_ t: AddressToken) {
62-
instructions.append(.call())
120+
instructions.append(.init(.call))
63121
fixup(to: t)
64122
}
65123
public mutating func buildRet() {
66-
instructions.append(.ret())
124+
instructions.append(.init(.ret))
67125
}
68126

69127
public mutating func buildAbort(_ s: StringRegister? = nil) {
70-
instructions.append(.abort(s))
128+
instructions.append(.init(
129+
.abort, .init(optionalString: s)))
71130
}
72131
public mutating func buildAbort(_ s: String) {
73132
buildAbort(strings.store(s))
74133
}
75134

76-
public mutating func buildConsume(_ n: Distance) {
77-
instructions.append(.consume(n))
135+
public mutating func buildAdvance(_ n: Distance) {
136+
instructions.append(.init(.advance, .init(distance: n)))
78137
}
79138

80139
public mutating func buildMatch(_ e: Input.Element) {
81-
instructions.append(.match(elements.store(e)))
140+
instructions.append(.init(
141+
.match, .init(element: elements.store(e))))
82142
}
83143

84-
public mutating func buildConsume(by p: @escaping Program.ConsumeFunction) {
85-
instructions.append(.consume(by: makeConsumeFunction(p)))
144+
public mutating func buildConsume(
145+
by p: @escaping Program.ConsumeFunction
146+
) {
147+
instructions.append(.init(
148+
.consumeBy, .init(consumer: makeConsumeFunction(p))))
86149
}
87150

88-
public mutating func buildAssert(_ e: Input.Element, into c: BoolRegister) {
89-
instructions.append(.assertion(condition: c, elements.store(e)))
151+
public mutating func buildAssert(
152+
_ e: Input.Element, into cond: BoolRegister
153+
) {
154+
instructions.append(.init(.assertion, .init(
155+
element: elements.store(e), bool: cond)))
90156
}
91157

92158
public mutating func buildAccept() {
93-
instructions.append(.accept())
159+
instructions.append(.init(.accept))
94160
}
95161

96162
public mutating func buildPrint(_ s: StringRegister) {
97-
instructions.append(.print(s))
163+
instructions.append(.init(.print, .init(string: s)))
98164
}
99165

100-
public func assemble() -> Program {
166+
// TODO: Mutating because of fail address fixup, drop when
167+
// that's removed
168+
public mutating func assemble() -> Program {
169+
// TODO: This will add a fail instruction at the end every
170+
// time it's assembled. Better to do it to the local instruction
171+
// list copy, but that complicates logic. It's possible we
172+
// end up going a different route altogether eventually,
173+
// though.
174+
if let tok = failAddressToken {
175+
label(tok)
176+
buildFail()
177+
}
178+
101179
// Do a pass to map address tokens to addresses
102180
var instructions = instructions
103181
for (instAddr, tok) in addressFixups {
104-
instructions[instAddr.rawValue].operand.initializePayload(
105-
addressTokens[tok.rawValue]!
106-
)
182+
// FIXME: based on opcode, decide if we split...
183+
// Unfortunate...
184+
let inst = instructions[instAddr.rawValue]
185+
let addr = addressTokens[tok.first.rawValue]!
186+
let payload: Instruction.Payload
187+
188+
switch inst.opcode {
189+
case .condBranch:
190+
payload = .init(addr: addr, bool: inst.payload.bool)
191+
192+
case .condBranchZeroElseDecrement:
193+
payload = .init(addr: addr, int: inst.payload.int)
194+
195+
case .branch, .save, .saveAddress, .call:
196+
payload = .init(addr: addr)
197+
198+
case .splitSaving:
199+
guard let fix2 = tok.second else {
200+
fatalError("unreachable")
201+
}
202+
let saving = addressTokens[fix2.rawValue]!
203+
payload = .init(addr: addr, addr2: saving)
204+
205+
default: fatalError("unreachable")
206+
207+
}
208+
209+
instructions[instAddr.rawValue] = .init(
210+
inst.opcode, payload)
107211
}
108212

109213
var regInfo = Program.RegisterInfo()
110214
regInfo.elements = elements.count
111215
regInfo.strings = strings.count
112216
regInfo.bools = nextBoolRegister.rawValue
217+
regInfo.ints = nextIntRegister.rawValue
113218
regInfo.consumeFunctions = consumeFunctions.count
114219

115220
return Program(
@@ -155,22 +260,76 @@ extension Program.Builder {
155260
public mutating func fixup(to t: AddressToken) {
156261
assert(!instructions.isEmpty)
157262
addressFixups.append(
158-
(InstructionAddress(instructions.endIndex-1), t))
263+
(InstructionAddress(instructions.endIndex-1), .init(t)))
264+
}
265+
266+
// Associate the most recently added instruction with
267+
// the provided tokens, ensuring it is fixed up during
268+
// assembly
269+
public mutating func fixup(
270+
to ts: (AddressToken, AddressToken)
271+
) {
272+
assert(!instructions.isEmpty)
273+
addressFixups.append((
274+
InstructionAddress(instructions.endIndex-1),
275+
.init(ts.0, ts.1)))
276+
}
277+
278+
// Push an "empty" save point which will, upon restore, just restore from
279+
// the next save point. Currently, this is modelled by a branch to a "fail"
280+
// instruction, which the builder will ensure exists for us.
281+
//
282+
// This is useful for possessive quantification that needs some initial save
283+
// point to "ratchet" upon a successful match.
284+
public mutating func pushEmptySavePoint() {
285+
if failAddressToken == nil {
286+
failAddressToken = makeAddress()
287+
}
288+
buildSaveAddress(failAddressToken!)
159289
}
290+
160291
}
161292

162293
// Register helpers
163294
extension Program.Builder {
164-
public mutating func makeRegister() -> BoolRegister {
295+
public mutating func makeBoolRegister() -> BoolRegister {
165296
defer { nextBoolRegister.rawValue += 1 }
166297
return nextBoolRegister
167298
}
299+
public mutating func makeIntRegister() -> IntRegister {
300+
defer { nextIntRegister.rawValue += 1 }
301+
return nextIntRegister
302+
}
303+
304+
// Allocate and initialize a register
305+
public mutating func makeIntRegister(
306+
initialValue: Int
307+
) -> IntRegister {
308+
let r = makeIntRegister()
309+
self.buildMoveImmediate(initialValue, into: r)
310+
return r
311+
}
312+
313+
// 'kill' or release allocated registers
314+
public mutating func kill(_ r: IntRegister) {
315+
// TODO: Release/reuse registers, for now nop makes
316+
// reading the code easier
317+
buildNop("kill \(r)")
318+
}
319+
public mutating func kill(_ r: BoolRegister) {
320+
// TODO: Release/reuse registers, for now nop makes
321+
// reading the code easier
322+
buildNop("kill \(r)")
323+
}
168324

169325
public mutating func makeConsumeFunction(
170326
_ f: @escaping Program.ConsumeFunction
171327
) -> ConsumeFunctionRegister {
172328
defer { consumeFunctions.append(f) }
173329
return ConsumeFunctionRegister(consumeFunctions.count)
174330
}
331+
332+
// TODO: consider releasing registers
333+
175334
}
176335

Sources/_MatchingEngine/Engine/Consume.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ extension Engine {
3333
return cpu.currentPosition
3434
case .fail:
3535
return nil
36-
case .inprogress: cpu.cycle()
36+
case .inProgress: cpu.cycle()
3737
}
3838
}
3939
}()

0 commit comments

Comments
 (0)