Skip to content

Commit ce981d2

Browse files
committed
Support quoted literal sequences
1 parent 0fe786d commit ce981d2

File tree

9 files changed

+69
-4
lines changed

9 files changed

+69
-4
lines changed

Sources/_MatchingEngine/Engine/Builder.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ extension Program where Input.Element: Hashable {
33
var instructions: [Instruction] = []
44

55
var elements = TypedSetVector<Input.Element, _ElementRegister>()
6+
var sequences = TypedSetVector<[Input.Element], _SequenceRegister>()
67
var strings = TypedSetVector<String, _StringRegister>()
78
var consumeFunctions: [ConsumeFunction] = []
89

@@ -141,6 +142,14 @@ extension Program.Builder {
141142
.match, .init(element: elements.store(e))))
142143
}
143144

145+
public mutating func buildMatchSequence<S: Sequence>(
146+
_ s: S
147+
) where S.Element == Input.Element {
148+
instructions.append(.init(
149+
.matchSequence,
150+
.init(sequence: sequences.store(.init(s)))))
151+
}
152+
144153
public mutating func buildConsume(
145154
by p: @escaping Program.ConsumeFunction
146155
) {
@@ -212,6 +221,7 @@ extension Program.Builder {
212221

213222
var regInfo = Program.RegisterInfo()
214223
regInfo.elements = elements.count
224+
regInfo.sequences = sequences.count
215225
regInfo.strings = strings.count
216226
regInfo.bools = nextBoolRegister.rawValue
217227
regInfo.ints = nextIntRegister.rawValue
@@ -220,6 +230,7 @@ extension Program.Builder {
220230
return Program(
221231
instructions: InstructionList(instructions),
222232
staticElements: elements.stored,
233+
staticSequences: sequences.stored,
223234
staticStrings: strings.stored,
224235
staticConsumeFunctions: consumeFunctions,
225236
registerInfo: regInfo)

Sources/_MatchingEngine/Engine/InstPayload.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extension Instruction.Payload {
3131
// and variables
3232

3333
case string(StringRegister)
34+
case sequence(SequenceRegister)
3435
case optionalString(StringRegister?)
3536
case int(IntRegister)
3637
case distance(Distance)
@@ -127,6 +128,13 @@ extension Instruction.Payload {
127128
interpret()
128129
}
129130

131+
init(sequence: SequenceRegister) {
132+
self.init(sequence)
133+
}
134+
var sequence: SequenceRegister {
135+
interpret()
136+
}
137+
130138
init(optionalString: StringRegister?) {
131139
self.init(optionalString)
132140
}

Sources/_MatchingEngine/Engine/Instruction.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ extension Instruction {
116116
/// Operand: Element register to compare against.
117117
case match
118118

119+
/// Match against a sequence of elements
120+
///
121+
/// Operand: Sequence register to compare against.
122+
case matchSequence
123+
119124
/// Match against a provided element.
120125
///
121126
/// Operand: Packed condition register to write to and element register to

Sources/_MatchingEngine/Engine/Processor.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ extension Processor {
127127
func load() -> Element? {
128128
currentPosition < end ? input[currentPosition] : nil
129129
}
130+
func load(count: Int) -> Input.SubSequence? {
131+
let slice = input[currentPosition...].prefix(count)
132+
guard slice.count == count else { return nil }
133+
return slice
134+
}
130135

131136
mutating func signalFailure() {
132137
guard let (pc, pos, stackEnd) = savePoints.popLast()?.destructure
@@ -294,6 +299,22 @@ extension Processor {
294299
controller.step()
295300
}
296301

302+
case .matchSequence:
303+
let reg = payload.sequence
304+
let seq = registers[reg]
305+
let count = seq.count
306+
307+
guard let inputSlice = load(count: count),
308+
seq.elementsEqual(inputSlice)
309+
else {
310+
signalFailure()
311+
return
312+
}
313+
guard consume(.init(count)) else {
314+
fatalError("unreachable")
315+
}
316+
controller.step()
317+
297318
case .consumeBy:
298319
let reg = payload.consumer
299320
guard currentPosition < bounds.upperBound,

Sources/_MatchingEngine/Engine/Program.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ public struct Program<Input: Collection> where Input.Element: Equatable {
33
var instructions: InstructionList<Instruction>
44

55
var staticElements: [Input.Element]
6+
var staticSequences: [[Input.Element]]
67
var staticStrings: [String]
78
var staticConsumeFunctions: [ConsumeFunction]
89

Sources/_MatchingEngine/Engine/Registers.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ extension Processor {
44
// currently, these are static readonly
55
var elements: [Element]
66

7+
// currently, these are static readonly
8+
//
9+
// TODO: We want these to be `String` instead of `[Character]`...
10+
var sequences: [[Element]] = []
11+
712
// currently, hold output of assertions
813
var bools: [Bool] // TODO: bitset
914

@@ -40,6 +45,9 @@ extension Processor {
4045
subscript(_ i: StringRegister) -> String {
4146
strings[i.rawValue]
4247
}
48+
subscript(_ i: SequenceRegister) -> [Element] {
49+
sequences[i.rawValue]
50+
}
4351
subscript(_ i: IntRegister) -> Int {
4452
get { ints[i.rawValue] }
4553
set { ints[i.rawValue] = newValue }
@@ -67,6 +75,9 @@ extension Processor.Registers {
6775
self.elements = program.staticElements
6876
assert(elements.count == info.elements)
6977

78+
self.sequences = program.staticSequences
79+
assert(sequences.count == info.sequences)
80+
7081
self.consumeFunctions = program.staticConsumeFunctions
7182
assert(consumeFunctions.count == info.consumeFunctions)
7283

@@ -94,6 +105,7 @@ extension Processor.Registers {
94105
extension Program {
95106
struct RegisterInfo {
96107
var elements = 0
108+
var sequences = 0
97109
var bools = 0
98110
var strings = 0
99111
var consumeFunctions = 0

Sources/_MatchingEngine/Utility/TypedInt.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ public enum _SavePointAddress {}
127127
public typealias ElementRegister = TypedInt<_ElementRegister>
128128
public enum _ElementRegister {}
129129

130+
public typealias SequenceRegister = TypedInt<_SequenceRegister>
131+
public enum _SequenceRegister {}
132+
130133
/// The register number for a stored boolean value
131134
///
132135
/// E.g. used for conditional branches

Sources/_StringProcessing/Compiler.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ class Compiler {
9292
case _ where try node.generateConsumer(matchLevel) != nil:
9393
try builder.buildConsume(by: node.generateConsumer(matchLevel)!)
9494

95-
case .quote, .customCharacterClass, .atom:
95+
case .quote(let q):
96+
// We stick quoted content into read-only constant strings
97+
builder.buildMatchSequence(q.literal)
98+
99+
case .customCharacterClass, .atom:
96100
throw unsupported(node._dumpBase)
97101
}
98102
}

Tests/RegexTests/MatchTests.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,13 @@ extension RegexTests {
121121

122122
matchTest(
123123
#"a\Q .\Eb"#,
124-
input: "123a .bxyz", match: "a .b", xfail: true)
124+
input: "123a .bxyz", match: "a .b")
125125
matchTest(
126126
#"a\Q \Q \\.\Eb"#,
127-
input: #"123a \Q \\.bxyz"#, match: #"a \Q \\.b"#, xfail: true)
127+
input: #"123a \Q \\.bxyz"#, match: #"a \Q \\.b"#)
128128
matchTest(
129129
#"\d\Q...\E"#,
130-
input: "Countdown: 3... 2... 1...", match: "3...", xfail: true)
130+
input: "Countdown: 3... 2... 1...", match: "3...")
131131

132132
// MARK: Comments
133133

0 commit comments

Comments
 (0)