Skip to content

Commit 6cfceaf

Browse files
committed
Handle end-of-input in lexUntil(eating:)
Throw an error if we reach the end of input before we encounter the closing delimiter we expect. Also add an overload of `lexUntil(eating:)` that takes a character.
1 parent 32fbefd commit 6cfceaf

File tree

2 files changed

+61
-19
lines changed

2 files changed

+61
-19
lines changed

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 38 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,18 @@ extension Source {
100100
}
101101
}
102102

103+
mutating func tryEatNonEmpty(_ c: Char) throws -> Bool {
104+
guard !isEmpty else { throw ParseError.expected(String(c)) }
105+
return tryEat(c)
106+
}
107+
108+
mutating func tryEatNonEmpty<C: Collection>(sequence c: C) throws -> Bool
109+
where C.Element == Char
110+
{
111+
guard !isEmpty else { throw ParseError.expected(String(c)) }
112+
return tryEat(sequence: c)
113+
}
114+
103115
/// Throws an expected ASCII character error if not matched
104116
mutating func expectASCII() throws -> Located<Character> {
105117
try recordLoc { src in
@@ -225,7 +237,7 @@ extension Source {
225237
// Hex numbers.
226238
case "u", "x":
227239
if src.tryEat("{") {
228-
let str = src.lexUntil(eating: "}").value
240+
let str = try src.lexUntil(eating: "}").value
229241
return try Source.validateUnicodeScalar(str, .hex)
230242
}
231243
let numDigits = base == "u" ? 4 : 2
@@ -235,7 +247,7 @@ extension Source {
235247

236248
// Octal numbers.
237249
case "o" where src.tryEat("{"):
238-
let str = src.lexUntil(eating: "}").value
250+
let str = try src.lexUntil(eating: "}").value
239251
return try Source.validateUnicodeScalar(str, .octal)
240252

241253
case "0":
@@ -342,27 +354,33 @@ extension Source {
342354
}
343355

344356
private mutating func lexUntil(
345-
_ predicate: (inout Source) -> Bool
346-
) -> Located<String> {
347-
recordLoc { src in
357+
_ predicate: (inout Source) throws -> Bool
358+
) rethrows -> Located<String> {
359+
try recordLoc { src in
348360
var result = ""
349-
while !predicate(&src) {
361+
while try !predicate(&src) {
350362
result.append(src.eat())
351363
}
352364
return result
353365
}
354366
}
355367

356-
private mutating func lexUntil(eating end: String) -> Located<String> {
357-
lexUntil { $0.tryEat(sequence: end) }
368+
private mutating func lexUntil(eating end: String) throws -> Located<String> {
369+
try lexUntil { try $0.tryEatNonEmpty(sequence: end) }
370+
}
371+
372+
private mutating func lexUntil(
373+
eating end: Character
374+
) throws -> Located<String> {
375+
try lexUntil(eating: String(end))
358376
}
359377

360378
/// Expect a linear run of non-nested non-empty content
361379
private mutating func expectQuoted(
362380
endingWith end: String
363381
) throws -> Located<String> {
364382
try recordLoc { src in
365-
let result = src.lexUntil(eating: end).value
383+
let result = try src.lexUntil(eating: end).value
366384
guard !result.isEmpty else {
367385
throw ParseError.misc("Expected non-empty contents")
368386
}
@@ -586,13 +604,13 @@ extension Source {
586604

587605
// We should either have a unicode scalar.
588606
if src.tryEat(sequence: "U+") {
589-
let str = src.lexUntil(eating: "}").value
607+
let str = try src.lexUntil(eating: "}").value
590608
return .scalar(try Source.validateUnicodeScalar(str, .hex))
591609
}
592610

593611
// Or we should have a character name.
594612
// TODO: Validate the types of characters that can appear in the name?
595-
return .namedCharacter(src.lexUntil(eating: "}").value)
613+
return .namedCharacter(try src.lexUntil(eating: "}").value)
596614
}
597615
}
598616

@@ -606,14 +624,15 @@ extension Source {
606624
// of true), and its key is inferred.
607625
// TODO: We could have better recovery here if we only ate the characters
608626
// that property keys and values can use.
609-
let lhs = src.lexUntil { $0.peek() == "=" || $0.starts(with: end) }.value
610-
if src.tryEat(sequence: end) {
611-
return try Source.classifyCharacterPropertyValueOnly(lhs)
612-
}
613-
src.eat(asserting: "=")
614-
615-
let rhs = src.lexUntil(eating: end).value
616-
return try Source.classifyCharacterProperty(key: lhs, value: rhs)
627+
let lhs = src.lexUntil {
628+
$0.isEmpty || $0.peek() == "=" || $0.starts(with: end)
629+
}.value
630+
if src.tryEat("=") {
631+
let rhs = try src.lexUntil(eating: end).value
632+
return try Source.classifyCharacterProperty(key: lhs, value: rhs)
633+
}
634+
try src.expect(sequence: end)
635+
return try Source.classifyCharacterPropertyValueOnly(lhs)
617636
}
618637
}
619638

Tests/RegexTests/LexTests.swift

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,29 @@ extension RegexTests {
9191
_ = try $0.lexGroupStart()
9292
}
9393

94+
// Test expected closing delimiters.
95+
diagnose(#"\u{5"#, expecting: .expected("}")) { src in
96+
_ = try src.lexAtom(isInCustomCharacterClass: false)
97+
}
98+
diagnose(#"\x{5"#, expecting: .expected("}")) { src in
99+
_ = try src.lexAtom(isInCustomCharacterClass: false)
100+
}
101+
diagnose(#"\N{A"#, expecting: .expected("}")) { src in
102+
_ = try src.lexAtom(isInCustomCharacterClass: false)
103+
}
104+
diagnose(#"\N{U+A"#, expecting: .expected("}")) { src in
105+
_ = try src.lexAtom(isInCustomCharacterClass: false)
106+
}
107+
diagnose(#"\p{a"#, expecting: .expected("}")) { src in
108+
_ = try src.lexAtom(isInCustomCharacterClass: false)
109+
}
110+
diagnose(#"\p{a="#, expecting: .expected("}")) { src in
111+
_ = try src.lexAtom(isInCustomCharacterClass: false)
112+
}
113+
diagnose(#"(?#"#, expecting: .expected(")")) { src in
114+
_ = try src.lexComment()
115+
}
116+
94117
// TODO: want to dummy print out source ranges, etc, test that.
95118
}
96119

0 commit comments

Comments
 (0)