Skip to content

Commit fba0bf2

Browse files
committed
Improve recovery for identifiers and text segment options
Scan to the closing delimiter of an invalid identifier, and better diagnose an invalid text segment option.
1 parent 050d87c commit fba0bf2

File tree

3 files changed

+32
-19
lines changed

3 files changed

+32
-19
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ enum ParseError: Error, Hashable {
5555

5656
case unknownGroupKind(String)
5757
case unknownCalloutKind(String)
58+
case unknownTextSegmentMatchingOption(Character)
5859

5960
case invalidMatchingOption(Character)
6061
case cannotRemoveMatchingOptionsAfterCaret
@@ -166,6 +167,8 @@ extension ParseError: CustomStringConvertible {
166167
return "unknown group kind '(\(str)'"
167168
case let .unknownCalloutKind(str):
168169
return "unknown callout kind '\(str)'"
170+
case let .unknownTextSegmentMatchingOption(m):
171+
return "unknown text segment mode '\(m)'; expected 'w' or 'g'"
169172
case let .invalidMatchingOption(c):
170173
return "invalid matching option '\(c)'"
171174
case .cannotRemoveMatchingOptionsAfterCaret:

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -812,18 +812,28 @@ extension Parser {
812812
case "S": return .asciiOnlySpace
813813
case "W": return .asciiOnlyWord
814814
case "y":
815-
p.expect("{")
815+
// Default to grapheme cluster if unknown.
816+
let recoveryMode = OptKind.textSegmentGraphemeMode
817+
guard p.expect("{") else { return recoveryMode }
818+
819+
guard let optChar = p.tryEatWithLoc(), optChar.value != "}" else {
820+
p.errorAtCurrentPosition(.expected("text segment mode"))
821+
return recoveryMode
822+
}
816823
let opt: OptKind
817-
if p.tryEat("w") {
824+
switch optChar.value {
825+
case "w":
818826
opt = .textSegmentWordMode
819-
} else {
820-
p.expect("g")
827+
case "g":
821828
opt = .textSegmentGraphemeMode
829+
case let x:
830+
p.error(.unknownTextSegmentMatchingOption(x), at: optChar.location)
831+
opt = recoveryMode
822832
}
823833
p.expect("}")
824834
return opt
825835

826-
// Swift semantic level options
836+
// Swift semantic level options
827837
case "X": return .graphemeClusterSemantics
828838
case "u": return .unicodeScalarSemantics
829839
case "b": return .byteSemantics
@@ -958,6 +968,8 @@ extension Parser {
958968
}
959969
guard let str = p.tryEatPrefix(\.isWordCharacter) else {
960970
p.error(.identifierMustBeAlphaNumeric(kind), at: firstChar.location)
971+
// Try skip ahead to the closing delimiter for better recovery.
972+
_ = p.lexUntil { $0.src.isEmpty || $0.src.starts(with: ending) }
961973
return ""
962974
}
963975
return str.value

Tests/RegexTests/ParseTests.swift

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2800,11 +2800,10 @@ extension RegexTests {
28002800
diagnosticTest(#"(?^"#, .expected(")"))
28012801
diagnosticTest(#"(?^i"#, .expected(")"))
28022802

2803-
// TODO: These errors could be better.
2804-
diagnosticTest(#"(?y)"#, .expected("{"), .expected("g"), .expected("}"), unsupported: true)
2805-
diagnosticTest(#"(?y{)"#, .expected("g"), .expected("}"), unsupported: true)
2803+
diagnosticTest(#"(?y)"#, .expected("{"), unsupported: true)
2804+
diagnosticTest(#"(?y{)"#, .unknownTextSegmentMatchingOption(")"), .expected("}"), .expected(")"), unsupported: true)
28062805
diagnosticTest(#"(?y{g)"#, .expected("}"), unsupported: true)
2807-
diagnosticTest(#"(?y{x})"#, .expected("g"), .expected("}"), .invalidMatchingOption("}"), unsupported: true)
2806+
diagnosticTest(#"(?y{x})"#, .unknownTextSegmentMatchingOption("x"), unsupported: true)
28082807

28092808
diagnosticTest(#"(?P"#, .expected(")"))
28102809
diagnosticTest(#"(?R"#, .expected(")"), unsupported: true)
@@ -3083,18 +3082,17 @@ extension RegexTests {
30833082
diagnosticTest(#"(?k)"#, .unknownGroupKind("?k"))
30843083
diagnosticTest(#"(?P#)"#, .invalidMatchingOption("#"))
30853084

3086-
// TODO: We shouldn't emit the expected closing delimiter here and elsewhere.
3087-
diagnosticTest(#"(?<#>)"#, .expected(">"), .identifierMustBeAlphaNumeric(.groupName))
3085+
diagnosticTest(#"(?<#>)"#, .identifierMustBeAlphaNumeric(.groupName))
30883086
diagnosticTest(#"(?'1A')"#, .identifierCannotStartWithNumber(.groupName))
30893087

30903088
// TODO: It might be better if tried to consume up to the closing `'` and
30913089
// diagnosed an invalid group name based on that.
30923090
diagnosticTest(#"(?'abc ')"#, .expected("'"))
30933091

3094-
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName), .expected("'"))
3092+
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName))
30953093

30963094
diagnosticTest(#"(?'-')"#, .expectedIdentifier(.groupName), unsupported: true)
3097-
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3095+
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
30983096
diagnosticTest(#"(?'a-b-c')"#, .expected("'"), unsupported: true)
30993097

31003098
diagnosticTest("(?x)(? : )", .unknownGroupKind("? "))
@@ -3175,12 +3173,12 @@ extension RegexTests {
31753173
diagnosticTest(#"\g{0}"#, .cannotReferToWholePattern)
31763174
diagnosticTest(#"(?(0))"#, .cannotReferToWholePattern, unsupported: true)
31773175

3178-
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3179-
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3180-
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3181-
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3182-
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3183-
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3176+
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3177+
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3178+
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3179+
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3180+
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3181+
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
31843182

31853183
diagnosticTest(#"(?P>1)"#, .identifierCannotStartWithNumber(.groupName), unsupported: true)
31863184
diagnosticTest(#"\k{1}"#, .identifierCannotStartWithNumber(.groupName), .invalidNamedReference("1"))

0 commit comments

Comments
 (0)