Skip to content

Commit 65275bd

Browse files
committed
Parse raw identifiers.
1 parent d145cb2 commit 65275bd

File tree

7 files changed

+285
-24
lines changed

7 files changed

+285
-24
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,24 +2025,53 @@ extension Lexer.Cursor {
20252025
// Check whether we have an identifier followed by another backtick, in which
20262026
// case this is an escaped identifier.
20272027
let identifierStart = self
2028-
if self.advance(if: { $0.isValidIdentifierStartCodePoint }) {
2029-
// Keep continuing the identifier.
2030-
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
20312028

2032-
// If we have the terminating "`", it's an escaped identifier.
2033-
if self.advance(matching: "`") {
2034-
return Lexer.Result(.identifier)
2029+
// Track some information while advancing so that we can more efficiently
2030+
// detect invalid identifiers later. Even if we end in a situation that is
2031+
// invalid (for example, it contains a backslash), we want to continue
2032+
// scanning until we reach a terminating backtick if possible because it
2033+
// provides better error recovery and more likely resembles what the user
2034+
// was trying to write.
2035+
var sawNonWhitespace = false
2036+
var sawNonOperator = false
2037+
var sawBackslash = false
2038+
var isFirstScalar = true
2039+
self.advance(while: {
2040+
guard $0.isValidWhenLexingRawIdentifier else {
2041+
return false
20352042
}
2036-
}
2037-
2038-
// Special case; allow '`$`'.
2039-
if quote.starts(with: "`$`".utf8) {
2040-
self = quote
2041-
let firstBacktickConsumed = self.advance(matching: "`")
2042-
let dollarConsumed = self.advance(matching: "$")
2043-
let secondBacktickConsumed = self.advance(matching: "`")
2044-
precondition(firstBacktickConsumed && dollarConsumed && secondBacktickConsumed)
2045-
return Lexer.Result(.identifier)
2043+
if isFirstScalar {
2044+
if !$0.isOperatorStartCodePoint {
2045+
sawNonOperator = true
2046+
}
2047+
isFirstScalar = false
2048+
} else if !$0.isOperatorContinuationCodePoint {
2049+
sawNonOperator = true
2050+
}
2051+
if !$0.properties.isWhitespace {
2052+
sawNonWhitespace = true
2053+
}
2054+
if $0 == "\\" {
2055+
sawBackslash = true
2056+
}
2057+
return true
2058+
})
2059+
2060+
// If we have the terminating "`", it's an escaped identifier. Invalid contents
2061+
// (empty, all whitespace, only operators, a backslash) attach a diagnostic.
2062+
let text = identifierStart.text(upTo: self)
2063+
if self.advance(matching: "`") {
2064+
var error: LexingDiagnostic? = nil
2065+
if text.isEmpty {
2066+
error = LexingDiagnostic(.rawIdentifierCannotBeEmpty, position: quote)
2067+
} else if !sawNonWhitespace {
2068+
error = LexingDiagnostic(.rawIdentifierCannotBeEntirelyWhitespace, position: quote)
2069+
} else if !sawNonOperator {
2070+
error = LexingDiagnostic(.rawIdentifierCannotBeOperator, position: quote)
2071+
} else if sawBackslash {
2072+
error = LexingDiagnostic(.rawIdentifierCannotContainBacklash, position: quote)
2073+
}
2074+
return Lexer.Result(.identifier, error: error)
20462075
}
20472076

20482077
// The backtick is punctuation.

Sources/SwiftParser/Lexer/UnicodeScalarExtensions.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,22 @@ extension Unicode.Scalar {
9393
return true
9494
}
9595

96+
/// True if this code point is allowed when lexing a raw identifier.
97+
///
98+
/// This does not mean that the character is necessarily _valid_ inside a
99+
/// raw identifier. We scan more than we eventually accept so that we can
100+
/// provide better diagnostics and recovery in certain failing cases, like
101+
/// when a raw identifier contains a backslash or is entirely an operator.
102+
var isValidWhenLexingRawIdentifier: Bool {
103+
if self.value < 0x80 {
104+
guard isPrintableASCII else {
105+
return false
106+
}
107+
return UInt8(self.value) != "`"
108+
}
109+
return true
110+
}
111+
96112
/// isOperatorStartCodePoint - Return true if the specified code point is a
97113
/// valid start of an operator.
98114
var isOperatorStartCodePoint: Bool {

Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ public enum StaticTokenError: String, DiagnosticMessage {
7676
case invalidIdentifierStartCharacter = "an identifier cannot begin with this character"
7777
case invalidNumberOfHexDigitsInUnicodeEscape = #"\u{...} escape sequence expects between 1 and 8 hex digits"#
7878
case invalidUtf8 = "invalid UTF-8 found in source file"
79+
case rawIdentifierCannotBeEmpty = "a raw identifier cannot be empty"
80+
case rawIdentifierCannotBeEntirelyWhitespace = "a raw identifier cannot be entirely whitespace"
81+
case rawIdentifierCannotBeOperator = "a raw identifier cannot contain only operator characters"
82+
case rawIdentifierCannotContainBacklash = "a raw identifier cannot contain backslashes"
7983
case tokenDiagnosticOffsetOverflow =
8084
"the lexer discovered an error in this token but was not able to represent its offset due to overflow; please split the token"
8185
case sourceConflictMarker = "source control conflict marker in source file"
@@ -226,6 +230,13 @@ extension SwiftSyntax.TokenDiagnostic {
226230
case .multilineRegexClosingNotOnNewline: return StaticTokenError.multilineRegexClosingNotOnNewline
227231
case .nonBreakingSpace: return StaticTokenWarning.nonBreakingSpace
228232
case .nulCharacter: return StaticTokenWarning.nulCharacter
233+
case .rawIdentifierCannotBeEmpty: return StaticTokenError.rawIdentifierCannotBeEmpty
234+
case .rawIdentifierCannotBeEntirelyWhitespace:
235+
return StaticTokenError.rawIdentifierCannotBeEntirelyWhitespace
236+
case .rawIdentifierCannotBeOperator:
237+
return StaticTokenError.rawIdentifierCannotBeOperator
238+
case .rawIdentifierCannotContainBacklash:
239+
return StaticTokenError.rawIdentifierCannotContainBacklash
229240
case .sourceConflictMarker: return StaticTokenError.sourceConflictMarker
230241
case .spaceAtEndOfRegexLiteral: return StaticTokenError.spaceAtEndOfRegexLiteral
231242
case .spaceAtStartOfRegexLiteral: return StaticTokenError.spaceAtStartOfRegexLiteral

Sources/SwiftSyntax/TokenDiagnostic.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ public struct TokenDiagnostic: Hashable, Sendable {
4949
case multilineRegexClosingNotOnNewline
5050
case nonBreakingSpace
5151
case nulCharacter
52+
case rawIdentifierCannotBeEmpty
53+
case rawIdentifierCannotBeEntirelyWhitespace
54+
case rawIdentifierCannotBeOperator
55+
case rawIdentifierCannotContainBacklash
5256
case sourceConflictMarker
5357
case spaceAtEndOfRegexLiteral
5458
case spaceAtStartOfRegexLiteral
@@ -88,6 +92,10 @@ public struct TokenDiagnostic: Hashable, Sendable {
8892
case .multilineRegexClosingNotOnNewline: return .error
8993
case .nonBreakingSpace: return .warning
9094
case .nulCharacter: return .warning
95+
case .rawIdentifierCannotBeEmpty: return .error
96+
case .rawIdentifierCannotBeEntirelyWhitespace: return .error
97+
case .rawIdentifierCannotBeOperator: return .error
98+
case .rawIdentifierCannotContainBacklash: return .error
9199
case .sourceConflictMarker: return .error
92100
case .spaceAtEndOfRegexLiteral: return .error
93101
case .spaceAtStartOfRegexLiteral: return .error

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,14 @@ class LexerTests: ParserTestCase {
7676

7777
func testEscapedIdentifiers() {
7878
assertLexemes(
79-
"`Hello` `World` `$`",
79+
"`Hello` `World` `$` `with a space` `/not-an*operator+` `123`",
8080
lexemes: [
8181
LexemeSpec(.identifier, text: "`Hello`", trailing: " "),
8282
LexemeSpec(.identifier, text: "`World`", trailing: " "),
83-
LexemeSpec(.identifier, text: "`$`"),
83+
LexemeSpec(.identifier, text: "`$`", trailing: " "),
84+
LexemeSpec(.identifier, text: "`with a space`", trailing: " "),
85+
LexemeSpec(.identifier, text: "`/not-an*operator+`", trailing: " "),
86+
LexemeSpec(.identifier, text: "`123`"),
8487
]
8588
)
8689
}

Tests/SwiftParserTest/translated/DollarIdentifierTests.swift

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,15 +214,11 @@ final class DollarIdentifierTests: ParserTestCase {
214214
assertParse(
215215
"""
216216
func escapedDollarAnd() {
217-
1️⃣`$0` = 1
217+
`$0` = 1
218218
`$$` = 2
219219
`$abc` = 3
220220
}
221-
""",
222-
diagnostics: [
223-
// FIXME: Bad diagnostic
224-
DiagnosticSpec(message: "unexpected code in function")
225-
]
221+
"""
226222
)
227223
}
228224

Tests/SwiftParserTest/translated/EscapedIdentifiersTests.swift

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,202 @@ final class EscapedIdentifiersTests: ParserTestCase {
9999
)
100100
}
101101

102+
func testEscapedIdentifiers11() {
103+
assertParse(
104+
"""
105+
func `method with space and .:/`() {}
106+
`method with space and .:/`()
107+
"""
108+
)
109+
}
110+
111+
func testEscapedIdentifiers12() {
112+
assertParse(
113+
"""
114+
class `Class with space and .:/` {}
115+
var `var with space and .:/` = `Class with space and .:/`.self
116+
"""
117+
)
118+
}
119+
120+
func testEscapedIdentifiers13() {
121+
assertParse(
122+
"""
123+
enum `enum with space and .:/` {
124+
case `space cases`
125+
case `case with payload`(`some label`: `Class with space and .:/`)
126+
}
127+
"""
128+
)
129+
}
130+
131+
func testEscapedIdentifiers14() {
132+
assertParse(
133+
"""
134+
typealias `Typealias with space and .:/` = Int
135+
func `+ start with operator`() {}
136+
"""
137+
)
138+
}
139+
140+
func testEscapedIdentifiers15() {
141+
assertParse(
142+
"""
143+
struct `Escaped Type` {}
144+
func `escaped function`(`escaped label` `escaped arg`: `Escaped Type`) {}
145+
`escaped function`(`escaped label`: `Escaped Type`())
146+
let `escaped reference` = `escaped function`(`escaped label`:)
147+
`escaped reference`(`Escaped Type`())
148+
"""
149+
)
150+
}
151+
152+
func testEscapedIdentifiers16() {
153+
assertParse(
154+
"""
155+
let `@atSign` = 0
156+
let `#octothorpe` = 0
157+
"""
158+
)
159+
}
160+
161+
func testEscapedIdentifiers17() {
162+
assertParse(
163+
"""
164+
@propertyWrapper
165+
struct `@PoorlyNamedWrapper`<`The Value`> {
166+
var wrappedValue: `The Value`
167+
}
168+
struct WithWrappedProperty {
169+
@`@PoorlyNamedWrapper` var x: Int
170+
}
171+
"""
172+
)
173+
}
174+
175+
func testEscapedIdentifiers18() {
176+
assertParse(
177+
"""
178+
let 1️⃣`+` = 0
179+
let 2️⃣`^*^` = 0
180+
let 3️⃣`.` = 0
181+
let 4️⃣`?` = 0
182+
func 5️⃣`+`(lhs: Int, rhs: Int) -> Int
183+
""",
184+
diagnostics: [
185+
DiagnosticSpec(
186+
locationMarker: "1️⃣",
187+
message: "a raw identifier cannot contain only operator characters"
188+
),
189+
DiagnosticSpec(
190+
locationMarker: "2️⃣",
191+
message: "a raw identifier cannot contain only operator characters"
192+
),
193+
DiagnosticSpec(
194+
locationMarker: "3️⃣",
195+
message: "a raw identifier cannot contain only operator characters"
196+
),
197+
DiagnosticSpec(
198+
locationMarker: "4️⃣",
199+
message: "a raw identifier cannot contain only operator characters"
200+
),
201+
DiagnosticSpec(
202+
locationMarker: "5️⃣",
203+
message: "a raw identifier cannot contain only operator characters"
204+
),
205+
]
206+
)
207+
}
208+
209+
func testEscapedIdentifiers19() {
210+
assertParse(
211+
"""
212+
1️⃣`multiline is
213+
not allowed` = 5
214+
""",
215+
diagnostics: [
216+
DiagnosticSpec(
217+
locationMarker: "1️⃣",
218+
message: "extraneous code at top level"
219+
)
220+
]
221+
)
222+
}
223+
224+
func testEscapedIdentifiers20() {
225+
assertParse(
226+
"""
227+
1️⃣`null\u{0000}is not allowed` = 5
228+
`unprintable ascii\u{007f}is not allowed` = 10
229+
""",
230+
diagnostics: [
231+
DiagnosticSpec(
232+
locationMarker: "1️⃣",
233+
message: "extraneous code at top level"
234+
)
235+
]
236+
)
237+
}
238+
239+
func testEscapedIdentifiers21() {
240+
assertParse(
241+
"""
242+
1️⃣`` = 5
243+
""",
244+
diagnostics: [
245+
DiagnosticSpec(
246+
locationMarker: "1️⃣",
247+
message: "a raw identifier cannot be empty"
248+
)
249+
]
250+
)
251+
}
252+
253+
func testEscapedIdentifiers22() {
254+
assertParse(
255+
"""
256+
1️⃣` ` = 5
257+
2️⃣` ` = 5
258+
3️⃣`\u{2000}` = 5
259+
""",
260+
diagnostics: [
261+
DiagnosticSpec(
262+
locationMarker: "1️⃣",
263+
message: "a raw identifier cannot be entirely whitespace"
264+
),
265+
DiagnosticSpec(
266+
locationMarker: "2️⃣",
267+
message: "a raw identifier cannot be entirely whitespace"
268+
),
269+
DiagnosticSpec(
270+
locationMarker: "3️⃣",
271+
message: "a raw identifier cannot be entirely whitespace"
272+
),
273+
]
274+
)
275+
}
276+
277+
func testEscapedIdentifiers23() {
278+
assertParse(
279+
#"""
280+
1️⃣`hello\there` = 5
281+
2️⃣`\` = 5
282+
3️⃣`back\\slash` = 5
283+
"""#,
284+
diagnostics: [
285+
DiagnosticSpec(
286+
locationMarker: "1️⃣",
287+
message: "a raw identifier cannot contain backslashes"
288+
),
289+
DiagnosticSpec(
290+
locationMarker: "2️⃣",
291+
message: "a raw identifier cannot contain backslashes"
292+
),
293+
DiagnosticSpec(
294+
locationMarker: "3️⃣",
295+
message: "a raw identifier cannot contain backslashes"
296+
),
297+
]
298+
)
299+
}
102300
}

0 commit comments

Comments
 (0)