@@ -188,6 +188,22 @@ extension Lexer.Cursor {
188
188
}
189
189
}
190
190
}
191
+
192
+ /// An error that was discovered in a lexeme while lexing it, recorded as the error's kind plus the cursor position at which it occurred.
193
+ struct LexingError {
194
+ let kind : LexerError . Kind // The kind of error that was discovered.
195
+ /// The cursor position in the token at which the error is located.
196
+ let position : Lexer . Cursor
197
+
198
+ init ( _ kind: LexerError . Kind , position: Lexer . Cursor ) { // Stores `kind` and `position` unchanged.
199
+ self . kind = kind
200
+ self . position = position
201
+ }
202
+
203
+ func lexerError( tokenStart: Lexer . Cursor ) -> LexerError { // Converts this error into a `LexerError` expressed relative to `tokenStart`.
204
+ return LexerError ( kind, byteOffset: tokenStart. distance ( to: position) ) // The byte offset is the distance from `tokenStart` to `position`.
205
+ }
206
+ }
191
207
}
192
208
193
209
extension Lexer {
@@ -259,7 +275,7 @@ extension Lexer {
259
275
let flags : Lexer . Lexeme . Flags
260
276
/// The error kind and the cursor pointing to the character at which the
261
277
/// error occurred
262
- let error : ( kind : LexerError . Kind , position : Lexer . Cursor ) ?
278
+ let error : Cursor . LexingError ?
263
279
let stateTransition : StateTransition ?
264
280
/// If set, overrides the trailing trivia lexing mode of the current state
265
281
/// for this lexeme.
@@ -268,7 +284,7 @@ extension Lexer {
268
284
init (
269
285
_ tokenKind: RawTokenKind ,
270
286
flags: Lexer . Lexeme . Flags = [ ] ,
271
- error: ( kind : LexerError . Kind , position : Cursor ) ? = nil ,
287
+ error: Cursor . LexingError ? = nil ,
272
288
stateTransition: StateTransition ? = nil ,
273
289
trailingTriviaLexingMode: Lexer . Cursor . TriviaLexingMode ? = nil
274
290
) {
@@ -293,7 +309,7 @@ extension Lexer.Cursor {
293
309
if let leadingTriviaMode = self . currentState. leadingTriviaLexingMode ( cursor: self ) {
294
310
let triviaResult = self . lexTrivia ( mode: leadingTriviaMode)
295
311
newlineInLeadingTrivia = triviaResult. newlinePresence
296
- error = error ?? triviaResult. error. map { LexerError ( $0 . kind , byteOffset : cursor. distance ( to : $0 . position ) ) }
312
+ error = error ?? triviaResult. error? . lexerError ( tokenStart : cursor)
297
313
} else {
298
314
newlineInLeadingTrivia = . absent
299
315
}
@@ -329,7 +345,7 @@ extension Lexer.Cursor {
329
345
let trailingTriviaStart = self
330
346
if let trailingTriviaMode = result. trailingTriviaLexingMode ?? currentState. trailingTriviaLexingMode ( cursor: self ) {
331
347
let triviaResult = self . lexTrivia ( mode: trailingTriviaMode)
332
- error = error ?? triviaResult. error. map { LexerError ( $0 . kind , byteOffset : cursor. distance ( to : $0 . position ) ) }
348
+ error = error ?? triviaResult. error? . lexerError ( tokenStart : cursor)
333
349
}
334
350
335
351
if self . currentState. shouldPopStateWhenReachingNewlineInTrailingTrivia && self . is ( at: " \r " , " \n " ) {
@@ -342,7 +358,7 @@ extension Lexer.Cursor {
342
358
}
343
359
344
360
self . previousTokenKind = result. tokenKind. base
345
- error = error ?? result. error. map { LexerError ( $0 . kind , byteOffset : cursor. distance ( to : $0 . position ) ) }
361
+ error = error ?? result. error? . lexerError ( tokenStart : cursor)
346
362
347
363
return . init(
348
364
tokenKind: result. tokenKind,
@@ -880,7 +896,7 @@ extension Lexer.Cursor {
880
896
return result
881
897
case . trivia:
882
898
assertionFailure ( " Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia " )
883
- return Lexer . Result ( . unknown, error: ( . invalidUtf8, self ) )
899
+ return Lexer . Result ( . unknown, error: LexingError ( . invalidUtf8, position : self ) )
884
900
}
885
901
}
886
902
}
@@ -1003,12 +1019,12 @@ extension Lexer.Cursor {
1003
1019
1004
1020
fileprivate struct TriviaResult {
1005
1021
let newlinePresence : NewlinePresence
1006
- let error : ( kind : LexerError . Kind , position : Lexer . Cursor ) ?
1022
+ let error : LexingError ?
1007
1023
}
1008
1024
1009
1025
fileprivate mutating func lexTrivia( mode: TriviaLexingMode ) -> TriviaResult {
1010
1026
var newlinePresence = NewlinePresence . absent
1011
- var error : ( kind : LexerError . Kind , position : Lexer . Cursor ) ? = nil
1027
+ var error : LexingError ? = nil
1012
1028
if mode == . escapedNewlineInMultiLineStringLiteral {
1013
1029
_ = self . advance ( matching: " \\ " )
1014
1030
self . advance ( while: { $0 == " # " } )
@@ -1071,7 +1087,7 @@ extension Lexer.Cursor {
1071
1087
continue
1072
1088
case UInt8 ( ascii: " < " ) , UInt8 ( ascii: " > " ) :
1073
1089
if self . tryLexConflictMarker ( start: start) {
1074
- error = ( . sourceConflictMarker, start)
1090
+ error = LexingError ( . sourceConflictMarker, position : start)
1075
1091
continue
1076
1092
}
1077
1093
// Start character of tokens.
@@ -1185,7 +1201,7 @@ extension Lexer.Cursor {
1185
1201
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1186
1202
return Lexer . Result (
1187
1203
. integerLiteral,
1188
- error: ( . invalidOctalDigitInIntegerLiteral, errorPos)
1204
+ error: LexingError ( . invalidOctalDigitInIntegerLiteral, position : errorPos)
1189
1205
)
1190
1206
}
1191
1207
@@ -1199,7 +1215,7 @@ extension Lexer.Cursor {
1199
1215
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1200
1216
return Lexer . Result (
1201
1217
. integerLiteral,
1202
- error: ( . invalidOctalDigitInIntegerLiteral, errorPos)
1218
+ error: LexingError ( . invalidOctalDigitInIntegerLiteral, position : errorPos)
1203
1219
)
1204
1220
}
1205
1221
@@ -1216,7 +1232,7 @@ extension Lexer.Cursor {
1216
1232
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1217
1233
return Lexer . Result (
1218
1234
. integerLiteral,
1219
- error: ( . invalidBinaryDigitInIntegerLiteral, errorPos)
1235
+ error: LexingError ( . invalidBinaryDigitInIntegerLiteral, position : errorPos)
1220
1236
)
1221
1237
}
1222
1238
@@ -1230,7 +1246,7 @@ extension Lexer.Cursor {
1230
1246
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1231
1247
return Lexer . Result (
1232
1248
. integerLiteral,
1233
- error: ( . invalidBinaryDigitInIntegerLiteral, errorPos)
1249
+ error: LexingError ( . invalidBinaryDigitInIntegerLiteral, position : errorPos)
1234
1250
)
1235
1251
}
1236
1252
@@ -1264,7 +1280,7 @@ extension Lexer.Cursor {
1264
1280
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1265
1281
return Lexer . Result (
1266
1282
. integerLiteral,
1267
- error: ( . invalidDecimalDigitInIntegerLiteral, errorPos)
1283
+ error: LexingError ( . invalidDecimalDigitInIntegerLiteral, position : errorPos)
1268
1284
)
1269
1285
}
1270
1286
@@ -1301,7 +1317,7 @@ extension Lexer.Cursor {
1301
1317
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1302
1318
return Lexer . Result (
1303
1319
. floatingLiteral,
1304
- error: ( errorKind, errorPos)
1320
+ error: LexingError ( errorKind, position : errorPos)
1305
1321
)
1306
1322
}
1307
1323
@@ -1313,7 +1329,7 @@ extension Lexer.Cursor {
1313
1329
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1314
1330
return Lexer . Result (
1315
1331
. floatingLiteral,
1316
- error: ( . invalidFloatingPointExponentDigit, errorPos)
1332
+ error: LexingError ( . invalidFloatingPointExponentDigit, position : errorPos)
1317
1333
)
1318
1334
}
1319
1335
}
@@ -1329,16 +1345,16 @@ extension Lexer.Cursor {
1329
1345
1330
1346
// 0x[0-9a-fA-F][0-9a-fA-F_]*
1331
1347
guard let peeked = self . peek ( ) else {
1332
- return Lexer . Result ( . integerLiteral, error: ( . expectedHexDigitInHexLiteral, self ) )
1348
+ return Lexer . Result ( . integerLiteral, error: LexingError ( . expectedHexDigitInHexLiteral, position : self ) )
1333
1349
}
1334
1350
1335
1351
guard Unicode . Scalar ( peeked) . isHexDigit else {
1336
1352
if Unicode . Scalar ( peeked) . isValidIdentifierContinuationCodePoint {
1337
1353
let errorPos = self
1338
1354
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1339
- return Lexer . Result ( . integerLiteral, error: ( . invalidHexDigitInIntegerLiteral, errorPos) )
1355
+ return Lexer . Result ( . integerLiteral, error: LexingError ( . invalidHexDigitInIntegerLiteral, position : errorPos) )
1340
1356
} else {
1341
- return Lexer . Result ( . integerLiteral, error: ( . expectedHexDigitInHexLiteral, self ) )
1357
+ return Lexer . Result ( . integerLiteral, error: LexingError ( . expectedHexDigitInHexLiteral, position : self ) )
1342
1358
}
1343
1359
}
1344
1360
@@ -1351,7 +1367,7 @@ extension Lexer.Cursor {
1351
1367
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1352
1368
return Lexer . Result (
1353
1369
. integerLiteral,
1354
- error: ( . invalidHexDigitInIntegerLiteral, errorPos)
1370
+ error: LexingError ( . invalidHexDigitInIntegerLiteral, position : errorPos)
1355
1371
)
1356
1372
} else {
1357
1373
return Lexer . Result ( . integerLiteral)
@@ -1380,7 +1396,7 @@ extension Lexer.Cursor {
1380
1396
}
1381
1397
return Lexer . Result (
1382
1398
. integerLiteral,
1383
- error: ( . expectedBinaryExponentInHexFloatLiteral, self )
1399
+ error: LexingError ( . expectedBinaryExponentInHexFloatLiteral, position : self )
1384
1400
)
1385
1401
}
1386
1402
} else {
@@ -1423,7 +1439,7 @@ extension Lexer.Cursor {
1423
1439
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1424
1440
return Lexer . Result (
1425
1441
. floatingLiteral,
1426
- error: ( errorKind, errorPos)
1442
+ error: LexingError ( errorKind, position : errorPos)
1427
1443
)
1428
1444
}
1429
1445
@@ -1435,7 +1451,7 @@ extension Lexer.Cursor {
1435
1451
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1436
1452
return Lexer . Result (
1437
1453
. floatingLiteral,
1438
- error: ( . invalidFloatingPointExponentDigit, errorPos)
1454
+ error: LexingError ( . invalidFloatingPointExponentDigit, position : errorPos)
1439
1455
)
1440
1456
}
1441
1457
return Lexer . Result ( . floatingLiteral)
@@ -1781,7 +1797,7 @@ extension Lexer.Cursor {
1781
1797
}
1782
1798
1783
1799
mutating func lexInStringLiteral( stringLiteralKind: StringLiteralKind , delimiterLength: Int ) -> Lexer . Result {
1784
- var error : ( LexerError . Kind , Lexer . Cursor ) ? = nil
1800
+ var error : LexingError ? = nil
1785
1801
1786
1802
while true {
1787
1803
switch self . peek ( ) {
@@ -1837,7 +1853,7 @@ extension Lexer.Cursor {
1837
1853
return Lexer . Result ( . stringSegment, error: error)
1838
1854
}
1839
1855
case . error( let errorKind) :
1840
- error = ( errorKind, self )
1856
+ error = LexingError ( errorKind, position : self )
1841
1857
self = clone
1842
1858
case . endOfString:
1843
1859
return Lexer . Result (
@@ -1997,15 +2013,15 @@ extension Lexer.Cursor {
1997
2013
case ( UInt8 ( ascii: " - " ) , UInt8 ( ascii: " > " ) ) : // ->
1998
2014
return Lexer . Result ( . arrow)
1999
2015
case ( UInt8 ( ascii: " * " ) , UInt8 ( ascii: " / " ) ) : // */
2000
- return Lexer . Result ( . unknown, error: ( . unexpectedBlockCommentEnd, tokStart) )
2016
+ return Lexer . Result ( . unknown, error: LexingError ( . unexpectedBlockCommentEnd, position : tokStart) )
2001
2017
default :
2002
2018
break
2003
2019
}
2004
2020
} else {
2005
2021
// Verify there is no "*/" in the middle of the identifier token, we reject
2006
2022
// it as potentially ending a block comment.
2007
2023
if tokStart. text ( upTo: self ) . contains ( " */ " ) {
2008
- return Lexer . Result ( . unknown, error: ( . unexpectedBlockCommentEnd, tokStart) )
2024
+ return Lexer . Result ( . unknown, error: LexingError ( . unexpectedBlockCommentEnd, position : tokStart) )
2009
2025
}
2010
2026
}
2011
2027
@@ -2111,7 +2127,7 @@ extension Lexer.Cursor {
2111
2127
2112
2128
enum UnknownCharactersClassification {
2113
2129
/// The characters consumed by `lexUnknown` should be classified as trivia
2114
- case trivia( error: ( kind : LexerError . Kind , position : Lexer . Cursor ) )
2130
+ case trivia( error: LexingError )
2115
2131
/// The characters consumed by `lexUnknown` should be classified as the contents of a lexeme
2116
2132
case lexemeContents( Lexer . Result )
2117
2133
}
@@ -2128,13 +2144,13 @@ extension Lexer.Cursor {
2128
2144
// start, attempt to recover by eating more continuation characters.
2129
2145
tmp. advance ( while: { Unicode . Scalar ( $0) . isValidIdentifierContinuationCodePoint } )
2130
2146
self = tmp
2131
- return . lexemeContents( Lexer . Result ( . identifier, error: ( . invalidIdentifierStartCharacter, position: start) ) )
2147
+ return . lexemeContents( Lexer . Result ( . identifier, error: LexingError ( . invalidIdentifierStartCharacter, position: start) ) )
2132
2148
}
2133
2149
2134
2150
// This character isn't allowed in Swift source.
2135
2151
guard let codepoint = tmp. advanceValidatingUTF8Character ( ) else {
2136
2152
self = tmp
2137
- return . trivia( error: ( kind : . invalidUtf8, position: start) )
2153
+ return . trivia( error: LexingError ( . invalidUtf8, position: start) )
2138
2154
}
2139
2155
if codepoint. value == 0xA0 { // Non-breaking whitespace (U+00A0)
2140
2156
while tmp. is ( at: 0xC2 ) && tmp. is ( offset: 1 , at: 0xA0 ) {
@@ -2143,11 +2159,11 @@ extension Lexer.Cursor {
2143
2159
}
2144
2160
2145
2161
self = tmp
2146
- return . trivia( error: ( kind : . nonBreakingSpace, position: start) )
2162
+ return . trivia( error: LexingError ( . nonBreakingSpace, position: start) )
2147
2163
} else if codepoint. value == 0x201D { // Closing curly quote (U+201D)
2148
2164
// If this is an end curly quote, just diagnose it with a fixit hint.
2149
2165
self = tmp
2150
- return . lexemeContents( Lexer . Result ( . unknown, error: ( . unicodeCurlyQuote, position: start) ) )
2166
+ return . lexemeContents( Lexer . Result ( . unknown, error: LexingError ( . unicodeCurlyQuote, position: start) ) )
2151
2167
} else if codepoint. value == 0x201C { // Opening curly quote (U+201C)
2152
2168
// If this is a start curly quote, do a fuzzy match of a string literal
2153
2169
// to improve recovery.
@@ -2160,15 +2176,15 @@ extension Lexer.Cursor {
2160
2176
// Identifiers are the closest representation of static string literals
2161
2177
// we have in the parser. Classify the entire curly string as an identifier
2162
2178
// for best recovery.
2163
- return . lexemeContents( Lexer . Result ( . identifier, error: ( . unicodeCurlyQuote, position: start) ) )
2179
+ return . lexemeContents( Lexer . Result ( . identifier, error: LexingError ( . unicodeCurlyQuote, position: start) ) )
2164
2180
} else if codepoint. value == 0 { // Nul character
2165
2181
self = tmp
2166
- return . trivia( error: ( kind : . nulCharacter, position: start) )
2182
+ return . trivia( error: LexingError ( . nulCharacter, position: start) )
2167
2183
}
2168
2184
2169
2185
// TODO: Try map confusables to ASCII characters
2170
2186
self = tmp
2171
- return . trivia( error: ( kind : . invalidCharacter, position: start) )
2187
+ return . trivia( error: LexingError ( . invalidCharacter, position: start) )
2172
2188
}
2173
2189
2174
2190
enum ConflictMarker {
0 commit comments