Skip to content

Commit f0ce059

Browse files
committed
Replace (kind: LexerError, position: Lexer.Cursor) tuples by a struct
1 parent ed6d84d commit f0ce059

File tree

1 file changed

+51
-35
lines changed

1 file changed

+51
-35
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,22 @@ extension Lexer.Cursor {
188188
}
189189
}
190190
}
191+
192+
/// An error that was discovered in a lexeme while lexing it.
///
/// Stores the error's kind together with the cursor at which it was found.
/// Use `lexerError(tokenStart:)` to turn that cursor into an offset relative
/// to a token start.
193+
struct LexingError {
194+
/// The kind of error that was discovered.
let kind: LexerError.Kind
195+
/// The position in the token at which the error is.
196+
let position: Lexer.Cursor
197+
198+
/// Creates a lexing error of the given `kind` located at `position`.
init(_ kind: LexerError.Kind, position: Lexer.Cursor) {
199+
self.kind = kind
200+
self.position = position
201+
}
202+
203+
/// Converts this error into a `LexerError` whose `byteOffset` is the
/// distance from `tokenStart` to `position`.
///
/// - Parameter tokenStart: The cursor from which the offset is measured.
/// - Returns: A `LexerError` with the same kind as this error.
func lexerError(tokenStart: Lexer.Cursor) -> LexerError {
204+
return LexerError(kind, byteOffset: tokenStart.distance(to: position))
205+
}
206+
}
191207
}
192208

193209
extension Lexer {
@@ -259,7 +275,7 @@ extension Lexer {
259275
let flags: Lexer.Lexeme.Flags
260276
/// The error kind and the cursor pointing to the character at which the
261277
/// error occurred
262-
let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
278+
let error: Cursor.LexingError?
263279
let stateTransition: StateTransition?
264280
/// If set, overrides the trailing trivia lexing mode of the current state
265281
/// for this lexeme.
@@ -268,7 +284,7 @@ extension Lexer {
268284
init(
269285
_ tokenKind: RawTokenKind,
270286
flags: Lexer.Lexeme.Flags = [],
271-
error: (kind: LexerError.Kind, position: Cursor)? = nil,
287+
error: Cursor.LexingError? = nil,
272288
stateTransition: StateTransition? = nil,
273289
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
274290
) {
@@ -293,7 +309,7 @@ extension Lexer.Cursor {
293309
if let leadingTriviaMode = self.currentState.leadingTriviaLexingMode(cursor: self) {
294310
let triviaResult = self.lexTrivia(mode: leadingTriviaMode)
295311
newlineInLeadingTrivia = triviaResult.newlinePresence
296-
error = error ?? triviaResult.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
312+
error = error ?? triviaResult.error?.lexerError(tokenStart: cursor)
297313
} else {
298314
newlineInLeadingTrivia = .absent
299315
}
@@ -329,7 +345,7 @@ extension Lexer.Cursor {
329345
let trailingTriviaStart = self
330346
if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) {
331347
let triviaResult = self.lexTrivia(mode: trailingTriviaMode)
332-
error = error ?? triviaResult.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
348+
error = error ?? triviaResult.error?.lexerError(tokenStart: cursor)
333349
}
334350

335351
if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") {
@@ -342,7 +358,7 @@ extension Lexer.Cursor {
342358
}
343359

344360
self.previousTokenKind = result.tokenKind.base
345-
error = error ?? result.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
361+
error = error ?? result.error?.lexerError(tokenStart: cursor)
346362

347363
return .init(
348364
tokenKind: result.tokenKind,
@@ -880,7 +896,7 @@ extension Lexer.Cursor {
880896
return result
881897
case .trivia:
882898
assertionFailure("Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia")
883-
return Lexer.Result(.unknown, error: (.invalidUtf8, self))
899+
return Lexer.Result(.unknown, error: LexingError(.invalidUtf8, position: self))
884900
}
885901
}
886902
}
@@ -1003,12 +1019,12 @@ extension Lexer.Cursor {
10031019

10041020
fileprivate struct TriviaResult {
10051021
let newlinePresence: NewlinePresence
1006-
let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
1022+
let error: LexingError?
10071023
}
10081024

10091025
fileprivate mutating func lexTrivia(mode: TriviaLexingMode) -> TriviaResult {
10101026
var newlinePresence = NewlinePresence.absent
1011-
var error: (kind: LexerError.Kind, position: Lexer.Cursor)? = nil
1027+
var error: LexingError? = nil
10121028
if mode == .escapedNewlineInMultiLineStringLiteral {
10131029
_ = self.advance(matching: "\\")
10141030
self.advance(while: { $0 == "#" })
@@ -1071,7 +1087,7 @@ extension Lexer.Cursor {
10711087
continue
10721088
case UInt8(ascii: "<"), UInt8(ascii: ">"):
10731089
if self.tryLexConflictMarker(start: start) {
1074-
error = (.sourceConflictMarker, start)
1090+
error = LexingError(.sourceConflictMarker, position: start)
10751091
continue
10761092
}
10771093
// Start character of tokens.
@@ -1185,7 +1201,7 @@ extension Lexer.Cursor {
11851201
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
11861202
return Lexer.Result(
11871203
.integerLiteral,
1188-
error: (.invalidOctalDigitInIntegerLiteral, errorPos)
1204+
error: LexingError(.invalidOctalDigitInIntegerLiteral, position: errorPos)
11891205
)
11901206
}
11911207

@@ -1199,7 +1215,7 @@ extension Lexer.Cursor {
11991215
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12001216
return Lexer.Result(
12011217
.integerLiteral,
1202-
error: (.invalidOctalDigitInIntegerLiteral, errorPos)
1218+
error: LexingError(.invalidOctalDigitInIntegerLiteral, position: errorPos)
12031219
)
12041220
}
12051221

@@ -1216,7 +1232,7 @@ extension Lexer.Cursor {
12161232
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12171233
return Lexer.Result(
12181234
.integerLiteral,
1219-
error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
1235+
error: LexingError(.invalidBinaryDigitInIntegerLiteral, position: errorPos)
12201236
)
12211237
}
12221238

@@ -1230,7 +1246,7 @@ extension Lexer.Cursor {
12301246
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12311247
return Lexer.Result(
12321248
.integerLiteral,
1233-
error: (.invalidBinaryDigitInIntegerLiteral, errorPos)
1249+
error: LexingError(.invalidBinaryDigitInIntegerLiteral, position: errorPos)
12341250
)
12351251
}
12361252

@@ -1264,7 +1280,7 @@ extension Lexer.Cursor {
12641280
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
12651281
return Lexer.Result(
12661282
.integerLiteral,
1267-
error: (.invalidDecimalDigitInIntegerLiteral, errorPos)
1283+
error: LexingError(.invalidDecimalDigitInIntegerLiteral, position: errorPos)
12681284
)
12691285
}
12701286

@@ -1301,7 +1317,7 @@ extension Lexer.Cursor {
13011317
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13021318
return Lexer.Result(
13031319
.floatingLiteral,
1304-
error: (errorKind, errorPos)
1320+
error: LexingError(errorKind, position: errorPos)
13051321
)
13061322
}
13071323

@@ -1313,7 +1329,7 @@ extension Lexer.Cursor {
13131329
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13141330
return Lexer.Result(
13151331
.floatingLiteral,
1316-
error: (.invalidFloatingPointExponentDigit, errorPos)
1332+
error: LexingError(.invalidFloatingPointExponentDigit, position: errorPos)
13171333
)
13181334
}
13191335
}
@@ -1329,16 +1345,16 @@ extension Lexer.Cursor {
13291345

13301346
// 0x[0-9a-fA-F][0-9a-fA-F_]*
13311347
guard let peeked = self.peek() else {
1332-
return Lexer.Result(.integerLiteral, error: (.expectedHexDigitInHexLiteral, self))
1348+
return Lexer.Result(.integerLiteral, error: LexingError(.expectedHexDigitInHexLiteral, position: self))
13331349
}
13341350

13351351
guard Unicode.Scalar(peeked).isHexDigit else {
13361352
if Unicode.Scalar(peeked).isValidIdentifierContinuationCodePoint {
13371353
let errorPos = self
13381354
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
1339-
return Lexer.Result(.integerLiteral, error: (.invalidHexDigitInIntegerLiteral, errorPos))
1355+
return Lexer.Result(.integerLiteral, error: LexingError(.invalidHexDigitInIntegerLiteral, position: errorPos))
13401356
} else {
1341-
return Lexer.Result(.integerLiteral, error: (.expectedHexDigitInHexLiteral, self))
1357+
return Lexer.Result(.integerLiteral, error: LexingError(.expectedHexDigitInHexLiteral, position: self))
13421358
}
13431359
}
13441360

@@ -1351,7 +1367,7 @@ extension Lexer.Cursor {
13511367
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
13521368
return Lexer.Result(
13531369
.integerLiteral,
1354-
error: (.invalidHexDigitInIntegerLiteral, errorPos)
1370+
error: LexingError(.invalidHexDigitInIntegerLiteral, position: errorPos)
13551371
)
13561372
} else {
13571373
return Lexer.Result(.integerLiteral)
@@ -1380,7 +1396,7 @@ extension Lexer.Cursor {
13801396
}
13811397
return Lexer.Result(
13821398
.integerLiteral,
1383-
error: (.expectedBinaryExponentInHexFloatLiteral, self)
1399+
error: LexingError(.expectedBinaryExponentInHexFloatLiteral, position: self)
13841400
)
13851401
}
13861402
} else {
@@ -1423,7 +1439,7 @@ extension Lexer.Cursor {
14231439
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
14241440
return Lexer.Result(
14251441
.floatingLiteral,
1426-
error: (errorKind, errorPos)
1442+
error: LexingError(errorKind, position: errorPos)
14271443
)
14281444
}
14291445

@@ -1435,7 +1451,7 @@ extension Lexer.Cursor {
14351451
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
14361452
return Lexer.Result(
14371453
.floatingLiteral,
1438-
error: (.invalidFloatingPointExponentDigit, errorPos)
1454+
error: LexingError(.invalidFloatingPointExponentDigit, position: errorPos)
14391455
)
14401456
}
14411457
return Lexer.Result(.floatingLiteral)
@@ -1781,7 +1797,7 @@ extension Lexer.Cursor {
17811797
}
17821798

17831799
mutating func lexInStringLiteral(stringLiteralKind: StringLiteralKind, delimiterLength: Int) -> Lexer.Result {
1784-
var error: (LexerError.Kind, Lexer.Cursor)? = nil
1800+
var error: LexingError? = nil
17851801

17861802
while true {
17871803
switch self.peek() {
@@ -1837,7 +1853,7 @@ extension Lexer.Cursor {
18371853
return Lexer.Result(.stringSegment, error: error)
18381854
}
18391855
case .error(let errorKind):
1840-
error = (errorKind, self)
1856+
error = LexingError(errorKind, position: self)
18411857
self = clone
18421858
case .endOfString:
18431859
return Lexer.Result(
@@ -1997,15 +2013,15 @@ extension Lexer.Cursor {
19972013
case (UInt8(ascii: "-"), UInt8(ascii: ">")): // ->
19982014
return Lexer.Result(.arrow)
19992015
case (UInt8(ascii: "*"), UInt8(ascii: "/")): // */
2000-
return Lexer.Result(.unknown, error: (.unexpectedBlockCommentEnd, tokStart))
2016+
return Lexer.Result(.unknown, error: LexingError(.unexpectedBlockCommentEnd, position: tokStart))
20012017
default:
20022018
break
20032019
}
20042020
} else {
20052021
// Verify there is no "*/" in the middle of the identifier token, we reject
20062022
// it as potentially ending a block comment.
20072023
if tokStart.text(upTo: self).contains("*/") {
2008-
return Lexer.Result(.unknown, error: (.unexpectedBlockCommentEnd, tokStart))
2024+
return Lexer.Result(.unknown, error: LexingError(.unexpectedBlockCommentEnd, position: tokStart))
20092025
}
20102026
}
20112027

@@ -2111,7 +2127,7 @@ extension Lexer.Cursor {
21112127

21122128
enum UnknownCharactersClassification {
21132129
/// The characters consumed by `lexUnknown` should be classified as trivia
2114-
case trivia(error: (kind: LexerError.Kind, position: Lexer.Cursor))
2130+
case trivia(error: LexingError)
21152131
/// The characters consumed by `lexUnknown` should be classified as the contents of a lexeme
21162132
case lexemeContents(Lexer.Result)
21172133
}
@@ -2128,13 +2144,13 @@ extension Lexer.Cursor {
21282144
// start, attempt to recover by eating more continuation characters.
21292145
tmp.advance(while: { Unicode.Scalar($0).isValidIdentifierContinuationCodePoint })
21302146
self = tmp
2131-
return .lexemeContents(Lexer.Result(.identifier, error: (.invalidIdentifierStartCharacter, position: start)))
2147+
return .lexemeContents(Lexer.Result(.identifier, error: LexingError(.invalidIdentifierStartCharacter, position: start)))
21322148
}
21332149

21342150
// This character isn't allowed in Swift source.
21352151
guard let codepoint = tmp.advanceValidatingUTF8Character() else {
21362152
self = tmp
2137-
return .trivia(error: (kind: .invalidUtf8, position: start))
2153+
return .trivia(error: LexingError(.invalidUtf8, position: start))
21382154
}
21392155
if codepoint.value == 0xA0 { // Non-breaking whitespace (U+00A0)
21402156
while tmp.is(at: 0xC2) && tmp.is(offset: 1, at: 0xA0) {
@@ -2143,11 +2159,11 @@ extension Lexer.Cursor {
21432159
}
21442160

21452161
self = tmp
2146-
return .trivia(error: (kind: .nonBreakingSpace, position: start))
2162+
return .trivia(error: LexingError(.nonBreakingSpace, position: start))
21472163
} else if codepoint.value == 0x201D { // Closing curly quote (U+201D)
21482164
// If this is an end curly quote, just diagnose it with a fixit hint.
21492165
self = tmp
2150-
return .lexemeContents(Lexer.Result(.unknown, error: (.unicodeCurlyQuote, position: start)))
2166+
return .lexemeContents(Lexer.Result(.unknown, error: LexingError(.unicodeCurlyQuote, position: start)))
21512167
} else if codepoint.value == 0x201C { // Opening curly quote (U+201C)
21522168
// If this is a start curly quote, do a fuzzy match of a string literal
21532169
// to improve recovery.
@@ -2160,15 +2176,15 @@ extension Lexer.Cursor {
21602176
// Identifiers are the closest representation of static string literals
21612177
// we have in the parser. Classify the entire curly string as an identifier
21622178
// for best recovery.
2163-
return .lexemeContents(Lexer.Result(.identifier, error: (.unicodeCurlyQuote, position: start)))
2179+
return .lexemeContents(Lexer.Result(.identifier, error: LexingError(.unicodeCurlyQuote, position: start)))
21642180
} else if codepoint.value == 0 { // Nul character
21652181
self = tmp
2166-
return .trivia(error: (kind: .nulCharacter, position: start))
2182+
return .trivia(error: LexingError(.nulCharacter, position: start))
21672183
}
21682184

21692185
// TODO: Try map confusables to ASCII characters
21702186
self = tmp
2171-
return .trivia(error: (kind: .invalidCharacter, position: start))
2187+
return .trivia(error: LexingError(.invalidCharacter, position: start))
21722188
}
21732189

21742190
enum ConflictMarker {

0 commit comments

Comments
 (0)