Skip to content

Commit 701d13b

Browse files
committed
Refactor lexNumber
1 parent 95b1c33 commit 701d13b

File tree

1 file changed

+88
-61
lines changed

1 file changed

+88
-61
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 88 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,35 @@ extension Lexer.Cursor {
331331
/// If the current character is `matching`, advance the cursor and return `true`.
332332
/// Otherwise, this is a no-op and returns `false`.
333333
mutating func advance(matching: CharacterByte) -> Bool {
334-
guard self.peek() == matching.value else {
334+
if self.peek(matches: matching) {
335+
_ = self.advance()
336+
return true
337+
} else {
338+
return false
339+
}
340+
}
341+
342+
/// If the current character is `character1` or `character2`, advance the cursor and return `true`.
343+
/// Otherwise, this is a no-op and returns `false`.
344+
mutating func advance(matching character1: CharacterByte, _ character2: CharacterByte) -> Bool {
345+
if self.peek(matches: character1) || self.peek(matches: character2) {
346+
_ = self.advance()
347+
return true
348+
} else {
349+
return false
350+
}
351+
}
352+
353+
/// If the current character is in `matching`, advance the cursor and return `true`.
354+
/// Otherwise, this is a no-op and returns `false`.
355+
@_disfavoredOverload // favor the stamped out copies
356+
mutating func advance(matching characters: CharacterByte...) -> Bool {
357+
if characters.contains(where: { self.peek(matches: $0) }) {
358+
_ = self.advance()
359+
return true
360+
} else {
335361
return false
336362
}
337-
_ = self.advance()
338-
return true
339363
}
340364

341365
/// If the current character matches `predicate`, consume it and return `true`.
@@ -628,7 +652,7 @@ extension Lexer.Cursor {
628652
)
629653
}
630654

631-
let start = self
655+
var start = self
632656
switch self.advance() {
633657
case UInt8(ascii: "@"): return Lexer.Result(.atSign)
634658
case UInt8(ascii: "{"): return Lexer.Result(.leftBrace)
@@ -720,7 +744,9 @@ extension Lexer.Cursor {
720744
UInt8(ascii: "3"), UInt8(ascii: "4"), UInt8(ascii: "5"),
721745
UInt8(ascii: "6"), UInt8(ascii: "7"), UInt8(ascii: "8"),
722746
UInt8(ascii: "9"):
723-
return self.lexNumber(tokenStart: start)
747+
let result = start.lexNumber()
748+
self = start
749+
return result
724750
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
725751
return self.lexStringQuote()
726752

@@ -895,21 +921,22 @@ extension Lexer.Cursor {
895921
/// floating_literal ::= [0-9][0-9_]*[eE][+-]?[0-9][0-9_]*
896922
/// floating_literal ::= 0x[0-9A-Fa-f][0-9A-Fa-f_]*
897923
/// (\.[0-9A-Fa-f][0-9A-Fa-f_]*)?[pP][+-]?[0-9][0-9_]*
898-
mutating func lexNumber(tokenStart tokStart: Lexer.Cursor) -> Lexer.Result {
899-
assert(
900-
(Unicode.Scalar(self.previous).isDigit || self.previous == UInt8(ascii: ".")),
901-
"Unexpected start"
902-
)
924+
mutating func lexNumber() -> Lexer.Result {
925+
assert(self.peek().map(Unicode.Scalar.init)?.isDigit == true, "Unexpected start")
926+
let tokenStart = self
903927

904-
if self.previous == UInt8(ascii: "0") && self.peek(matches: "x") {
905-
return self.lexHexNumber(tokenStart: tokStart)
928+
if self.peek(matches: "0") && self.peek(at: 1, matches: "x") {
929+
return self.lexHexNumber()
906930
}
907931

908-
if self.previous == UInt8(ascii: "0") && self.peek(matches: "o") {
932+
if self.peek(matches: "0") && self.peek(at: 1, matches: "o") {
909933
// 0o[0-7][0-7_]*
910-
_ = self.advance()
934+
935+
let zeroConsumed = self.advance(matching: "0") // Consume '0'
936+
let oConsumed = self.advance(matching: "o") // Consume 'o'
937+
assert(zeroConsumed && oConsumed)
911938
if let peeked = self.peek(), peeked < UInt8(ascii: "0") || peeked > UInt8(ascii: "7") {
912-
let errorOffset = tokStart.distance(to: self)
939+
let errorOffset = tokenStart.distance(to: self)
913940
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
914941
return Lexer.Result(
915942
.integerLiteral,
@@ -923,7 +950,7 @@ extension Lexer.Cursor {
923950

924951
let tmp = self
925952
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
926-
let errorOffset = tokStart.distance(to: tmp)
953+
let errorOffset = tokenStart.distance(to: tmp)
927954
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
928955
return Lexer.Result(
929956
.integerLiteral,
@@ -934,11 +961,13 @@ extension Lexer.Cursor {
934961
return Lexer.Result(.integerLiteral)
935962
}
936963

937-
if tokStart.peek(matches: "0") && self.peek(matches: "b") {
964+
if self.peek(matches: "0") && self.peek(at: 1, matches: "b") {
938965
// 0b[01][01_]*
939-
_ = self.advance()
966+
let zeroConsumed = self.advance(matching: "0") // Consume '0'
967+
let bConsumed = self.advance(matching: "b") // Consume 'b'
968+
assert(zeroConsumed && bConsumed)
940969
if self.peek(doesntMatch: "0", "1") {
941-
let errorOffset = tokStart.distance(to: self)
970+
let errorOffset = tokenStart.distance(to: self)
942971
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
943972
return Lexer.Result(
944973
.integerLiteral,
@@ -952,7 +981,7 @@ extension Lexer.Cursor {
952981

953982
let tmp = self
954983
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
955-
let errorOffset = tokStart.distance(to: tmp)
984+
let errorOffset = tokenStart.distance(to: tmp)
956985
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
957986
return Lexer.Result(
958987
.integerLiteral,
@@ -967,40 +996,40 @@ extension Lexer.Cursor {
967996
// floating point value.
968997
self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
969998

999+
// TODO: This can probably be unified with lexHexNumber somehow
1000+
9701001
// Lex things like 4.x as '4' followed by a tok::period.
9711002
if self.peek(matches: ".") {
9721003
// NextToken is the soon to be previous token
9731004
// Therefore: x.0.1 is sub-tuple access, not x.float_literal
974-
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit || tokStart.previous == UInt8(ascii: ".") {
1005+
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit || tokenStart.previous == UInt8(ascii: ".") {
9751006
return Lexer.Result(.integerLiteral)
9761007
}
977-
} else {
1008+
} else if self.isAtEndOfFile || self.peek(doesntMatch: "e", "E") {
9781009
// Floating literals must have '.', 'e', or 'E' after digits. If it is
9791010
// something else, then this is the end of the token.
980-
if self.isAtEndOfFile || self.peek(doesntMatch: "e", "E") {
981-
let tmp = self
982-
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
983-
let errorOffset = tokStart.distance(to: tmp)
984-
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
985-
return Lexer.Result(
986-
.integerLiteral,
987-
error: LexerError(.invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
988-
)
989-
}
990-
991-
return Lexer.Result(.integerLiteral)
1011+
let tmp = self
1012+
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1013+
let errorOffset = tokenStart.distance(to: tmp)
1014+
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
1015+
return Lexer.Result(
1016+
.integerLiteral,
1017+
error: LexerError(.invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
1018+
)
9921019
}
1020+
1021+
return Lexer.Result(.integerLiteral)
9931022
}
9941023

9951024
// Lex decimal point.
996-
if self.advance(if: { $0 == Unicode.Scalar(".") }) {
1025+
if self.advance(matching: ".") {
9971026
// Lex any digits after the decimal point.
9981027
self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
9991028
}
10001029

10011030
// Lex exponent.
1002-
if self.advance(if: { $0 == Unicode.Scalar("e") || $0 == Unicode.Scalar("E") }) {
1003-
_ = self.advance(if: { $0 == Unicode.Scalar("-") || $0 == Unicode.Scalar("+") })
1031+
if self.advance(matching: "e", "E") {
1032+
_ = self.advance(matching: "-", "+")
10041033

10051034
guard let peeked = self.peek(), Unicode.Scalar(peeked).isDigit else {
10061035
// There are 3 cases to diagnose if the exponent starts with a non-digit:
@@ -1018,18 +1047,16 @@ extension Lexer.Cursor {
10181047
errorKind = .expectedDigitInFloatLiteral
10191048
}
10201049

1021-
let errorOffset = tokStart.distance(to: tmp)
1050+
let errorOffset = tokenStart.distance(to: tmp)
10221051
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
10231052
return Lexer.Result(.floatingLiteral, error: LexerError(errorKind, byteOffset: errorOffset))
10241053
}
10251054

1026-
self.advance(while: { char in
1027-
char.isDigit || char == Unicode.Scalar("_")
1028-
})
1055+
self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") })
10291056

10301057
let tmp = self
10311058
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
1032-
let errorOffset = tokStart.distance(to: tmp)
1059+
let errorOffset = tokenStart.distance(to: tmp)
10331060
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
10341061
return Lexer.Result(
10351062
.floatingLiteral,
@@ -1041,14 +1068,16 @@ extension Lexer.Cursor {
10411068
return Lexer.Result(.floatingLiteral)
10421069
}
10431070

1044-
mutating func lexHexNumber(tokenStart tokStart: Lexer.Cursor) -> Lexer.Result {
1071+
mutating func lexHexNumber() -> Lexer.Result {
1072+
let tokStart = self
10451073
// We assume we're starting from the '0' of a '0x...' hex literal; the '0' and 'x' are consumed below.
1046-
assert(self.peek(matches: "x"), "not a hex literal")
1047-
assert(self.previous == UInt8(ascii: "0"), "not a hex literal")
1074+
let zeroConsumed = self.advance(matching: "0")
1075+
let xConsumed = self.advance(matching: "x")
1076+
assert(zeroConsumed && xConsumed, "not a hex literal")
10481077

10491078
// 0x[0-9a-fA-F][0-9a-fA-F_]*
1050-
_ = self.advance()
10511079
if self.isAtEndOfFile {
1080+
// TODO: Diagnose invalid hex literal '0x'
10521081
return Lexer.Result(.integerLiteral)
10531082
}
10541083
guard let peeked = self.peek(), Unicode.Scalar(peeked).isHexDigit else {
@@ -1062,11 +1091,7 @@ extension Lexer.Cursor {
10621091

10631092
self.advance(while: { $0.isHexDigit || $0 == Unicode.Scalar("_") })
10641093

1065-
if self.isAtEndOfFile {
1066-
return Lexer.Result(.integerLiteral)
1067-
}
1068-
1069-
if self.peek(doesntMatch: ".", "p", "P") {
1094+
if self.isAtEndOfFile || self.peek(doesntMatch: ".", "p", "P") {
10701095
let tmp = self
10711096
if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) {
10721097
let errorOffset = tokStart.distance(to: tmp)
@@ -1081,12 +1106,14 @@ extension Lexer.Cursor {
10811106
}
10821107

10831108
// (\.[0-9A-Fa-f][0-9A-Fa-f_]*)?
1084-
var ptrOnDot: Lexer.Cursor? = self
1085-
if self.advance(if: { $0 == Unicode.Scalar(".") }) {
1109+
1110+
// If a '.' was consumed, the cursor pointing to the '.', otherwise nil
1111+
var cursorToDot: Lexer.Cursor? = self
1112+
if self.advance(matching: ".") {
10861113
// If the character after the '.' is not a digit, assume we have an int
10871114
// literal followed by a dot expression.
10881115
if let peeked = self.peek(), !Unicode.Scalar(peeked).isHexDigit {
1089-
self = ptrOnDot!
1116+
self = cursorToDot!
10901117
return Lexer.Result(.integerLiteral)
10911118
}
10921119

@@ -1095,7 +1122,7 @@ extension Lexer.Cursor {
10951122
if self.isAtEndOfFile || self.peek(doesntMatch: "p", "P") {
10961123
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit {
10971124
// e.g: 0xff.description
1098-
self = ptrOnDot!
1125+
self = cursorToDot!
10991126
return Lexer.Result(.integerLiteral)
11001127
}
11011128
return Lexer.Result(
@@ -1104,23 +1131,23 @@ extension Lexer.Cursor {
11041131
)
11051132
}
11061133
} else {
1107-
ptrOnDot = nil
1134+
cursorToDot = nil
11081135
}
11091136

11101137
// [pP][+-]?[0-9][0-9_]*
1111-
assert(self.isAtEndOfFile || self.peek(matches: "p", "P"), "not at a hex float exponent?!")
1112-
_ = self.advance()
1138+
let pConsumed = self.advance(matching: "p", "P")
1139+
assert(self.isAtEndOfFile || pConsumed, "not at a hex float exponent?!")
11131140

11141141
var signedExponent = false
1115-
if self.advance(if: { $0 == Unicode.Scalar("+") || $0 == Unicode.Scalar("-") }) {
1142+
if self.advance(matching: "+", "-") {
11161143
// Eat the sign.
11171144
signedExponent = true
11181145
}
11191146

11201147
if let peeked = self.peek(), !Unicode.Scalar(peeked).isDigit {
1121-
if let ptrOnDot = ptrOnDot, let peeked = ptrOnDot.peek(at: 1), !Unicode.Scalar(peeked).isDigit && !signedExponent {
1148+
if let cursorToDot = cursorToDot, let peeked = cursorToDot.peek(at: 1), !Unicode.Scalar(peeked).isDigit && !signedExponent {
11221149
// e.g: 0xff.fpValue, 0xff.fp
1123-
self = ptrOnDot
1150+
self = cursorToDot
11241151
return Lexer.Result(.integerLiteral)
11251152
}
11261153
// Note: 0xff.fp+otherExpr can be valid expression. But we don't accept it.

0 commit comments

Comments
 (0)