@@ -331,11 +331,35 @@ extension Lexer.Cursor {
331
331
/// Consumes the current character when it equals `matching`.
///
/// - Parameter matching: The byte the current character is compared against.
/// - Returns: `true` if the character matched and the cursor was advanced
///   past it; `false` if it did not match, in which case nothing is consumed.
mutating func advance(matching: CharacterByte) -> Bool {
  // Bail out early so the happy path stays un-nested.
  guard self.peek(matches: matching) else {
    return false
  }
  _ = self.advance()
  return true
}
341
+
342
/// Consumes the current character when it equals `character1` or `character2`.
///
/// - Parameters:
///   - character1: The first accepted byte.
///   - character2: The second accepted byte.
/// - Returns: `true` if the current character matched either byte and the
///   cursor was advanced past it; `false` otherwise, in which case nothing is
///   consumed.
mutating func advance(matching character1: CharacterByte, _ character2: CharacterByte) -> Bool {
  // `||` short-circuits, so `character2` is only checked when `character1` fails.
  let isAccepted = self.peek(matches: character1) || self.peek(matches: character2)
  guard isAccepted else {
    return false
  }
  _ = self.advance()
  return true
}
352
+
353
/// Consumes the current character when it equals any byte in `characters`.
///
/// - Parameter characters: The accepted bytes.
/// - Returns: `true` if the current character matched one of `characters` and
///   the cursor was advanced past it; `false` otherwise, in which case nothing
///   is consumed.
@_disfavoredOverload  // favor the stamped out copies
mutating func advance(matching characters: CharacterByte...) -> Bool {
  let isAccepted = characters.contains(where: { candidate in
    self.peek(matches: candidate)
  })
  guard isAccepted else {
    return false
  }
  _ = self.advance()
  return true
}
340
364
341
365
/// If the current character matches `predicate`, consume it and return `true`.
@@ -628,7 +652,7 @@ extension Lexer.Cursor {
628
652
)
629
653
}
630
654
631
- let start = self
655
+ var start = self
632
656
switch self . advance ( ) {
633
657
case UInt8 ( ascii: " @ " ) : return Lexer . Result ( . atSign)
634
658
case UInt8 ( ascii: " { " ) : return Lexer . Result ( . leftBrace)
@@ -720,7 +744,9 @@ extension Lexer.Cursor {
720
744
UInt8 ( ascii: " 3 " ) , UInt8 ( ascii: " 4 " ) , UInt8 ( ascii: " 5 " ) ,
721
745
UInt8 ( ascii: " 6 " ) , UInt8 ( ascii: " 7 " ) , UInt8 ( ascii: " 8 " ) ,
722
746
UInt8 ( ascii: " 9 " ) :
723
- return self . lexNumber ( tokenStart: start)
747
+ let result = start. lexNumber ( )
748
+ self = start
749
+ return result
724
750
case UInt8 ( ascii: #"'"# ) , UInt8 ( ascii: #"""# ) :
725
751
return self . lexStringQuote ( )
726
752
@@ -895,21 +921,22 @@ extension Lexer.Cursor {
895
921
/// floating_literal ::= [0-9][0-9_]*[eE][+-]?[0-9][0-9_]*
896
922
/// floating_literal ::= 0x[0-9A-Fa-f][0-9A-Fa-f_]*
897
923
/// (\.[0-9A-Fa-f][0-9A-Fa-f_]*)?[pP][+-]?[0-9][0-9_]*
898
- mutating func lexNumber( tokenStart tokStart: Lexer . Cursor ) -> Lexer . Result {
899
- assert (
900
- ( Unicode . Scalar ( self . previous) . isDigit || self . previous == UInt8 ( ascii: " . " ) ) ,
901
- " Unexpected start "
902
- )
924
+ mutating func lexNumber( ) -> Lexer . Result {
925
+ assert ( self . peek ( ) . map ( Unicode . Scalar. init) ? . isDigit == true , " Unexpected start " )
926
+ let tokenStart = self
903
927
904
- if self . previous == UInt8 ( ascii : " 0 " ) && self . peek ( matches: " x " ) {
905
- return self . lexHexNumber ( tokenStart : tokStart )
928
+ if self . peek ( matches : " 0 " ) && self . peek ( at : 1 , matches: " x " ) {
929
+ return self . lexHexNumber ( )
906
930
}
907
931
908
- if self . previous == UInt8 ( ascii : " 0 " ) && self . peek ( matches: " o " ) {
932
+ if self . peek ( matches : " 0 " ) && self . peek ( at : 1 , matches: " o " ) {
909
933
// 0o[0-7][0-7_]*
910
- _ = self . advance ( )
934
+
935
+ let zeroConsumed = self . advance ( matching: " 0 " ) // Consume '0'
936
+ let oConsumed = self . advance ( matching: " o " ) // Consume 'o'
937
+ assert ( zeroConsumed && oConsumed)
911
938
if let peeked = self . peek ( ) , peeked < UInt8 ( ascii: " 0 " ) || peeked > UInt8 ( ascii: " 7 " ) {
912
- let errorOffset = tokStart . distance ( to: self )
939
+ let errorOffset = tokenStart . distance ( to: self )
913
940
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
914
941
return Lexer . Result (
915
942
. integerLiteral,
@@ -923,7 +950,7 @@ extension Lexer.Cursor {
923
950
924
951
let tmp = self
925
952
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
926
- let errorOffset = tokStart . distance ( to: tmp)
953
+ let errorOffset = tokenStart . distance ( to: tmp)
927
954
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
928
955
return Lexer . Result (
929
956
. integerLiteral,
@@ -934,11 +961,13 @@ extension Lexer.Cursor {
934
961
return Lexer . Result ( . integerLiteral)
935
962
}
936
963
937
- if tokStart . peek ( matches: " 0 " ) && self . peek ( matches: " b " ) {
964
+ if self . peek ( matches: " 0 " ) && self . peek ( at : 1 , matches: " b " ) {
938
965
// 0b[01][01_]*
939
- _ = self . advance ( )
966
+ let zeroConsumed = self . advance ( matching: " 0 " ) // Consume '0'
967
+ let bConsumed = self . advance ( matching: " b " ) // Consume 'b'
968
+ assert ( zeroConsumed && bConsumed)
940
969
if self . peek ( doesntMatch: " 0 " , " 1 " ) {
941
- let errorOffset = tokStart . distance ( to: self )
970
+ let errorOffset = tokenStart . distance ( to: self )
942
971
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
943
972
return Lexer . Result (
944
973
. integerLiteral,
@@ -952,7 +981,7 @@ extension Lexer.Cursor {
952
981
953
982
let tmp = self
954
983
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
955
- let errorOffset = tokStart . distance ( to: tmp)
984
+ let errorOffset = tokenStart . distance ( to: tmp)
956
985
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
957
986
return Lexer . Result (
958
987
. integerLiteral,
@@ -967,40 +996,40 @@ extension Lexer.Cursor {
967
996
// floating point value.
968
997
self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
969
998
999
+ // TODO: This can probably be unified with lexHexNumber somehow
1000
+
970
1001
// Lex things like 4.x as '4' followed by a tok::period.
971
1002
if self . peek ( matches: " . " ) {
972
1003
// NextToken is the soon to be previous token
973
1004
// Therefore: x.0.1 is sub-tuple access, not x.float_literal
974
- if let peeked = self . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit || tokStart . previous == UInt8 ( ascii: " . " ) {
1005
+ if let peeked = self . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit || tokenStart . previous == UInt8 ( ascii: " . " ) {
975
1006
return Lexer . Result ( . integerLiteral)
976
1007
}
977
- } else {
1008
+ } else if self . isAtEndOfFile || self . peek ( doesntMatch : " e " , " E " ) {
978
1009
// Floating literals must have '.', 'e', or 'E' after digits. If it is
979
1010
// something else, then this is the end of the token.
980
- if self . isAtEndOfFile || self . peek ( doesntMatch: " e " , " E " ) {
981
- let tmp = self
982
- if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
983
- let errorOffset = tokStart. distance ( to: tmp)
984
- self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
985
- return Lexer . Result (
986
- . integerLiteral,
987
- error: LexerError ( . invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
988
- )
989
- }
990
-
991
- return Lexer . Result ( . integerLiteral)
1011
+ let tmp = self
1012
+ if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
1013
+ let errorOffset = tokenStart. distance ( to: tmp)
1014
+ self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1015
+ return Lexer . Result (
1016
+ . integerLiteral,
1017
+ error: LexerError ( . invalidDecimalDigitInIntegerLiteral, byteOffset: errorOffset)
1018
+ )
992
1019
}
1020
+
1021
+ return Lexer . Result ( . integerLiteral)
993
1022
}
994
1023
995
1024
// Lex decimal point.
996
- if self . advance ( if : { $0 == Unicode . Scalar ( " . " ) } ) {
1025
+ if self . advance ( matching : " . " ) {
997
1026
// Lex any digits after the decimal point.
998
1027
self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
999
1028
}
1000
1029
1001
1030
// Lex exponent.
1002
- if self . advance ( if : { $0 == Unicode . Scalar ( " e " ) || $0 == Unicode . Scalar ( " E " ) } ) {
1003
- _ = self . advance ( if : { $0 == Unicode . Scalar ( " - " ) || $0 == Unicode . Scalar ( " + " ) } )
1031
+ if self . advance ( matching : " e " , " E " ) {
1032
+ _ = self . advance ( matching : " - " , " + " )
1004
1033
1005
1034
guard let peeked = self . peek ( ) , Unicode . Scalar ( peeked) . isDigit else {
1006
1035
// There are 3 cases to diagnose if the exponent starts with a non-digit:
@@ -1018,18 +1047,16 @@ extension Lexer.Cursor {
1018
1047
errorKind = . expectedDigitInFloatLiteral
1019
1048
}
1020
1049
1021
- let errorOffset = tokStart . distance ( to: tmp)
1050
+ let errorOffset = tokenStart . distance ( to: tmp)
1022
1051
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1023
1052
return Lexer . Result ( . floatingLiteral, error: LexerError ( errorKind, byteOffset: errorOffset) )
1024
1053
}
1025
1054
1026
- self . advance ( while: { char in
1027
- char. isDigit || char == Unicode . Scalar ( " _ " )
1028
- } )
1055
+ self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1029
1056
1030
1057
let tmp = self
1031
1058
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
1032
- let errorOffset = tokStart . distance ( to: tmp)
1059
+ let errorOffset = tokenStart . distance ( to: tmp)
1033
1060
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1034
1061
return Lexer . Result (
1035
1062
. floatingLiteral,
@@ -1041,14 +1068,16 @@ extension Lexer.Cursor {
1041
1068
return Lexer . Result ( . floatingLiteral)
1042
1069
}
1043
1070
1044
- mutating func lexHexNumber( tokenStart tokStart: Lexer . Cursor ) -> Lexer . Result {
1071
+ mutating func lexHexNumber( ) -> Lexer . Result {
1072
+ let tokStart = self
1045
1073
// We assume we're starting from the leading '0' of a '0x...' hex literal (integer or floating-point).
1046
- assert ( self . peek ( matches: " x " ) , " not a hex literal " )
1047
- assert ( self . previous == UInt8 ( ascii: " 0 " ) , " not a hex literal " )
1074
+ let zeroConsumed = self . advance ( matching: " 0 " )
1075
+ let xConsumed = self . advance ( matching: " x " )
1076
+ assert ( zeroConsumed && xConsumed, " not a hex literal " )
1048
1077
1049
1078
// 0x[0-9a-fA-F][0-9a-fA-F_]*
1050
- _ = self . advance ( )
1051
1079
if self . isAtEndOfFile {
1080
+ // TODO: Diagnose invalid hex literal '0x'
1052
1081
return Lexer . Result ( . integerLiteral)
1053
1082
}
1054
1083
guard let peeked = self . peek ( ) , Unicode . Scalar ( peeked) . isHexDigit else {
@@ -1062,11 +1091,7 @@ extension Lexer.Cursor {
1062
1091
1063
1092
self . advance ( while: { $0. isHexDigit || $0 == Unicode . Scalar ( " _ " ) } )
1064
1093
1065
- if self . isAtEndOfFile {
1066
- return Lexer . Result ( . integerLiteral)
1067
- }
1068
-
1069
- if self . peek ( doesntMatch: " . " , " p " , " P " ) {
1094
+ if self . isAtEndOfFile || self . peek ( doesntMatch: " . " , " p " , " P " ) {
1070
1095
let tmp = self
1071
1096
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
1072
1097
let errorOffset = tokStart. distance ( to: tmp)
@@ -1081,12 +1106,14 @@ extension Lexer.Cursor {
1081
1106
}
1082
1107
1083
1108
// (\.[0-9A-Fa-f][0-9A-Fa-f_]*)?
1084
- var ptrOnDot : Lexer . Cursor ? = self
1085
- if self . advance ( if: { $0 == Unicode . Scalar ( " . " ) } ) {
1109
+
1110
+ // If a '.' was consumed, the cursor pointing to the '.', otherwise nil
1111
+ var cursorToDot : Lexer . Cursor ? = self
1112
+ if self . advance ( matching: " . " ) {
1086
1113
// If the character after the '.' is not a digit, assume we have an int
1087
1114
// literal followed by a dot expression.
1088
1115
if let peeked = self . peek ( ) , !Unicode. Scalar ( peeked) . isHexDigit {
1089
- self = ptrOnDot !
1116
+ self = cursorToDot !
1090
1117
return Lexer . Result ( . integerLiteral)
1091
1118
}
1092
1119
@@ -1095,7 +1122,7 @@ extension Lexer.Cursor {
1095
1122
if self . isAtEndOfFile || self . peek ( doesntMatch: " p " , " P " ) {
1096
1123
if let peeked = self . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit {
1097
1124
// e.g: 0xff.description
1098
- self = ptrOnDot !
1125
+ self = cursorToDot !
1099
1126
return Lexer . Result ( . integerLiteral)
1100
1127
}
1101
1128
return Lexer . Result (
@@ -1104,23 +1131,23 @@ extension Lexer.Cursor {
1104
1131
)
1105
1132
}
1106
1133
} else {
1107
- ptrOnDot = nil
1134
+ cursorToDot = nil
1108
1135
}
1109
1136
1110
1137
// [pP][+-]?[0-9][0-9_]*
1111
- assert ( self . isAtEndOfFile || self . peek ( matches : " p " , " P " ) , " not at a hex float exponent?! " )
1112
- _ = self . advance ( )
1138
+ let pConsumed = self . advance ( matching : " p " , " P " )
1139
+ assert ( self . isAtEndOfFile || pConsumed , " not at a hex float exponent?! " )
1113
1140
1114
1141
var signedExponent = false
1115
- if self . advance ( if : { $0 == Unicode . Scalar ( " + " ) || $0 == Unicode . Scalar ( " - " ) } ) {
1142
+ if self . advance ( matching : " + " , " - " ) {
1116
1143
// Eat the sign.
1117
1144
signedExponent = true
1118
1145
}
1119
1146
1120
1147
if let peeked = self . peek ( ) , !Unicode. Scalar ( peeked) . isDigit {
1121
- if let ptrOnDot = ptrOnDot , let peeked = ptrOnDot . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit && !signedExponent {
1148
+ if let cursorToDot = cursorToDot , let peeked = cursorToDot . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit && !signedExponent {
1122
1149
// e.g: 0xff.fpValue, 0xff.fp
1123
- self = ptrOnDot
1150
+ self = cursorToDot
1124
1151
return Lexer . Result ( . integerLiteral)
1125
1152
}
1126
1153
// Note: 0xff.fp+otherExpr can be valid expression. But we don't accept it.
0 commit comments