@@ -433,11 +433,30 @@ extension Lexer.Cursor {
433
433
}
434
434
}
435
435
436
+ /// If this is the opening delimiter of a raw string literal, return the number
437
+ /// of `#` in the raw string delimiter. On success the cursor is advanced past the run of `#`; if no `"` follows that run, `nil` is returned and the cursor is left unchanged.
438
+ /// Assumes that the cursor is currently pointing at the first `#`.
439
+ mutating func advanceIfOpeningRawStringDelimiter( ) -> Int ? {
440
+ assert ( self . peek ( matches: " # " ) )
441
+
442
+ var tmp = self // Scan on a copy so that `self` only moves once a delimiter is confirmed.
443
+ var length = 0
444
+ while tmp. advance ( matching: " # " ) { // Count every consecutive `#`.
445
+ length += 1
446
+ }
447
+
448
+ if tmp. peek ( matches: #"""# ) { // Only a `#` run directly followed by `"` counts as a delimiter.
449
+ self = tmp // Commit the scan.
450
+ return length
451
+ }
452
+ return nil // Not a raw string delimiter; `self` was never modified.
453
+ }
454
+
436
455
/// If this is the opening delimiter of a raw string literal, return the number
437
456
/// of `#` in the raw string delimiter.
438
457
/// Assumes that the parser is currently pointing at the character after the first `#`.
439
458
/// In other words, the first `#` is expected to already be consumed.
440
- mutating func advanceIfOpeningRawStringDelimiter ( ) -> Int ? {
459
+ mutating func legacyAdvanceIfOpeningRawStringDelimiter ( ) -> Int ? {
441
460
assert ( self . previous == UInt8 ( ascii: " # " ) )
442
461
443
462
var tmp = self
@@ -652,20 +671,19 @@ extension Lexer.Cursor {
652
671
)
653
672
}
654
673
655
- var start = self
656
- switch self . advance ( ) {
657
- case UInt8 ( ascii: " @ " ) : return Lexer . Result ( . atSign)
658
- case UInt8 ( ascii: " { " ) : return Lexer . Result ( . leftBrace)
659
- case UInt8 ( ascii: " [ " ) : return Lexer . Result ( . leftSquareBracket)
660
- case UInt8 ( ascii: " ( " ) : return Lexer . Result ( . leftParen)
661
- case UInt8 ( ascii: " } " ) : return Lexer . Result ( . rightBrace)
662
- case UInt8 ( ascii: " ] " ) : return Lexer . Result ( . rightSquareBracket)
663
- case UInt8 ( ascii: " ) " ) : return Lexer . Result ( . rightParen)
664
-
665
- case UInt8 ( ascii: " , " ) : return Lexer . Result ( . comma)
666
- case UInt8 ( ascii: " ; " ) : return Lexer . Result ( . semicolon)
667
- case UInt8 ( ascii: " : " ) : return Lexer . Result ( . colon)
668
- case UInt8 ( ascii: " \\ " ) : return Lexer . Result ( . backslash)
674
+ switch self . peek ( ) {
675
+ case UInt8 ( ascii: " @ " ) : _ = self . advance ( ) ; return Lexer . Result ( . atSign)
676
+ case UInt8 ( ascii: " { " ) : _ = self . advance ( ) ; return Lexer . Result ( . leftBrace)
677
+ case UInt8 ( ascii: " [ " ) : _ = self . advance ( ) ; return Lexer . Result ( . leftSquareBracket)
678
+ case UInt8 ( ascii: " ( " ) : _ = self . advance ( ) ; return Lexer . Result ( . leftParen)
679
+ case UInt8 ( ascii: " } " ) : _ = self . advance ( ) ; return Lexer . Result ( . rightBrace)
680
+ case UInt8 ( ascii: " ] " ) : _ = self . advance ( ) ; return Lexer . Result ( . rightSquareBracket)
681
+ case UInt8 ( ascii: " ) " ) : _ = self . advance ( ) ; return Lexer . Result ( . rightParen)
682
+
683
+ case UInt8 ( ascii: " , " ) : _ = self . advance ( ) ; return Lexer . Result ( . comma)
684
+ case UInt8 ( ascii: " ; " ) : _ = self . advance ( ) ; return Lexer . Result ( . semicolon)
685
+ case UInt8 ( ascii: " : " ) : _ = self . advance ( ) ; return Lexer . Result ( . colon)
686
+ case UInt8 ( ascii: " \\ " ) : _ = self . advance ( ) ; return Lexer . Result ( . backslash)
669
687
670
688
case UInt8 ( ascii: " # " ) :
671
689
if case . afterClosingStringQuote( delimiterLength: _) = state {
@@ -678,60 +696,46 @@ extension Lexer.Cursor {
678
696
}
679
697
680
698
// Try lex a regex literal.
681
- if let token = start. tryLexRegexLiteral ( sourceBufferStart: sourceBufferStart) {
682
- self = start
699
+ if let token = self . tryLexRegexLiteral ( sourceBufferStart: sourceBufferStart) {
683
700
return Lexer . Result ( token)
684
701
}
685
702
// Otherwise try lex a magic pound literal.
686
703
return self . lexMagicPoundLiteral ( )
687
704
case UInt8 ( ascii: " / " ) :
688
705
// Try lex a regex literal.
689
- if let token = start. tryLexRegexLiteral ( sourceBufferStart: sourceBufferStart) {
690
- self = start
706
+ if let token = self . tryLexRegexLiteral ( sourceBufferStart: sourceBufferStart) {
691
707
return Lexer . Result ( token)
692
708
}
693
709
694
710
// Otherwise lex an operator identifier.
695
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
696
- self = start
697
- return result
711
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
698
712
case UInt8 ( ascii: " ! " ) :
699
- if start. isLeftBound ( sourceBufferStart: sourceBufferStart) {
713
+ if self . isLeftBound ( sourceBufferStart: sourceBufferStart) {
714
+ _ = self . advance ( )
700
715
return Lexer . Result ( . exclamationMark)
701
716
}
702
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
703
- self = start
704
- return result
717
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
705
718
706
719
case UInt8 ( ascii: " ? " ) :
707
- if start. isLeftBound ( sourceBufferStart: sourceBufferStart) {
720
+ if self . isLeftBound ( sourceBufferStart: sourceBufferStart) {
721
+ _ = self . advance ( )
708
722
return Lexer . Result ( . postfixQuestionMark)
709
723
}
710
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
711
- self = start
712
- return result
724
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
713
725
714
726
case UInt8 ( ascii: " < " ) :
715
- if self . peek ( matches: " # " ) {
716
- let result = start. tryLexEditorPlaceholder ( sourceBufferStart: sourceBufferStart)
717
- self = start
718
- return result
727
+ if self . peek ( at: 1 , matches: " # " ) {
728
+ return self . tryLexEditorPlaceholder ( sourceBufferStart: sourceBufferStart)
719
729
}
720
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
721
- self = start
722
- return result
730
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
723
731
case UInt8 ( ascii: " > " ) :
724
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
725
- self = start
726
- return result
732
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
727
733
728
734
case UInt8 ( ascii: " = " ) , UInt8 ( ascii: " - " ) , UInt8 ( ascii: " + " ) ,
729
735
UInt8 ( ascii: " * " ) , UInt8 ( ascii: " % " ) , UInt8 ( ascii: " & " ) ,
730
736
UInt8 ( ascii: " | " ) , UInt8 ( ascii: " ^ " ) , UInt8 ( ascii: " ~ " ) ,
731
737
UInt8 ( ascii: " . " ) :
732
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
733
- self = start
734
- return result
738
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
735
739
case UInt8 ( ascii: " A " ) , UInt8 ( ascii: " B " ) , UInt8 ( ascii: " C " ) ,
736
740
UInt8 ( ascii: " D " ) , UInt8 ( ascii: " E " ) , UInt8 ( ascii: " F " ) ,
737
741
UInt8 ( ascii: " G " ) , UInt8 ( ascii: " H " ) , UInt8 ( ascii: " I " ) ,
@@ -751,47 +755,34 @@ extension Lexer.Cursor {
751
755
UInt8 ( ascii: " v " ) , UInt8 ( ascii: " w " ) , UInt8 ( ascii: " x " ) ,
752
756
UInt8 ( ascii: " y " ) , UInt8 ( ascii: " z " ) ,
753
757
UInt8 ( ascii: " _ " ) :
754
- let result = start. lexIdentifier ( )
755
- self = start
756
- return result
758
+ return self . lexIdentifier ( )
757
759
758
760
case UInt8 ( ascii: " $ " ) :
759
- let result = start. lexDollarIdentifier ( )
760
- self = start
761
- return result
761
+ return self . lexDollarIdentifier ( )
762
762
763
763
case UInt8 ( ascii: " 0 " ) , UInt8 ( ascii: " 1 " ) , UInt8 ( ascii: " 2 " ) ,
764
764
UInt8 ( ascii: " 3 " ) , UInt8 ( ascii: " 4 " ) , UInt8 ( ascii: " 5 " ) ,
765
765
UInt8 ( ascii: " 6 " ) , UInt8 ( ascii: " 7 " ) , UInt8 ( ascii: " 8 " ) ,
766
766
UInt8 ( ascii: " 9 " ) :
767
- let result = start. lexNumber ( )
768
- self = start
769
- return result
767
+ return self . lexNumber ( )
770
768
case UInt8 ( ascii: #"'"# ) , UInt8 ( ascii: #"""# ) :
771
769
return self . lexStringQuote ( )
772
770
773
771
case UInt8 ( ascii: " ` " ) :
774
- let result = start. lexEscapedIdentifier ( )
775
- self = start
776
- return result
772
+ return self . lexEscapedIdentifier ( )
777
773
case nil :
778
774
return Lexer . Result ( . eof)
779
775
default :
780
- var tmp = start
776
+ var tmp = self
781
777
if tmp. advance ( if: { Unicode . Scalar ( $0) . isValidIdentifierStartCodePoint } ) {
782
- let result = start. lexIdentifier ( )
783
- self = start
784
- return result
778
+ return self . lexIdentifier ( )
785
779
}
786
780
787
781
if tmp. advance ( if: { Unicode . Scalar ( $0) . isOperatorStartCodePoint } ) {
788
- let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
789
- self = start
790
- return result
782
+ return self . lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
791
783
}
792
784
793
- let unknownClassification = start. lexUnknown ( )
794
- self = start
785
+ let unknownClassification = self . lexUnknown ( )
795
786
assert ( unknownClassification == . lexemeContents, " Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia " )
796
787
return Lexer . Result ( . unknown)
797
788
}
@@ -1217,6 +1208,8 @@ extension Lexer.Cursor {
1217
1208
1218
1209
extension Lexer . Cursor {
1219
1210
mutating func lexMagicPoundLiteral( ) -> Lexer . Result {
1211
+ let poundConsumed = self . advance ( matching: " # " )
1212
+ assert ( poundConsumed)
1220
1213
var tmp = self
1221
1214
// Scan for [a-zA-Z]+ to see what we match.
1222
1215
while let peeked = tmp. peek ( ) , Unicode . Scalar ( peeked) . isAsciiIdentifierStart {
@@ -1428,11 +1421,12 @@ extension Lexer.Cursor {
1428
1421
}
1429
1422
}
1430
1423
1431
- if self . previous == UInt8 ( ascii : " ' " ) {
1424
+ if self . advance ( matching : " ' " ) {
1432
1425
return Lexer . Result ( . singleQuote, newState: newState ( currentState: self . state, kind: . singleQuote) )
1433
1426
}
1434
1427
1435
- assert ( self . previous == UInt8 ( ascii: #"""# ) )
1428
+ let firstQuoteConsumed = self . advance ( matching: #"""# )
1429
+ assert ( firstQuoteConsumed)
1436
1430
1437
1431
var lookingForMultilineString = self
1438
1432
if lookingForMultilineString. advance ( matching: #"""# ) , lookingForMultilineString. advance ( matching: #"""# ) {
@@ -1624,7 +1618,7 @@ extension Lexer.Cursor {
1624
1618
return last
1625
1619
1626
1620
case UInt8 ( ascii: " # " ) :
1627
- guard !inStringLiteral( ) , let delim = curPtr. advanceIfOpeningRawStringDelimiter ( ) else {
1621
+ guard !inStringLiteral( ) , let delim = curPtr. legacyAdvanceIfOpeningRawStringDelimiter ( ) else {
1628
1622
continue
1629
1623
}
1630
1624
let quoteConsumed = curPtr. advance ( matching: #"""# )
0 commit comments