@@ -690,32 +690,46 @@ extension Lexer.Cursor {
690
690
}
691
691
692
692
// Otherwise try to lex a magic pound literal.
693
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
693
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
694
+ self = start
695
+ return result
694
696
case UInt8 ( ascii: " ! " ) :
695
697
if start. isLeftBound ( sourceBufferStart: sourceBufferStart) {
696
698
return Lexer . Result ( . exclamationMark)
697
699
}
698
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
700
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
701
+ self = start
702
+ return result
699
703
700
704
case UInt8 ( ascii: " ? " ) :
701
705
if start. isLeftBound ( sourceBufferStart: sourceBufferStart) {
702
706
return Lexer . Result ( . postfixQuestionMark)
703
707
}
704
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
708
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
709
+ self = start
710
+ return result
705
711
706
712
case UInt8 ( ascii: " < " ) :
707
713
if self . peek ( matches: " # " ) {
708
- return self . tryLexEditorPlaceholder ( tokenStart: start, sourceBufferStart: sourceBufferStart)
714
+ let result = start. tryLexEditorPlaceholder ( sourceBufferStart: sourceBufferStart)
715
+ self = start
716
+ return result
709
717
}
710
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
718
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
719
+ self = start
720
+ return result
711
721
case UInt8 ( ascii: " > " ) :
712
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
722
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
723
+ self = start
724
+ return result
713
725
714
726
case UInt8 ( ascii: " = " ) , UInt8 ( ascii: " - " ) , UInt8 ( ascii: " + " ) ,
715
727
UInt8 ( ascii: " * " ) , UInt8 ( ascii: " % " ) , UInt8 ( ascii: " & " ) ,
716
728
UInt8 ( ascii: " | " ) , UInt8 ( ascii: " ^ " ) , UInt8 ( ascii: " ~ " ) ,
717
729
UInt8 ( ascii: " . " ) :
718
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
730
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
731
+ self = start
732
+ return result
719
733
case UInt8 ( ascii: " A " ) , UInt8 ( ascii: " B " ) , UInt8 ( ascii: " C " ) ,
720
734
UInt8 ( ascii: " D " ) , UInt8 ( ascii: " E " ) , UInt8 ( ascii: " F " ) ,
721
735
UInt8 ( ascii: " G " ) , UInt8 ( ascii: " H " ) , UInt8 ( ascii: " I " ) ,
@@ -735,10 +749,14 @@ extension Lexer.Cursor {
735
749
UInt8 ( ascii: " v " ) , UInt8 ( ascii: " w " ) , UInt8 ( ascii: " x " ) ,
736
750
UInt8 ( ascii: " y " ) , UInt8 ( ascii: " z " ) ,
737
751
UInt8 ( ascii: " _ " ) :
738
- return self . lexIdentifier ( tokenStart: start)
752
+ let result = start. lexIdentifier ( )
753
+ self = start
754
+ return result
739
755
740
756
case UInt8 ( ascii: " $ " ) :
741
- return self . lexDollarIdentifier ( start)
757
+ let result = start. lexDollarIdentifier ( )
758
+ self = start
759
+ return result
742
760
743
761
case UInt8 ( ascii: " 0 " ) , UInt8 ( ascii: " 1 " ) , UInt8 ( ascii: " 2 " ) ,
744
762
UInt8 ( ascii: " 3 " ) , UInt8 ( ascii: " 4 " ) , UInt8 ( ascii: " 5 " ) ,
@@ -751,17 +769,23 @@ extension Lexer.Cursor {
751
769
return self . lexStringQuote ( )
752
770
753
771
case UInt8 ( ascii: " ` " ) :
754
- return self . lexEscapedIdentifier ( quote: start)
772
+ let result = start. lexEscapedIdentifier ( )
773
+ self = start
774
+ return result
755
775
case nil :
756
776
return Lexer . Result ( . eof)
757
777
default :
758
778
var tmp = start
759
779
if tmp. advance ( if: { Unicode . Scalar ( $0) . isValidIdentifierStartCodePoint } ) {
760
- return self . lexIdentifier ( tokenStart: start)
780
+ let result = start. lexIdentifier ( )
781
+ self = start
782
+ return result
761
783
}
762
784
763
785
if tmp. advance ( if: { Unicode . Scalar ( $0) . isOperatorStartCodePoint } ) {
764
- return self . lexOperatorIdentifier ( tokenStart: start, sourceBufferStart: sourceBufferStart)
786
+ let result = start. lexOperatorIdentifier ( sourceBufferStart: sourceBufferStart)
787
+ self = start
788
+ return result
765
789
}
766
790
767
791
let unknownClassification = self . lexUnknown ( tokenStart: start)
@@ -1718,8 +1742,8 @@ extension Lexer.Cursor {
1718
1742
1719
1743
extension Lexer . Cursor {
1720
1744
/// lexIdentifier - Match [a-zA-Z_][a-zA-Z_$0-9]*
1721
- mutating func lexIdentifier( tokenStart tokStart : Lexer . Cursor ) -> Lexer . Result {
1722
- self = tokStart
1745
+ mutating func lexIdentifier( ) -> Lexer . Result {
1746
+ let tokStart = self
1723
1747
let didStart = self . advance ( if: { $0. isValidIdentifierStartCodePoint } )
1724
1748
assert ( didStart, " Unexpected start " )
1725
1749
@@ -1738,8 +1762,10 @@ extension Lexer.Cursor {
1738
1762
}
1739
1763
}
1740
1764
1741
- mutating func lexEscapedIdentifier( quote: Lexer . Cursor ) -> Lexer . Result {
1742
- assert ( self . previous == UInt8 ( ascii: " ` " ) , " Unexpected start of escaped identifier " )
1765
+ mutating func lexEscapedIdentifier( ) -> Lexer . Result {
1766
+ let quote = self
1767
+ let backtickConsumed = self . advance ( matching: " ` " )
1768
+ assert ( backtickConsumed, " Unexpected start of escaped identifier " )
1743
1769
1744
1770
// Check whether we have an identifier followed by another backtick, in which
1745
1771
// case this is an escaped identifier.
@@ -1749,17 +1775,18 @@ extension Lexer.Cursor {
1749
1775
self . advance ( while: { $0. isValidIdentifierContinuationCodePoint } )
1750
1776
1751
1777
// If we have the terminating "`", it's an escaped identifier.
1752
- if self . advance ( if : { $0 == Unicode . Scalar ( " ` " ) } ) {
1778
+ if self . advance ( matching : " ` " ) {
1753
1779
return Lexer . Result ( . identifier)
1754
1780
}
1755
1781
}
1756
1782
1757
1783
// Special case; allow '`$`'.
1758
1784
if quote. starts ( with: " `$` " . utf8) {
1759
1785
self = quote
1760
- _ = self . advance ( )
1761
- _ = self . advance ( )
1762
- _ = self . advance ( )
1786
+ let firstBacktickConsumed = self . advance ( matching: " ` " )
1787
+ let dollarConsumed = self . advance ( matching: " $ " )
1788
+ let secondBacktickConsumed = self . advance ( matching: " ` " )
1789
+ assert ( firstBacktickConsumed && dollarConsumed && secondBacktickConsumed)
1763
1790
return Lexer . Result ( . identifier)
1764
1791
}
1765
1792
@@ -1768,8 +1795,8 @@ extension Lexer.Cursor {
1768
1795
return Lexer . Result ( . backtick)
1769
1796
}
1770
1797
1771
- mutating func lexOperatorIdentifier( tokenStart tokStart : Lexer . Cursor , sourceBufferStart: Lexer . Cursor ) -> Lexer . Result {
1772
- self = tokStart
1798
+ mutating func lexOperatorIdentifier( sourceBufferStart: Lexer . Cursor ) -> Lexer . Result {
1799
+ let tokStart = self
1773
1800
let didStart = self . advance ( if: { $0. isOperatorStartCodePoint } )
1774
1801
assert ( didStart, " unexpected operator start " )
1775
1802
@@ -1796,13 +1823,15 @@ extension Lexer.Cursor {
1796
1823
// If there is a "//" or "/*" in the middle of an identifier token,
1797
1824
// it starts a comment.
1798
1825
var ptr = tokStart
1826
+ // Skip over the first character. A `//` or `/*` at the beginning would have
1827
+ // been consumed as trivia.
1799
1828
_ = ptr. advance ( )
1800
1829
while ptr. input. baseAddress! < self . input. baseAddress! {
1801
- defer { _ = ptr. advance ( ) }
1802
1830
if ptr. peek ( matches: " / " ) && ptr. peek ( at: 1 , matches: " / " , " * " ) {
1803
1831
self = ptr
1804
1832
break
1805
1833
}
1834
+ _ = ptr. advance ( )
1806
1835
}
1807
1836
}
1808
1837
@@ -1869,8 +1898,10 @@ extension Lexer.Cursor {
1869
1898
}
1870
1899
}
1871
1900
1872
- mutating func lexDollarIdentifier( _ tokStart: Lexer . Cursor ) -> Lexer . Result {
1873
- assert ( self . previous == UInt8 ( ascii: " $ " ) )
1901
+ mutating func lexDollarIdentifier( ) -> Lexer . Result {
1902
+ let tokStart = self
1903
+ let dollarConsumed = self . advance ( matching: " $ " )
1904
+ assert ( dollarConsumed)
1874
1905
1875
1906
var isAllDigits = true
1876
1907
while true {
@@ -1900,30 +1931,30 @@ extension Lexer.Cursor {
1900
1931
// MARK: - Editor Placeholders
1901
1932
1902
1933
extension Lexer . Cursor {
1903
/// Tries to lex an editor placeholder (`<#` ... `#>`) beginning at the cursor.
///
/// The cursor must be positioned on the opening `<`, directly followed by `#`.
/// If a closing `#>` is found before a newline, a nested `<#`, or the point
/// where `advance()` stops yielding bytes, the cursor is moved past the `#>`
/// and an `.identifier` result is returned. Otherwise the cursor is left
/// where it started and lexing is delegated to `lexOperatorIdentifier`.
///
/// - Parameter sourceBufferStart: Forwarded unchanged to
///   `lexOperatorIdentifier` on the fallback path.
/// - Returns: `Lexer.Result(.identifier)` for a well-formed placeholder,
///   otherwise the result of `lexOperatorIdentifier`.
mutating func tryLexEditorPlaceholder(sourceBufferStart: Lexer.Cursor) -> Lexer.Result {
  assert(self.peek(matches: "<") && self.peek(at: 1, matches: "#"))

  // Scan with a copy so `self` is only updated once the placeholder is known
  // to be well-formed.
  var scanner = self

  // The advances happen outside of `assert` because assert conditions are not
  // evaluated in optimized builds.
  let sawOpeningAngle = scanner.advance(matching: "<")
  let sawOpeningPound = scanner.advance(matching: "#")
  assert(sawOpeningAngle && sawOpeningPound)

  while let byte = scanner.advance() {
    // A newline ends the search without a match.
    if byte == UInt8(ascii: "\n") {
      break
    }

    // Another `<#` before the closing `#>` also ends the search.
    if byte == UInt8(ascii: "<") && scanner.peek(matches: "#") {
      break
    }

    // `#>` closes the placeholder: consume the `>` and commit the position.
    if byte == UInt8(ascii: "#") && scanner.peek(matches: ">") {
      let sawClosingAngle = scanner.advance(matching: ">")
      assert(sawClosingAngle)
      self = scanner
      return Lexer.Result(.identifier)
    }
  }

  // Not a well-formed placeholder.
  return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
}
1928
1959
}
1929
1960
0 commit comments