Skip to content

Commit 050d162

Browse files
committed
Make lexImpl peek-based
1 parent c648e85 commit 050d162

File tree

1 file changed

+61
-67
lines changed

1 file changed

+61
-67
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 61 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -433,11 +433,30 @@ extension Lexer.Cursor {
433433
}
434434
}
435435

436+
/// If this is the opening delimiter of a raw string literal, return the number
437+
/// of `#` in the raw string delimiter and advance the cursor past those `#`s.
438+
/// Assumes that the cursor is currently pointing at the first `#`. Returns `nil` and leaves the cursor unchanged if the `#`s are not followed by `"`.
439+
mutating func advanceIfOpeningRawStringDelimiter() -> Int? {
440+
assert(self.peek(matches: "#"))
441+
442+
// Scan on a copy so that `self` is only modified once a delimiter is confirmed.
var tmp = self
443+
var length = 0
444+
while tmp.advance(matching: "#") {
445+
length += 1
446+
}
447+
448+
// The quote is only peeked at, not consumed; the caller is left pointing at it.
if tmp.peek(matches: #"""#) {
449+
self = tmp
450+
return length
451+
}
452+
return nil
453+
}
454+
436455
/// If this is the opening delimiter of a raw string literal, return the number
437456
/// of `#` in the raw string delimiter.
438457
/// Assumes that the parser is currently pointing at the character after the first `#`.
439458
/// In other words, the first `#` is expected to already be consumed.
440-
mutating func advanceIfOpeningRawStringDelimiter() -> Int? {
459+
mutating func legacyAdvanceIfOpeningRawStringDelimiter() -> Int? {
441460
assert(self.previous == UInt8(ascii: "#"))
442461

443462
var tmp = self
@@ -652,20 +671,19 @@ extension Lexer.Cursor {
652671
)
653672
}
654673

655-
var start = self
656-
switch self.advance() {
657-
case UInt8(ascii: "@"): return Lexer.Result(.atSign)
658-
case UInt8(ascii: "{"): return Lexer.Result(.leftBrace)
659-
case UInt8(ascii: "["): return Lexer.Result(.leftSquareBracket)
660-
case UInt8(ascii: "("): return Lexer.Result(.leftParen)
661-
case UInt8(ascii: "}"): return Lexer.Result(.rightBrace)
662-
case UInt8(ascii: "]"): return Lexer.Result(.rightSquareBracket)
663-
case UInt8(ascii: ")"): return Lexer.Result(.rightParen)
664-
665-
case UInt8(ascii: ","): return Lexer.Result(.comma)
666-
case UInt8(ascii: ";"): return Lexer.Result(.semicolon)
667-
case UInt8(ascii: ":"): return Lexer.Result(.colon)
668-
case UInt8(ascii: "\\"): return Lexer.Result(.backslash)
674+
switch self.peek() {
675+
case UInt8(ascii: "@"): _ = self.advance(); return Lexer.Result(.atSign)
676+
case UInt8(ascii: "{"): _ = self.advance(); return Lexer.Result(.leftBrace)
677+
case UInt8(ascii: "["): _ = self.advance(); return Lexer.Result(.leftSquareBracket)
678+
case UInt8(ascii: "("): _ = self.advance(); return Lexer.Result(.leftParen)
679+
case UInt8(ascii: "}"): _ = self.advance(); return Lexer.Result(.rightBrace)
680+
case UInt8(ascii: "]"): _ = self.advance(); return Lexer.Result(.rightSquareBracket)
681+
case UInt8(ascii: ")"): _ = self.advance(); return Lexer.Result(.rightParen)
682+
683+
case UInt8(ascii: ","): _ = self.advance(); return Lexer.Result(.comma)
684+
case UInt8(ascii: ";"): _ = self.advance(); return Lexer.Result(.semicolon)
685+
case UInt8(ascii: ":"): _ = self.advance(); return Lexer.Result(.colon)
686+
case UInt8(ascii: "\\"): _ = self.advance(); return Lexer.Result(.backslash)
669687

670688
case UInt8(ascii: "#"):
671689
if case .afterClosingStringQuote(delimiterLength: _) = state {
@@ -678,60 +696,46 @@ extension Lexer.Cursor {
678696
}
679697

680698
// Try lex a regex literal.
681-
if let token = start.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) {
682-
self = start
699+
if let token = self.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) {
683700
return Lexer.Result(token)
684701
}
685702
// Otherwise try lex a magic pound literal.
686703
return self.lexMagicPoundLiteral()
687704
case UInt8(ascii: "/"):
688705
// Try lex a regex literal.
689-
if let token = start.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) {
690-
self = start
706+
if let token = self.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) {
691707
return Lexer.Result(token)
692708
}
693709

694710
// Otherwise try lex an operator.
695-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
696-
self = start
697-
return result
711+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
698712
case UInt8(ascii: "!"):
699-
if start.isLeftBound(sourceBufferStart: sourceBufferStart) {
713+
if self.isLeftBound(sourceBufferStart: sourceBufferStart) {
714+
_ = self.advance()
700715
return Lexer.Result(.exclamationMark)
701716
}
702-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
703-
self = start
704-
return result
717+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
705718

706719
case UInt8(ascii: "?"):
707-
if start.isLeftBound(sourceBufferStart: sourceBufferStart) {
720+
if self.isLeftBound(sourceBufferStart: sourceBufferStart) {
721+
_ = self.advance()
708722
return Lexer.Result(.postfixQuestionMark)
709723
}
710-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
711-
self = start
712-
return result
724+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
713725

714726
case UInt8(ascii: "<"):
715-
if self.peek(matches: "#") {
716-
let result = start.tryLexEditorPlaceholder(sourceBufferStart: sourceBufferStart)
717-
self = start
718-
return result
727+
if self.peek(at: 1, matches: "#") {
728+
return self.tryLexEditorPlaceholder(sourceBufferStart: sourceBufferStart)
719729
}
720-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
721-
self = start
722-
return result
730+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
723731
case UInt8(ascii: ">"):
724-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
725-
self = start
726-
return result
732+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
727733

728734
case UInt8(ascii: "="), UInt8(ascii: "-"), UInt8(ascii: "+"),
729735
UInt8(ascii: "*"), UInt8(ascii: "%"), UInt8(ascii: "&"),
730736
UInt8(ascii: "|"), UInt8(ascii: "^"), UInt8(ascii: "~"),
731737
UInt8(ascii: "."):
732-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
733-
self = start
734-
return result
738+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
735739
case UInt8(ascii: "A"), UInt8(ascii: "B"), UInt8(ascii: "C"),
736740
UInt8(ascii: "D"), UInt8(ascii: "E"), UInt8(ascii: "F"),
737741
UInt8(ascii: "G"), UInt8(ascii: "H"), UInt8(ascii: "I"),
@@ -751,47 +755,34 @@ extension Lexer.Cursor {
751755
UInt8(ascii: "v"), UInt8(ascii: "w"), UInt8(ascii: "x"),
752756
UInt8(ascii: "y"), UInt8(ascii: "z"),
753757
UInt8(ascii: "_"):
754-
let result = start.lexIdentifier()
755-
self = start
756-
return result
758+
return self.lexIdentifier()
757759

758760
case UInt8(ascii: "$"):
759-
let result = start.lexDollarIdentifier()
760-
self = start
761-
return result
761+
return self.lexDollarIdentifier()
762762

763763
case UInt8(ascii: "0"), UInt8(ascii: "1"), UInt8(ascii: "2"),
764764
UInt8(ascii: "3"), UInt8(ascii: "4"), UInt8(ascii: "5"),
765765
UInt8(ascii: "6"), UInt8(ascii: "7"), UInt8(ascii: "8"),
766766
UInt8(ascii: "9"):
767-
let result = start.lexNumber()
768-
self = start
769-
return result
767+
return self.lexNumber()
770768
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
771769
return self.lexStringQuote()
772770

773771
case UInt8(ascii: "`"):
774-
let result = start.lexEscapedIdentifier()
775-
self = start
776-
return result
772+
return self.lexEscapedIdentifier()
777773
case nil:
778774
return Lexer.Result(.eof)
779775
default:
780-
var tmp = start
776+
var tmp = self
781777
if tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierStartCodePoint }) {
782-
let result = start.lexIdentifier()
783-
self = start
784-
return result
778+
return self.lexIdentifier()
785779
}
786780

787781
if tmp.advance(if: { Unicode.Scalar($0).isOperatorStartCodePoint }) {
788-
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
789-
self = start
790-
return result
782+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
791783
}
792784

793-
let unknownClassification = start.lexUnknown()
794-
self = start
785+
let unknownClassification = self.lexUnknown()
795786
assert(unknownClassification == .lexemeContents, "Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia")
796787
return Lexer.Result(.unknown)
797788
}
@@ -1217,6 +1208,8 @@ extension Lexer.Cursor {
12171208

12181209
extension Lexer.Cursor {
12191210
mutating func lexMagicPoundLiteral() -> Lexer.Result {
1211+
let poundConsumed = self.advance(matching: "#")
1212+
assert(poundConsumed)
12201213
var tmp = self
12211214
// Scan for [a-zA-Z]+ to see what we match.
12221215
while let peeked = tmp.peek(), Unicode.Scalar(peeked).isAsciiIdentifierStart {
@@ -1428,11 +1421,12 @@ extension Lexer.Cursor {
14281421
}
14291422
}
14301423

1431-
if self.previous == UInt8(ascii: "'") {
1424+
if self.advance(matching: "'") {
14321425
return Lexer.Result(.singleQuote, newState: newState(currentState: self.state, kind: .singleQuote))
14331426
}
14341427

1435-
assert(self.previous == UInt8(ascii: #"""#))
1428+
let firstQuoteConsumed = self.advance(matching: #"""#)
1429+
assert(firstQuoteConsumed)
14361430

14371431
var lookingForMultilineString = self
14381432
if lookingForMultilineString.advance(matching: #"""#), lookingForMultilineString.advance(matching: #"""#) {
@@ -1624,7 +1618,7 @@ extension Lexer.Cursor {
16241618
return last
16251619

16261620
case UInt8(ascii: "#"):
1627-
guard !inStringLiteral(), let delim = curPtr.advanceIfOpeningRawStringDelimiter() else {
1621+
guard !inStringLiteral(), let delim = curPtr.legacyAdvanceIfOpeningRawStringDelimiter() else {
16281622
continue
16291623
}
16301624
let quoteConsumed = curPtr.advance(matching: #"""#)

0 commit comments

Comments
 (0)