Skip to content

Commit 4099a9c

Browse files
committed
Make operator lexing peek-based
1 parent 980f3de commit 4099a9c

File tree

3 files changed

+81
-43
lines changed

3 files changed

+81
-43
lines changed

Sources/SwiftParser/Declarations.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1243,7 +1243,7 @@ extension Parser {
12431243
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
12441244
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12451245
guard buffer[0] == UInt8(ascii: "/") else { return false }
1246-
switch cursor.lexOperatorIdentifier(tokenStart: cursor, sourceBufferStart: cursor).tokenKind {
1246+
switch cursor.lexOperatorIdentifier(sourceBufferStart: cursor).tokenKind {
12471247
case .unknown:
12481248
return false
12491249

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 73 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -690,32 +690,46 @@ extension Lexer.Cursor {
690690
}
691691

692692
// Otherwise try lex a magic pound literal.
693-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
693+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
694+
self = start
695+
return result
694696
case UInt8(ascii: "!"):
695697
if start.isLeftBound(sourceBufferStart: sourceBufferStart) {
696698
return Lexer.Result(.exclamationMark)
697699
}
698-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
700+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
701+
self = start
702+
return result
699703

700704
case UInt8(ascii: "?"):
701705
if start.isLeftBound(sourceBufferStart: sourceBufferStart) {
702706
return Lexer.Result(.postfixQuestionMark)
703707
}
704-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
708+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
709+
self = start
710+
return result
705711

706712
case UInt8(ascii: "<"):
707713
if self.peek(matches: "#") {
708-
return self.tryLexEditorPlaceholder(tokenStart: start, sourceBufferStart: sourceBufferStart)
714+
let result = start.tryLexEditorPlaceholder(sourceBufferStart: sourceBufferStart)
715+
self = start
716+
return result
709717
}
710-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
718+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
719+
self = start
720+
return result
711721
case UInt8(ascii: ">"):
712-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
722+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
723+
self = start
724+
return result
713725

714726
case UInt8(ascii: "="), UInt8(ascii: "-"), UInt8(ascii: "+"),
715727
UInt8(ascii: "*"), UInt8(ascii: "%"), UInt8(ascii: "&"),
716728
UInt8(ascii: "|"), UInt8(ascii: "^"), UInt8(ascii: "~"),
717729
UInt8(ascii: "."):
718-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
730+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
731+
self = start
732+
return result
719733
case UInt8(ascii: "A"), UInt8(ascii: "B"), UInt8(ascii: "C"),
720734
UInt8(ascii: "D"), UInt8(ascii: "E"), UInt8(ascii: "F"),
721735
UInt8(ascii: "G"), UInt8(ascii: "H"), UInt8(ascii: "I"),
@@ -735,10 +749,14 @@ extension Lexer.Cursor {
735749
UInt8(ascii: "v"), UInt8(ascii: "w"), UInt8(ascii: "x"),
736750
UInt8(ascii: "y"), UInt8(ascii: "z"),
737751
UInt8(ascii: "_"):
738-
return self.lexIdentifier(tokenStart: start)
752+
let result = start.lexIdentifier()
753+
self = start
754+
return result
739755

740756
case UInt8(ascii: "$"):
741-
return self.lexDollarIdentifier(start)
757+
let result = start.lexDollarIdentifier()
758+
self = start
759+
return result
742760

743761
case UInt8(ascii: "0"), UInt8(ascii: "1"), UInt8(ascii: "2"),
744762
UInt8(ascii: "3"), UInt8(ascii: "4"), UInt8(ascii: "5"),
@@ -751,17 +769,23 @@ extension Lexer.Cursor {
751769
return self.lexStringQuote()
752770

753771
case UInt8(ascii: "`"):
754-
return self.lexEscapedIdentifier(quote: start)
772+
let result = start.lexEscapedIdentifier()
773+
self = start
774+
return result
755775
case nil:
756776
return Lexer.Result(.eof)
757777
default:
758778
var tmp = start
759779
if tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierStartCodePoint }) {
760-
return self.lexIdentifier(tokenStart: start)
780+
let result = start.lexIdentifier()
781+
self = start
782+
return result
761783
}
762784

763785
if tmp.advance(if: { Unicode.Scalar($0).isOperatorStartCodePoint }) {
764-
return self.lexOperatorIdentifier(tokenStart: start, sourceBufferStart: sourceBufferStart)
786+
let result = start.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
787+
self = start
788+
return result
765789
}
766790

767791
let unknownClassification = self.lexUnknown(tokenStart: start)
@@ -1718,8 +1742,8 @@ extension Lexer.Cursor {
17181742

17191743
extension Lexer.Cursor {
17201744
/// lexIdentifier - Match [a-zA-Z_][a-zA-Z_$0-9]*
1721-
mutating func lexIdentifier(tokenStart tokStart: Lexer.Cursor) -> Lexer.Result {
1722-
self = tokStart
1745+
mutating func lexIdentifier() -> Lexer.Result {
1746+
let tokStart = self
17231747
let didStart = self.advance(if: { $0.isValidIdentifierStartCodePoint })
17241748
assert(didStart, "Unexpected start")
17251749

@@ -1738,8 +1762,10 @@ extension Lexer.Cursor {
17381762
}
17391763
}
17401764

1741-
mutating func lexEscapedIdentifier(quote: Lexer.Cursor) -> Lexer.Result {
1742-
assert(self.previous == UInt8(ascii: "`"), "Unexpected start of escaped identifier")
1765+
mutating func lexEscapedIdentifier() -> Lexer.Result {
1766+
let quote = self
1767+
let backtickConsumed = self.advance(matching: "`")
1768+
assert(backtickConsumed, "Unexpected start of escaped identifier")
17431769

17441770
// Check whether we have an identifier followed by another backtick, in which
17451771
// case this is an escaped identifier.
@@ -1749,17 +1775,18 @@ extension Lexer.Cursor {
17491775
self.advance(while: { $0.isValidIdentifierContinuationCodePoint })
17501776

17511777
// If we have the terminating "`", it's an escaped identifier.
1752-
if self.advance(if: { $0 == Unicode.Scalar("`") }) {
1778+
if self.advance(matching: "`") {
17531779
return Lexer.Result(.identifier)
17541780
}
17551781
}
17561782

17571783
// Special case; allow '`$`'.
17581784
if quote.starts(with: "`$`".utf8) {
17591785
self = quote
1760-
_ = self.advance()
1761-
_ = self.advance()
1762-
_ = self.advance()
1786+
let firstBacktickConsumed = self.advance(matching: "`")
1787+
let dollarConsumed = self.advance(matching: "$")
1788+
let secondBacktickConsumed = self.advance(matching: "`")
1789+
assert(firstBacktickConsumed && dollarConsumed && secondBacktickConsumed)
17631790
return Lexer.Result(.identifier)
17641791
}
17651792

@@ -1768,8 +1795,8 @@ extension Lexer.Cursor {
17681795
return Lexer.Result(.backtick)
17691796
}
17701797

1771-
mutating func lexOperatorIdentifier(tokenStart tokStart: Lexer.Cursor, sourceBufferStart: Lexer.Cursor) -> Lexer.Result {
1772-
self = tokStart
1798+
mutating func lexOperatorIdentifier(sourceBufferStart: Lexer.Cursor) -> Lexer.Result {
1799+
let tokStart = self
17731800
let didStart = self.advance(if: { $0.isOperatorStartCodePoint })
17741801
assert(didStart, "unexpected operator start")
17751802

@@ -1796,13 +1823,15 @@ extension Lexer.Cursor {
17961823
// If there is a "//" or "/*" in the middle of an identifier token,
17971824
// it starts a comment.
17981825
var ptr = tokStart
1826+
// Skip over the first character. A `//` or `/*` at the beginning would have
1827+
// been consumed as trivia.
17991828
_ = ptr.advance()
18001829
while ptr.input.baseAddress! < self.input.baseAddress! {
1801-
defer { _ = ptr.advance() }
18021830
if ptr.peek(matches: "/") && ptr.peek(at: 1, matches: "/", "*") {
18031831
self = ptr
18041832
break
18051833
}
1834+
_ = ptr.advance()
18061835
}
18071836
}
18081837

@@ -1869,8 +1898,10 @@ extension Lexer.Cursor {
18691898
}
18701899
}
18711900

1872-
mutating func lexDollarIdentifier(_ tokStart: Lexer.Cursor) -> Lexer.Result {
1873-
assert(self.previous == UInt8(ascii: "$"))
1901+
mutating func lexDollarIdentifier() -> Lexer.Result {
1902+
let tokStart = self
1903+
let dollarConsumed = self.advance(matching: "$")
1904+
assert(dollarConsumed)
18741905

18751906
var isAllDigits = true
18761907
while true {
@@ -1900,30 +1931,30 @@ extension Lexer.Cursor {
19001931
// MARK: - Editor Placeholders
19011932

19021933
extension Lexer.Cursor {
1903-
mutating func tryLexEditorPlaceholder(tokenStart tokStart: Lexer.Cursor, sourceBufferStart: Lexer.Cursor) -> Lexer.Result {
1904-
assert(self.previous == UInt8(ascii: "<") && self.peek(matches: "#"))
1934+
mutating func tryLexEditorPlaceholder(sourceBufferStart: Lexer.Cursor) -> Lexer.Result {
1935+
assert(self.peek(matches: "<") && self.peek(at: 1, matches: "#"))
19051936
var ptr = self
1906-
_ = ptr.advance()
1907-
while !ptr.isAtEndOfFile {
1908-
defer { _ = ptr.advance() }
1909-
if ptr.peek(matches: "\n") {
1910-
break
1911-
}
1912-
guard !ptr.starts(with: "<#".utf8) else {
1913-
break
1914-
}
1915-
1916-
if ptr.starts(with: "#>".utf8) {
1917-
// Found it.
1918-
_ = ptr.advance()
1919-
_ = ptr.advance()
1937+
let leftAngleConsumed = ptr.advance(matching: "<")
1938+
let poundConsumed = ptr.advance(matching: "#")
1939+
assert(leftAngleConsumed && poundConsumed)
1940+
LOOP: while let consumed = ptr.advance() {
1941+
switch consumed {
1942+
case UInt8(ascii: "\n"):
1943+
break LOOP
1944+
case UInt8(ascii: "<") where ptr.peek(matches: "#"):
1945+
break LOOP
1946+
case UInt8(ascii: "#") where ptr.peek(matches: ">"):
1947+
let closingAngleConsumed = ptr.advance(matching: ">")
1948+
assert(closingAngleConsumed)
19201949
self = ptr
19211950
return Lexer.Result(.identifier)
1951+
default:
1952+
break
19221953
}
19231954
}
19241955

19251956
// Not a well-formed placeholder.
1926-
return self.lexOperatorIdentifier(tokenStart: tokStart, sourceBufferStart: sourceBufferStart)
1957+
return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart)
19271958
}
19281959
}
19291960

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,13 @@ public class LexerTests: XCTestCase {
630630
LexemeSpec(.identifier, text: "<#b2#>"),
631631
]
632632
)
633+
634+
AssertLexemes(
635+
"<##>",
636+
lexemes: [
637+
LexemeSpec(.identifier, text: "<##>", trailing: "")
638+
]
639+
)
633640
}
634641

635642
func testCommentAttribution() {

0 commit comments

Comments
 (0)