Skip to content

Commit 193e7aa

Browse files
committed
Parse `0.2` after `...` as float literal, not member access
Deciding whether `0.2` should be lexed as a float literal or a member access is a little more difficult than just looking at the previous character, because `0.2` might be preceded by an operator like `...` or `.^.`, in which case it should be lexed as a float literal and not a member access. We might be able to do some disambiguation magic on whether the character before the period is also an operator continuation point, but that seems fairly brittle to me. The sanest way of doing this is to store the previously lexed token’s kind in the cursor and check that. I measured and did not see a performance regression when parsing MovieSwiftUI. rdar://103273988
1 parent 1159abd commit 193e7aa

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ extension Lexer {
201201
struct Cursor {
202202
var input: UnsafeBufferPointer<UInt8>
203203
var previous: UInt8
204+
/// If we have already lexed a token, the kind of the previously lexed token
205+
var previousTokenKind: RawTokenBaseKind?
204206
private var stateStack: StateStack = StateStack()
205207

206208
init(input: UnsafeBufferPointer<UInt8>, previous: UInt8) {
@@ -335,6 +337,8 @@ extension Lexer.Cursor {
335337
flags.insert(.isAtStartOfLine)
336338
}
337339

340+
self.previousTokenKind = result.tokenKind.base
341+
338342
return .init(
339343
tokenKind: result.tokenKind,
340344
flags: flags,
@@ -1256,11 +1260,13 @@ extension Lexer.Cursor {
12561260

12571261
// TODO: This can probably be unified with lexHexNumber somehow
12581262

1259-
// Lex things like 4.x as '4' followed by a tok::period.
12601263
if self.is(at: ".") {
1261-
// NextToken is the soon to be previous token
1262-
// Therefore: x.0.1 is sub-tuple access, not x.float_literal
1263-
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit || tokenStart.previous == UInt8(ascii: ".") {
1264+
// Lex x.0.1 as sub-tuple access, not x.float_literal
1265+
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit {
1266+
// ".a" is a member access and certainly not a float literal
1267+
return Lexer.Result(.integerLiteral)
1268+
} else if self.previousTokenKind == .period {
1269+
// Lex x.0.1 as sub-tuple access, not x.float_literal.
12641270
return Lexer.Result(.integerLiteral)
12651271
}
12661272
} else if self.isAtEndOfFile || self.is(notAt: "e", "E") {

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,4 +917,28 @@ public class LexerTests: XCTestCase {
917917
]
918918
)
919919
}
920+
921+
func testMultiDigitTupleAccess() {
922+
AssertLexemes(
923+
"x.13.1",
924+
lexemes: [
925+
LexemeSpec(.identifier, text: "x"),
926+
LexemeSpec(.period, text: "."),
927+
LexemeSpec(.integerLiteral, text: "13"),
928+
LexemeSpec(.period, text: "."),
929+
LexemeSpec(.integerLiteral, text: "1"),
930+
]
931+
)
932+
}
933+
934+
func testFloatingPointNumberAfterRangeOperator() {
935+
AssertLexemes(
936+
"0.1...0.2",
937+
lexemes: [
938+
LexemeSpec(.floatingLiteral, text: "0.1"),
939+
LexemeSpec(.binaryOperator, text: "..."),
940+
LexemeSpec(.floatingLiteral, text: "0.2"),
941+
]
942+
)
943+
}
920944
}

0 commit comments

Comments
 (0)