Skip to content

Commit da28695

Browse files
committed
Parse `0.2` after `...` as float literal, not member access
Deciding whether `0.2` should be lexed as a float literal or a member access is a little more difficult than just looking at the previous character, because `0.2` might be preceded by an operator like `...` or `.^.`, in which case it should be lexed as a float literal and not a member access. We might be able to do some disambiguation magic on whether the character before the period is also an operator continuation point, but that seems fairly brittle to me. The sanest way of doing this is to store the previously lexed token’s kind in the cursor and check that. I measured and did not see a performance regression when parsing MovieSwiftUI. rdar://103273988
1 parent 64cabae commit da28695

File tree

2 files changed

+49
-4
lines changed

2 files changed

+49
-4
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ extension Lexer {
201201
struct Cursor {
202202
var input: UnsafeBufferPointer<UInt8>
203203
var previous: UInt8
204+
/// If we have already lexed a token, the kind of the previously lexed token
205+
var previousTokenKind: RawTokenBaseKind?
204206
private var stateStack: StateStack = StateStack()
205207

206208
init(input: UnsafeBufferPointer<UInt8>, previous: UInt8) {
@@ -335,6 +337,7 @@ extension Lexer.Cursor {
335337
flags.insert(.isAtStartOfLine)
336338
}
337339

340+
self.previousTokenKind = result.tokenKind.base
338341
let error = result.error.map { error in
339342
return LexerError(error.kind, byteOffset: cursor.distance(to: error.position))
340343
}
@@ -676,6 +679,9 @@ extension Lexer.Cursor {
676679
}
677680

678681
/// Revert the lexer by `offset` bytes. This should only be used by `resetForSplit`.
682+
/// This must not back up by more bytes than the last token because that would
683+
/// require us to also update `previousTokenKind`, which we don't do in this
684+
/// function
679685
mutating func backUp(by offset: Int) {
680686
assert(!self.isAtStartOfFile)
681687
self.previous = self.input.baseAddress!.advanced(by: -(offset + 1)).pointee
@@ -1224,11 +1230,16 @@ extension Lexer.Cursor {
12241230

12251231
// TODO: This can probably be unified with lexHexNumber somehow
12261232

1227-
// Lex things like 4.x as '4' followed by a tok::period.
12281233
if self.is(at: ".") {
1229-
// NextToken is the soon to be previous token
1230-
// Therefore: x.0.1 is sub-tuple access, not x.float_literal
1231-
if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit || tokenStart.previous == UInt8(ascii: ".") {
1234+
if self.peek(at: 1) == nil {
1235+
// If there are no more digits following the '.', we don't have a float
1236+
// literal.
1237+
return Lexer.Result(.integerLiteral)
1238+
} else if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit {
1239+
// ".a" is a member access and certainly not a float literal
1240+
return Lexer.Result(.integerLiteral)
1241+
} else if self.previousTokenKind == .period {
1242+
// Lex x.0.1 as sub-tuple access, not x.float_literal.
12321243
return Lexer.Result(.integerLiteral)
12331244
}
12341245
} else if self.isAtEndOfFile || self.is(notAt: "e", "E") {

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,4 +1002,38 @@ public class LexerTests: XCTestCase {
10021002
]
10031003
)
10041004
}
1005+
1006+
func testMultiDigitTupleAccess() {
1007+
AssertLexemes(
1008+
"x.13.1",
1009+
lexemes: [
1010+
LexemeSpec(.identifier, text: "x"),
1011+
LexemeSpec(.period, text: "."),
1012+
LexemeSpec(.integerLiteral, text: "13"),
1013+
LexemeSpec(.period, text: "."),
1014+
LexemeSpec(.integerLiteral, text: "1"),
1015+
]
1016+
)
1017+
}
1018+
1019+
func testFloatingPointNumberAfterRangeOperator() {
1020+
AssertLexemes(
1021+
"0.1...0.2",
1022+
lexemes: [
1023+
LexemeSpec(.floatingLiteral, text: "0.1"),
1024+
LexemeSpec(.binaryOperator, text: "..."),
1025+
LexemeSpec(.floatingLiteral, text: "0.2"),
1026+
]
1027+
)
1028+
}
1029+
1030+
func testUnterminatedFloatLiteral() {
1031+
AssertLexemes(
1032+
"0.",
1033+
lexemes: [
1034+
LexemeSpec(.integerLiteral, text: "0"),
1035+
LexemeSpec(.period, text: "."),
1036+
]
1037+
)
1038+
}
10051039
}

0 commit comments

Comments
 (0)