Parse 0.2 after `...` as float literal, not member access

ahoppen · ahoppen · commit 193e7aa7a650 · 2023-01-26T11:19:41.000+01:00
Deciding whether `0.2` should be lexed as a float literal or a member access is a little more difficult than just looking at the previous character because `0.2` might be preceded by an operator like `...` or `.^.`, in which case it should be lexed as a float literal and not a member access.

We might be able to do some disambiguation magic on whether the character before the period is also an operator continuation point, but that seems fairly brittle to me. The sanest way of doing this is to store the previously lexed token’s kind in the cursor and check that.

I measured and did not see a performance regression when parsing MovieSwiftUI.

rdar://103273988
diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift
@@ -201,6 +201,8 @@ extension Lexer {
   struct Cursor {
     var input: UnsafeBufferPointer<UInt8>
     var previous: UInt8
+    /// If we have already lexed a token, the kind of the previously lexed token
+    var previousTokenKind: RawTokenBaseKind?
     private var stateStack: StateStack = StateStack()
 
     init(input: UnsafeBufferPointer<UInt8>, previous: UInt8) {
@@ -335,6 +337,8 @@ extension Lexer.Cursor {
       flags.insert(.isAtStartOfLine)
     }
 
+    self.previousTokenKind = result.tokenKind.base
+
     return .init(
       tokenKind: result.tokenKind,
       flags: flags,
@@ -1256,11 +1260,13 @@ extension Lexer.Cursor {
 
     // TODO: This can probably be unified with lexHexNumber somehow
 
-    // Lex things like 4.x as '4' followed by a tok::period.
     if self.is(at: ".") {
-      // NextToken is the soon to be previous token
-      // Therefore: x.0.1 is sub-tuple access, not x.float_literal
-      if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit || tokenStart.previous == UInt8(ascii: ".") {
+      // Lex things like `4.x` or `x.0.1` as an integer literal followed by a period, not as a float literal.
+      if let peeked = self.peek(at: 1), !Unicode.Scalar(peeked).isDigit {
+        // ".a" is a member access and certainly not a float literal
+        return Lexer.Result(.integerLiteral)
+      } else if self.previousTokenKind == .period {
+        // Lex x.0.1 as sub-tuple access, not x.float_literal.
         return Lexer.Result(.integerLiteral)
       }
     } else if self.isAtEndOfFile || self.is(notAt: "e", "E") {
diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift
@@ -917,4 +917,28 @@ public class LexerTests: XCTestCase {
       ]
     )
   }
+
+  func testMultiDigitTupleAccess() {
+    AssertLexemes(
+      "x.13.1",
+      lexemes: [
+        LexemeSpec(.identifier, text: "x"),
+        LexemeSpec(.period, text: "."),
+        LexemeSpec(.integerLiteral, text: "13"),
+        LexemeSpec(.period, text: "."),
+        LexemeSpec(.integerLiteral, text: "1"),
+      ]
+    )
+  }
+
+  func testFloatingPointNumberAfterRangeOperator() {
+    AssertLexemes(
+      "0.1...0.2",
+      lexemes: [
+        LexemeSpec(.floatingLiteral, text: "0.1"),
+        LexemeSpec(.binaryOperator, text: "..."),
+        LexemeSpec(.floatingLiteral, text: "0.2"),
+      ]
+    )
+  }
 }