swiftlang · hamishknight · Mar 23, 2023 · Mar 23, 2023 · Mar 23, 2023
@@ -649,26 +649,6 @@ extension Lexer.Cursor {
     return nil
   }
 
-  /// If this is the opening delimiter of a raw string literal, return the number
-  /// of `#` in the raw string delimiter.
-  /// Assumes that the parser is currently pointing at the character after the first `#`.
-  /// In other words, the first `#` is expected to already be consumed.
-  mutating func legacyAdvanceIfOpeningRawStringDelimiter() -> Int? {
-    assert(self.previous == UInt8(ascii: "#"))
-
-    var tmp = self
-    var length = 1
-    while tmp.advance(matching: "#") {
-      length += 1
-    }
-
-    if tmp.is(at: #"""#) {
-      self = tmp
-      return length
-    }
-    return nil
-  }
-
   /// If we are positioned at the start of a multiline string delimiter, consume
   /// that delimiter and return `true`, otherwise return `false`.
   ///
@@ -1515,8 +1495,6 @@ extension Lexer.Cursor {
   /// Lexes a single character in a string literal, handling escape sequences
   /// like `\n` or `\u{1234}` as a single character.
   mutating func lexCharacterInStringLiteral(stringLiteralKind: StringLiteralKind, delimiterLength: Int) -> CharacterLex {
-    let charStart = self
-
     switch self.peek() {
     case UInt8(ascii: #"""#):
       let quote = Unicode.Scalar(self.advance()!)
@@ -1578,8 +1556,6 @@ extension Lexer.Cursor {
         return .error(kind)
       }
     default:
-      _ = self.advance()
-      self = charStart
       guard let charValue = self.advanceValidatingUTF8Character() else {
         return .error(.invalidUtf8)
       }

@@ -181,7 +181,7 @@ extension Unicode.Scalar {
     if encodedBytes == 1 || !Unicode.Scalar(curByte).isStartOfUTF8Character {
       // Skip until we get the start of another character.  This is guaranteed to
       // at least stop at the nul at the end of the buffer.
-      while let peeked = peek(), Unicode.Scalar(peeked).isStartOfUTF8Character {
+      while let peeked = peek(), !Unicode.Scalar(peeked).isStartOfUTF8Character {
         _ = advance()
       }
       return nil

@@ -886,6 +886,23 @@ public class LexerTests: XCTestCase {
     }
   }
 
+  func testInvalidUtf8_3() {
+    let sourceBytes: [UInt8] = [0xfd, 0x41]  // 0x41 == "A"
+
+    lex(sourceBytes) { lexemes in
+      guard lexemes.count == 2 else {
+        return XCTFail("Expected 2 lexemes, got \(lexemes.count)")
+      }
+      AssertRawBytesLexeme(
+        lexemes[0],
+        kind: .identifier,
+        leadingTrivia: [0xfd],
+        text: [0x41],
+        error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
+      )
+    }
+  }
+
   func testInterpolatedString() {
     AssertLexemes(
       #"""