Fix recovery for UTF-8 decoding of continuation byte

hamishknight · hamishknight · commit b949a7a44d7d · 2023-03-23T11:25:42.000Z
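The condition of the recovery loop was inverted: after hitting an invalid byte, the lexer should keep skipping while the next byte is *not* the start of a UTF-8 character, so that it resumes at the next character start.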
diff --git a/Sources/SwiftParser/Lexer/UnicodeScalarExtensions.swift b/Sources/SwiftParser/Lexer/UnicodeScalarExtensions.swift
@@ -181,7 +181,7 @@ extension Unicode.Scalar {
     if encodedBytes == 1 || !Unicode.Scalar(curByte).isStartOfUTF8Character {
       // Skip until we get the start of another character.  This is guaranteed to
       // at least stop at the nul at the end of the buffer.
-      while let peeked = peek(), Unicode.Scalar(peeked).isStartOfUTF8Character {
+      while let peeked = peek(), !Unicode.Scalar(peeked).isStartOfUTF8Character {
         _ = advance()
       }
       return nil
diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift
@@ -886,6 +886,23 @@ public class LexerTests: XCTestCase {
     }
   }
 
+  func testInvalidUtf8_3() {
+    let sourceBytes: [UInt8] = [0xfd, 0x41]  // 0x41 == "A"
+
+    lex(sourceBytes) { lexemes in
+      guard lexemes.count == 2 else {
+        return XCTFail("Expected 2 lexemes, got \(lexemes.count)")
+      }
+      AssertRawBytesLexeme(
+        lexemes[0],
+        kind: .identifier,
+        leadingTrivia: [0xfd],
+        text: [0x41],
+        error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
+      )
+    }
+  }
+
   func testInterpolatedString() {
     AssertLexemes(
       #"""

Original file line number	Diff line number	Diff line change
`@@ -181,7 +181,7 @@ extension Unicode.Scalar {`
`181`	`181`	`if encodedBytes == 1 || !Unicode.Scalar(curByte).isStartOfUTF8Character {`
`182`	`182`	`// Skip until we get the start of another character. This is guaranteed to`
`183`	`183`	`// at least stop at the nul at the end of the buffer.`
`184`		`- while let peeked = peek(), Unicode.Scalar(peeked).isStartOfUTF8Character {`
	`184`	`+ while let peeked = peek(), !Unicode.Scalar(peeked).isStartOfUTF8Character {`
`185`	`185`	`_ = advance()`
`186`	`186`	`}`
`187`	`187`	`return nil`