Diagnose non-breaking space, invalid identifier start and invalid characters

ahoppen · ahoppen · commit 19c68cb52170 · 2023-02-02T16:51:53.000+01:00
diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift
@@ -289,8 +289,11 @@ extension Lexer.Cursor {
     // Leading trivia.
     let leadingTriviaStart = self
     let newlineInLeadingTrivia: NewlinePresence
+    var error: LexerError? = nil
     if let leadingTriviaMode = self.currentState.leadingTriviaLexingMode(cursor: self) {
-      newlineInLeadingTrivia = self.lexTrivia(mode: leadingTriviaMode)
+      let triviaResult = self.lexTrivia(mode: leadingTriviaMode)
+      newlineInLeadingTrivia = triviaResult.newlinePresence
+      error = error ?? triviaResult.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
     } else {
       newlineInLeadingTrivia = .absent
     }
@@ -325,7 +328,8 @@ extension Lexer.Cursor {
     // Trailing trivia.
     let trailingTriviaStart = self
     if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) {
-      _ = self.lexTrivia(mode: trailingTriviaMode)
+      let triviaResult = self.lexTrivia(mode: trailingTriviaMode)
+      error = error ?? triviaResult.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
     }
 
     if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") {
@@ -338,9 +342,7 @@ extension Lexer.Cursor {
     }
 
     self.previousTokenKind = result.tokenKind.base
-    let error = result.error.map { error in
-      return LexerError(error.kind, byteOffset: cursor.distance(to: error.position))
-    }
+    error = error ?? result.error.map { LexerError($0.kind, byteOffset: cursor.distance(to: $0.position)) }
 
     return .init(
       tokenKind: result.tokenKind,
@@ -999,41 +1001,47 @@ extension Lexer.Cursor {
     case escapedNewlineInMultiLineStringLiteral
   }
 
-  fileprivate mutating func lexTrivia(mode: TriviaLexingMode) -> NewlinePresence {
+  fileprivate struct TriviaResult {
+    let newlinePresence: NewlinePresence
+    let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
+  }
+
+  fileprivate mutating func lexTrivia(mode: TriviaLexingMode) -> TriviaResult {
+    var newlinePresence = NewlinePresence.absent
+    var error: (kind: LexerError.Kind, position: Lexer.Cursor)? = nil
     if mode == .escapedNewlineInMultiLineStringLiteral {
       _ = self.advance(matching: "\\")
       self.advance(while: { $0 == "#" })
       self.advance(while: { $0 == " " || $0 == "\t" })
       if self.advance(matching: "\r") {
         _ = self.advance(matching: "\n")
-        return .present
+        return TriviaResult(newlinePresence: .present, error: nil)
       } else if self.advance(matching: "\n") {
-        return .present
+        return TriviaResult(newlinePresence: .present, error: nil)
       } else {
-        return .absent
+        return TriviaResult(newlinePresence: .absent, error: nil)
       }
     }
 
-    var hasNewline = false
     while true {
       let start = self
 
       switch self.advance() {
-      // 'continue' - the character is a part of the trivia.
+      // 'continue' - the character is a part of the trivia.
       // 'break' - the character should a part of token text.
       case nil:
         break
       case UInt8(ascii: "\n"):
         if mode == .noNewlines {
           break
         }
-        hasNewline = true
+        newlinePresence = .present
         continue
       case UInt8(ascii: "\r"):
         if mode == .noNewlines {
           break
         }
-        hasNewline = true
+        newlinePresence = .present
         continue
 
       case UInt8(ascii: " "):
@@ -1129,7 +1137,8 @@ extension Lexer.Cursor {
 
         // `lexUnknown` expects that the first character has not been consumed yet.
         self = start
-        if case .trivia = self.lexUnknown() {
+        if case .trivia(let unknownError) = self.lexUnknown() {
+          error = error ?? unknownError
           continue
         } else {
           break
@@ -1139,7 +1148,7 @@ extension Lexer.Cursor {
       // `break` means the character was not a trivia. Reset the cursor and
       // return the result.
       self = start
-      return hasNewline ? .present : .absent
+      return TriviaResult(newlinePresence: newlinePresence, error: error)
     }
   }
 }
@@ -2104,7 +2113,7 @@ extension Lexer.Cursor {
 
   enum UnknownCharactersClassification {
     /// The characters consumed by `lexUnknown` should be classified as trivia
-    case trivia
+    case trivia(error: (kind: LexerError.Kind, position: Lexer.Cursor))
     /// The characters consumed by `lexUnknown` should be classified as the contents of a lexeme
     case lexemeContents(Lexer.Result)
   }
@@ -2129,24 +2138,17 @@ extension Lexer.Cursor {
 
     // This character isn't allowed in Swift source.
     guard let codepoint = tmp.advanceValidatingUTF8Character() else {
-      //      diagnose(CurPtr - 1, diag::lex_invalid_utf8)
-      //          .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), " ")
       self = tmp
-      return .trivia
+      return .trivia(error: (kind: .invalidUtf8, position: start))
     }
     if codepoint.value == 0xA0 {  // Non-breaking whitespace (U+00A0)
       while tmp.is(at: 0xC2) && tmp.is(offset: 1, at: 0xA0) {
         _ = tmp.advance()
         _ = tmp.advance()
       }
 
-      //      SmallString<8> Spaces
-      //      Spaces.assign((Tmp - CurPtr + 1) / 2, ' ')
-      //      diagnose(CurPtr - 1, diag::lex_nonbreaking_space)
-      //        .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp),
-      //                           Spaces)
       self = tmp
-      return .trivia
+      return .trivia(error: (kind: .nonBreakingSpace, position: start))
     } else if codepoint.value == 0x201D {  // Closing curly quote (U+201D)
       // If this is an end curly quote, just diagnose it with a fixit hint.
       self = tmp
@@ -2166,26 +2168,9 @@ extension Lexer.Cursor {
       return .lexemeContents(Lexer.Result(.identifier, error: (.unicodeCurlyQuote, position: start)))
     }
 
-    //    diagnose(CurPtr - 1, diag::lex_invalid_character)
-    //        .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), " ")
-
-    //    char ExpectedCodepoint
-    //    if ((ExpectedCodepoint =
-    //             confusable::tryConvertConfusableCharacterToASCII(Codepoint))) {
-    //
-    //      llvm::SmallString<4> ConfusedChar
-    //      EncodeToUTF8(Codepoint, ConfusedChar)
-    //      llvm::SmallString<1> ExpectedChar
-    //      ExpectedChar += ExpectedCodepoint
-    //      auto charNames = confusable::getConfusableAndBaseCodepointNames(Codepoint)
-    //      diagnose(CurPtr - 1, diag::lex_confusable_character, ConfusedChar,
-    //               charNames.first, ExpectedChar, charNames.second)
-    //          .fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp),
-    //                             ExpectedChar)
-    //    }
-
+    // TODO: Try to map confusables to ASCII characters
     self = tmp
-    return .trivia
+    return .trivia(error: (kind: .invalidCharacter, position: start))
   }
 
   enum ConflictMarker {
diff --git a/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift
@@ -42,6 +42,7 @@ public enum StaticLexerError: String, DiagnosticMessage {
   case expectedClosingBraceInUnicodeEscape = #"expected '}' in \u{...} escape sequence"#
   case expectedDigitInFloatLiteral = "expected a digit in floating point exponent"
   case expectedHexCodeInUnicodeEscape = #"expected hexadecimal code in \u{...} escape sequence"#
+  case invalidCharacter = "invalid character in source file"
   case invalidEscapeSequenceInStringLiteral = "invalid escape sequence in literal"
   case invalidIdentifierStartCharacter = "an identifier cannot begin with this character"
   case invalidNumberOfHexDigitsInUnicodeEscape = #"\u{...} escape sequence expects between 1 and 8 hex digits"#
@@ -126,6 +127,7 @@ public extension SwiftSyntax.LexerError {
       // here in case the error is not diagnosed.
       return InvalidIndentationInMultiLineStringLiteralError(kind: .insufficientIndentation, lines: 1)
     case .invalidBinaryDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .binary(scalarAtErrorOffset))
+    case .invalidCharacter: return StaticLexerError.invalidCharacter
     case .invalidDecimalDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .decimal(scalarAtErrorOffset))
     case .invalidEscapeSequenceInStringLiteral: return StaticLexerError.invalidEscapeSequenceInStringLiteral
     case .invalidFloatingPointExponentCharacter: return InvalidFloatingPointExponentDigit(kind: .character(scalarAtErrorOffset))
@@ -150,16 +152,29 @@ public extension SwiftSyntax.LexerError {
   func fixIts(in token: TokenSyntax) -> [FixIt] {
     switch self.kind {
     case .nonBreakingSpace:
-      return []
+      let replaceNonBreakingSpace = { (piece: TriviaPiece) -> TriviaPiece in
+        if piece == .unexpectedText("\u{a0}") {
+          return .spaces(1)
+        } else {
+          return piece
+        }
+      }
+      let fixedToken =
+        token
+        .with(\.leadingTrivia, Trivia(pieces: token.leadingTrivia.map(replaceNonBreakingSpace)))
+        .with(\.trailingTrivia, Trivia(pieces: token.trailingTrivia.map(replaceNonBreakingSpace)))
+      return [
+        FixIt(message: .replaceNonBreakingSpaceBySpace, changes: [[.replace(oldNode: Syntax(token), newNode: Syntax(fixedToken))]])
+      ]
     case .unicodeCurlyQuote:
       let (rawKind, text) = token.tokenKind.decomposeToRaw()
       guard let text = text else {
         return []
       }
       let replacedText =
         text
-        .replaceFirstOccuranceOf("“", with: #"""#)
-        .replaceLastOccuranceOf("”", with: #"""#)
+        .replacingFirstOccurance(of: "“", with: #"""#)
+        .replacingLastOccurance(of: "”", with: #"""#)
 
       let fixedToken = token.withKind(TokenKind.fromRaw(kind: rawKind, text: replacedText))
       return [
diff --git a/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift
@@ -477,6 +477,9 @@ extension FixItMessage where Self == StaticParserFixIt {
   public static var replaceCurlyQuoteByNormalQuote: Self {
     .init(#"replace curly quotes by '"'"#)
   }
+  public static var replaceNonBreakingSpaceBySpace: Self {
+    .init("replace non-breaking space by ' '")
+  }
   public static var wrapInBackticks: Self {
     .init("if this name is unavoidable, use backticks to escape it")
   }
diff --git a/Sources/SwiftParserDiagnostics/Utils.swift b/Sources/SwiftParserDiagnostics/Utils.swift
@@ -19,14 +19,14 @@ extension String {
     }
   }
 
-  func replaceFirstOccuranceOf(_ character: Character, with replacement: Character) -> String {
+  func replacingFirstOccurance(of character: Character, with replacement: Character) -> String {
     guard let match = self.firstIndex(of: character) else {
       return self
     }
     return self[startIndex..<match] + String(replacement) + self[index(after: match)...]
   }
 
-  func replaceLastOccuranceOf(_ character: Character, with replacement: Character) -> String {
+  func replacingLastOccurance(of character: Character, with replacement: Character) -> String {
     guard let match = self.lastIndex(of: character) else {
       return self
     }
diff --git a/Sources/SwiftSyntax/LexerError.swift b/Sources/SwiftSyntax/LexerError.swift
@@ -23,6 +23,7 @@ public struct LexerError: Hashable {
     case expectedHexCodeInUnicodeEscape
     case insufficientIndentationInMultilineStringLiteral
     case invalidBinaryDigitInIntegerLiteral
+    case invalidCharacter
     case invalidDecimalDigitInIntegerLiteral
     case invalidEscapeSequenceInStringLiteral
     case invalidFloatingPointExponentCharacter
diff --git a/Tests/SwiftParserTest/ExpressionTests.swift b/Tests/SwiftParserTest/ExpressionTests.swift
@@ -1153,6 +1153,16 @@ final class ExpressionTests: XCTestCase {
         """
     )
   }
+
+  func testNonBreakingSpace() {
+    AssertParse(
+      "a 1️⃣\u{a0}+ 2",
+      diagnostics: [
+        DiagnosticSpec(message: "non-breaking space (U+00A0) used instead of regular space", fixIts: ["replace non-breaking space by ' '"])
+      ],
+      fixedSource: "a  + 2"
+    )
+  }
 }
 
 final class MemberExprTests: XCTestCase {
diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift
@@ -426,10 +426,10 @@ public class LexerTests: XCTestCase {
 
   func testUnexpectedLexing() {
     AssertLexemes(
-      "static func �() {}",
+      "static func 1️⃣�() {}",
       lexemes: [
         LexemeSpec(.keyword(.static), text: "static", trailing: " "),
-        LexemeSpec(.keyword(.func), text: "func", trailing: " �"),
+        LexemeSpec(.keyword(.func), text: "func", trailing: " �", error: "invalid character in source file"),
         LexemeSpec(.leftParen, text: "("),
         LexemeSpec(.rightParen, text: ")", trailing: " "),
         LexemeSpec(.leftBrace, text: "{"),
@@ -635,9 +635,9 @@ public class LexerTests: XCTestCase {
     )
 
     AssertLexemes(
-      "y\u{fffe} + z",
+      "y1️⃣\u{fffe} + z",
       lexemes: [
-        LexemeSpec(.identifier, text: "y", trailing: "\u{fffe} "),
+        LexemeSpec(.identifier, text: "y", trailing: "\u{fffe} ", error: "invalid character in source file"),
         LexemeSpec(.binaryOperator, text: "+", trailing: " "),
         LexemeSpec(.identifier, text: "z"),
       ]
@@ -861,7 +861,8 @@ public class LexerTests: XCTestCase {
         lexemes[0],
         kind: .eof,
         leadingTrivia: sourceBytes,
-        text: []
+        text: [],
+        error: LexerError(.invalidUtf8, byteOffset: 0)
       )
     }
   }
@@ -877,7 +878,8 @@ public class LexerTests: XCTestCase {
         lexemes[0],
         kind: .eof,
         leadingTrivia: sourceBytes,
-        text: []
+        text: [],
+        error: LexerError(.invalidUtf8, byteOffset: 0)
       )
     }
   }
@@ -1195,4 +1197,15 @@ public class LexerTests: XCTestCase {
       ]
     )
   }
+
+  func testNonBreakingSpace() {
+    AssertLexemes(
+      "a 1️⃣\u{a0} b",
+      lexemes: [
+        LexemeSpec(.identifier, text: "a", trailing: " \u{a0} ", error: "non-breaking space (U+00A0) used instead of regular space"),
+        LexemeSpec(.identifier, text: "b"),
+      ]
+    )
+  }
+
 }
diff --git a/Tests/SwiftParserTest/translated/IdentifiersTests.swift b/Tests/SwiftParserTest/translated/IdentifiersTests.swift
@@ -68,13 +68,13 @@ final class IdentifiersTests: XCTestCase {
   }
 
   func testIdentifiers6() {
+    // Private-use characters aren't valid in Swift source.
     AssertParse(
       """
-      // Private-use characters aren't valid in Swift source.
-      ()
+      1️⃣()
       """,
       diagnostics: [
-        // TODO: Old parser expected error on line 2: invalid character in source file, Fix-It replacements: 1 - 4 = ' '
+        DiagnosticSpec(message: "invalid character in source file")
       ]
     )
   }
diff --git a/Tests/SwiftParserTest/translated/RecoveryTests.swift b/Tests/SwiftParserTest/translated/RecoveryTests.swift
@@ -1949,13 +1949,13 @@ final class RecoveryTests: XCTestCase {
   }
 
   func testRecovery160() {
+    // <rdar://problem/21196171> compiler should recover better from "unicode Specials" characters
     AssertParse(
       #"""
-      // <rdar://problem/21196171> compiler should recover better from "unicode Specials" characters
-      let ￼tryx  = 123
+      let 1️⃣￼tryx  = 123
       """#,
       diagnostics: [
-        // TODO: Old parser expected error on line 2: invalid character in source file, Fix-It replacements: 5 - 8 = ' '
+        DiagnosticSpec(message: "invalid character in source file")
       ]
     )
   }

Original file line number	Diff line number	Diff line change
`@@ -477,6 +477,9 @@ extension FixItMessage where Self == StaticParserFixIt {`
`477`	`477`	`public static var replaceCurlyQuoteByNormalQuote: Self {`
`478`	`478`	`.init(#"replace curly quotes by '"'"#)`
`479`	`479`	`}`
	`480`	`+ public static var replaceNonBreakingSpaceBySpace: Self {`
	`481`	`+ .init("replace non-breaking space by ' '")`
	`482`	`+ }`
`480`	`483`	`public static var wrapInBackticks: Self {`
`481`	`484`	`.init("if this name is unavoidable, use backticks to escape it")`
`482`	`485`	`}`
Original file line number	Diff line number	Diff line change
`@@ -19,14 +19,14 @@ extension String {`
`19`	`19`	`}`
`20`	`20`	`}`
`21`	`21`
`22`		`- func replaceFirstOccuranceOf(_ character: Character, with replacement: Character) -> String {`
	`22`	`+ func replacingFirstOccurance(of character: Character, with replacement: Character) -> String {`
`23`	`23`	`guard let match = self.firstIndex(of: character) else {`
`24`	`24`	`return self`
`25`	`25`	`}`
`26`	`26`	`return self[startIndex..<match] + String(replacement) + self[index(after: match)...]`
`27`	`27`	`}`
`28`	`28`
`29`		`- func replaceLastOccuranceOf(_ character: Character, with replacement: Character) -> String {`
	`29`	`+ func replacingLastOccurance(of character: Character, with replacement: Character) -> String {`
`30`	`30`	`guard let match = self.lastIndex(of: character) else {`
`31`	`31`	`return self`
`32`	`32`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1153,6 +1153,16 @@ final class ExpressionTests: XCTestCase {`
`1153`	`1153`	`"""`
`1154`	`1154`	`)`
`1155`	`1155`	`}`
	`1156`	`+`
	`1157`	`+ func testNonBreakingSpace() {`
	`1158`	`+ AssertParse(`
	`1159`	`+ "a 1️⃣\u{a0}+ 2",`
	`1160`	`+ diagnostics: [`
	`1161`	`+ DiagnosticSpec(message: "non-breaking space (U+00A0) used instead of regular space", fixIts: ["replace non-breaking space by ' '"])`
	`1162`	`+ ],`
	`1163`	`+ fixedSource: "a + 2"`
	`1164`	`+ )`
	`1165`	`+ }`
`1156`	`1166`	`}`
`1157`	`1167`
`1158`	`1168`	`final class MemberExprTests: XCTestCase {`
Original file line number	Diff line number	Diff line change
`@@ -426,10 +426,10 @@ public class LexerTests: XCTestCase {`
`426`	`426`
`427`	`427`	`func testUnexpectedLexing() {`
`428`	`428`	`AssertLexemes(`
`429`		`- "static func �() {}",`
	`429`	`+ "static func 1️⃣�() {}",`
`430`	`430`	`lexemes: [`
`431`	`431`	`LexemeSpec(.keyword(.static), text: "static", trailing: " "),`
`432`		`- LexemeSpec(.keyword(.func), text: "func", trailing: " �"),`
	`432`	`+ LexemeSpec(.keyword(.func), text: "func", trailing: " �", error: "invalid character in source file"),`
`433`	`433`	`LexemeSpec(.leftParen, text: "("),`
`434`	`434`	`LexemeSpec(.rightParen, text: ")", trailing: " "),`
`435`	`435`	`LexemeSpec(.leftBrace, text: "{"),`
`@@ -635,9 +635,9 @@ public class LexerTests: XCTestCase {`
`635`	`635`	`)`
`636`	`636`
`637`	`637`	`AssertLexemes(`
`638`		`- "y\u{fffe} + z",`
	`638`	`+ "y1️⃣\u{fffe} + z",`
`639`	`639`	`lexemes: [`
`640`		`- LexemeSpec(.identifier, text: "y", trailing: "\u{fffe} "),`
	`640`	`+ LexemeSpec(.identifier, text: "y", trailing: "\u{fffe} ", error: "invalid character in source file"),`
`641`	`641`	`LexemeSpec(.binaryOperator, text: "+", trailing: " "),`
`642`	`642`	`LexemeSpec(.identifier, text: "z"),`
`643`	`643`	`]`
`@@ -861,7 +861,8 @@ public class LexerTests: XCTestCase {`
`861`	`861`	`lexemes[0],`
`862`	`862`	`kind: .eof,`
`863`	`863`	`leadingTrivia: sourceBytes,`
`864`		`- text: []`
	`864`	`+ text: [],`
	`865`	`+ error: LexerError(.invalidUtf8, byteOffset: 0)`
`865`	`866`	`)`
`866`	`867`	`}`
`867`	`868`	`}`
`@@ -877,7 +878,8 @@ public class LexerTests: XCTestCase {`
`877`	`878`	`lexemes[0],`
`878`	`879`	`kind: .eof,`
`879`	`880`	`leadingTrivia: sourceBytes,`
`880`		`- text: []`
	`881`	`+ text: [],`
	`882`	`+ error: LexerError(.invalidUtf8, byteOffset: 0)`
`881`	`883`	`)`
`882`	`884`	`}`
`883`	`885`	`}`
`@@ -1195,4 +1197,15 @@ public class LexerTests: XCTestCase {`
`1195`	`1197`	`]`
`1196`	`1198`	`)`
`1197`	`1199`	`}`
	`1200`	`+`
	`1201`	`+ func testNonBreakingSpace() {`
	`1202`	`+ AssertLexemes(`
	`1203`	`+ "a 1️⃣\u{a0} b",`
	`1204`	`+ lexemes: [`
	`1205`	`+ LexemeSpec(.identifier, text: "a", trailing: " \u{a0} ", error: "non-breaking space (U+00A0) used instead of regular space"),`
	`1206`	`+ LexemeSpec(.identifier, text: "b"),`
	`1207`	`+ ]`
	`1208`	`+ )`
	`1209`	`+ }`
	`1210`	`+`
`1198`	`1211`	`}`
Original file line number	Diff line number	Diff line change
`@@ -68,13 +68,13 @@ final class IdentifiersTests: XCTestCase {`
`68`	`68`	`}`
`69`	`69`
`70`	`70`	`func testIdentifiers6() {`
	`71`	`+ // Private-use characters aren't valid in Swift source.`
`71`	`72`	`AssertParse(`
`72`	`73`	`"""`
`73`		`- // Private-use characters aren't valid in Swift source.`
`74`		`- ()`
	`74`	`+ 1️⃣()`
`75`	`75`	`""",`
`76`	`76`	`diagnostics: [`
`77`		`- // TODO: Old parser expected error on line 2: invalid character in source file, Fix-It replacements: 1 - 4 = ' '`
	`77`	`+ DiagnosticSpec(message: "invalid character in source file")`
`78`	`78`	`]`
`79`	`79`	`)`
`80`	`80`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1949,13 +1949,13 @@ final class RecoveryTests: XCTestCase {`
`1949`	`1949`	`}`
`1950`	`1950`
`1951`	`1951`	`func testRecovery160() {`
	`1952`	`+ // <rdar://problem/21196171> compiler should recover better from "unicode Specials" characters`
`1952`	`1953`	`AssertParse(`
`1953`	`1954`	`#"""`
`1954`		`- // <rdar://problem/21196171> compiler should recover better from "unicode Specials" characters`
`1955`		`- let tryx = 123`
	`1955`	`+ let 1️⃣tryx = 123`
`1956`	`1956`	`"""#,`
`1957`	`1957`	`diagnostics: [`
`1958`		`- // TODO: Old parser expected error on line 2: invalid character in source file, Fix-It replacements: 5 - 8 = ' '`
	`1958`	`+ DiagnosticSpec(message: "invalid character in source file")`
`1959`	`1959`	`]`
`1960`	`1960`	`)`
`1961`	`1961`	`}`