swiftlang
diff --git a/‎Sources/SwiftParser/Lexer/Cursor.swift
Lines changed: 161 additions & 195 deletions b/‎Sources/SwiftParser/Lexer/Cursor.swift
Lines changed: 161 additions & 195 deletions
diff --git a/‎Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift
Lines changed: 88 additions & 25 deletions b/‎Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift
Lines changed: 88 additions & 25 deletions
diff --git a/‎Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift
Lines changed: 22 additions & 2 deletions b/‎Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift
Lines changed: 22 additions & 2 deletions
diff --git a/‎Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift
Lines changed: 6 additions & 0 deletions b/‎Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift
Lines changed: 6 additions & 0 deletions
diff --git a/‎Sources/SwiftParserDiagnostics/Utils.swift
Lines changed: 14 additions & 0 deletions b/‎Sources/SwiftParserDiagnostics/Utils.swift
Lines changed: 14 additions & 0 deletions
diff --git a/‎Sources/SwiftSyntax/LexerError.swift
Lines changed: 48 additions & 2 deletions b/‎Sources/SwiftSyntax/LexerError.swift
Lines changed: 48 additions & 2 deletions
diff --git a/‎Sources/SwiftSyntax/Raw/RawSyntax.swift
Lines changed: 18 additions & 4 deletions b/‎Sources/SwiftSyntax/Raw/RawSyntax.swift
Lines changed: 18 additions & 4 deletions
diff --git a/‎Sources/SwiftSyntax/Syntax.swift
Lines changed: 10 additions & 1 deletion b/‎Sources/SwiftSyntax/Syntax.swift
Lines changed: 10 additions & 1 deletion
@@ -39,8 +39,20 @@ public extension LexerError {
 /// Please order the cases in this enum alphabetically by case name.
 public enum StaticLexerError: String, DiagnosticMessage {
   case expectedBinaryExponentInHexFloatLiteral = "hexadecimal floating point literal must end with an exponent"
+  case expectedClosingBraceInUnicodeEscape = #"expected '}' in \u{...} escape sequence"#
   case expectedDigitInFloatLiteral = "expected a digit in floating point exponent"
+  case expectedHexCodeInUnicodeEscape = #"expected hexadecimal code in \u{...} escape sequence"#
+  case expectedHexDigitInHexLiteral = "expected hexadecimal digit (0-9, A-F) in integer literal"
+  case invalidCharacter = "invalid character in source file"
+  case invalidEscapeSequenceInStringLiteral = "invalid escape sequence in literal"
+  case invalidIdentifierStartCharacter = "an identifier cannot begin with this character"
+  case invalidNumberOfHexDigitsInUnicodeEscape = #"\u{...} escape sequence expects between 1 and 8 hex digits"#
+  case invalidUtf8 = "invalid UTF-8 found in source file"
   case lexerErrorOffsetOverflow = "the lexer dicovered an error in this token but was not able to represent its offset due to overflow; please split the token"
+  case sourceConflictMarker = "source control conflict marker in source file"
+  case unexpectedBlockCommentEnd = "unexpected end of block comment"
+  case unicodeCurlyQuote = #"unicode curly quote found; use '"' instead"#
+  case unprintableAsciiCharacter = "unprintable ASCII character found in source file"
 
   public var message: String { self.rawValue }
 
@@ -51,6 +63,20 @@ public enum StaticLexerError: String, DiagnosticMessage {
   public var severity: DiagnosticSeverity { .error }
 }
 
+/// Lexer diagnostics that are reported with warning severity.
+///
+/// Please order the cases in this enum alphabetically by case name.
+public enum StaticLexerWarning: String, DiagnosticMessage {
+  case nonBreakingSpace = "non-breaking space (U+00A0) used instead of regular space"
+  case nulCharacter = "nul character embedded in middle of file"
+
+  /// The user-facing text, which is the raw value of the case.
+  public var message: String {
+    return rawValue
+  }
+
+  /// An identifier made of the type name and the case name, placed in `diagnosticDomain`.
+  public var diagnosticID: MessageID {
+    let identifier = "\(type(of: self)).\(self)"
+    return MessageID(domain: diagnosticDomain, id: identifier)
+  }
+
+  /// Every case of this enum is a warning.
+  public var severity: DiagnosticSeverity {
+    return .warning
+  }
+}
+
 public struct InvalidFloatingPointExponentDigit: LexerError {
   public enum Kind {
     case digit(Unicode.Scalar)
@@ -98,45 +124,82 @@ public extension SwiftSyntax.LexerError {
   /// `wholeTextBytes` is the entire text of the token in which the `LexerError`
   /// occurred, including trivia.
   @_spi(RawSyntax)
-  func diagnostic(wholeTextBytes: [UInt8]) -> DiagnosticMessage {
+  func diagnosticMessage(wholeTextBytes: [UInt8]) -> DiagnosticMessage {
     var scalarAtErrorOffset: UnicodeScalar {
       // Fall back to the Unicode replacement character U+FFFD in case we can't
       // lex the unicode character at `byteOffset`. It's the best we can do
       Unicode.Scalar.lexing(from: wholeTextBytes[Int(self.byteOffset)...]) ?? UnicodeScalar("�")
     }
 
     switch self.kind {
-    case .expectedBinaryExponentInHexFloatLiteral:
-      return StaticLexerError.expectedBinaryExponentInHexFloatLiteral
-    case .expectedDigitInFloatLiteral:
-      return StaticLexerError.expectedDigitInFloatLiteral
+    case .expectedBinaryExponentInHexFloatLiteral: return StaticLexerError.expectedBinaryExponentInHexFloatLiteral
+    case .expectedClosingBraceInUnicodeEscape: return StaticLexerError.expectedClosingBraceInUnicodeEscape
+    case .expectedDigitInFloatLiteral: return StaticLexerError.expectedDigitInFloatLiteral
+    case .expectedHexCodeInUnicodeEscape: return StaticLexerError.expectedHexCodeInUnicodeEscape
+    case .expectedHexDigitInHexLiteral: return StaticLexerError.expectedHexDigitInHexLiteral
     case .insufficientIndentationInMultilineStringLiteral:
       // This should be diagnosed when visiting the `StringLiteralExprSyntax`
       // inside `ParseDiagnosticsGenerator` but fall back to an error message
       // here in case the error is not diagnosed.
       return InvalidIndentationInMultiLineStringLiteralError(kind: .insufficientIndentation, lines: 1)
-    case .invalidBinaryDigitInIntegerLiteral:
-      return InvalidDigitInIntegerLiteral(kind: .binary(scalarAtErrorOffset))
-    case .invalidDecimalDigitInIntegerLiteral:
-      return InvalidDigitInIntegerLiteral(kind: .decimal(scalarAtErrorOffset))
-    case .invalidFloatingPointCharacter:
-      fatalError()
-    case .invalidFloatingPointDigit:
-      fatalError()
-    case .invalidFloatingPointExponentCharacter:
-      return InvalidFloatingPointExponentDigit(kind: .character(scalarAtErrorOffset))
-    case .invalidFloatingPointExponentDigit:
-      return InvalidFloatingPointExponentDigit(kind: .digit(scalarAtErrorOffset))
-    case .invalidHexDigitInIntegerLiteral:
-      return InvalidDigitInIntegerLiteral(kind: .hex(scalarAtErrorOffset))
-    case .invalidOctalDigitInIntegerLiteral:
-      return InvalidDigitInIntegerLiteral(kind: .octal(scalarAtErrorOffset))
-    case .lexerErrorOffsetOverflow:
-      return StaticLexerError.lexerErrorOffsetOverflow
+    case .invalidBinaryDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .binary(scalarAtErrorOffset))
+    case .invalidCharacter: return StaticLexerError.invalidCharacter
+    case .invalidDecimalDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .decimal(scalarAtErrorOffset))
+    case .invalidEscapeSequenceInStringLiteral: return StaticLexerError.invalidEscapeSequenceInStringLiteral
+    case .invalidFloatingPointExponentCharacter: return InvalidFloatingPointExponentDigit(kind: .character(scalarAtErrorOffset))
+    case .invalidFloatingPointExponentDigit: return InvalidFloatingPointExponentDigit(kind: .digit(scalarAtErrorOffset))
+    case .invalidHexDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .hex(scalarAtErrorOffset))
+    case .invalidIdentifierStartCharacter: return StaticLexerError.invalidIdentifierStartCharacter
+    case .invalidNumberOfHexDigitsInUnicodeEscape: return StaticLexerError.invalidNumberOfHexDigitsInUnicodeEscape
+    case .invalidOctalDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .octal(scalarAtErrorOffset))
+    case .invalidUtf8: return StaticLexerError.invalidUtf8
+    case .lexerErrorOffsetOverflow: return StaticLexerError.lexerErrorOffsetOverflow
+    case .nonBreakingSpace: return StaticLexerWarning.nonBreakingSpace
+    case .nulCharacter: return StaticLexerWarning.nulCharacter
+    case .sourceConflictMarker: return StaticLexerError.sourceConflictMarker
+    case .unexpectedBlockCommentEnd: return StaticLexerError.unexpectedBlockCommentEnd
+    case .unicodeCurlyQuote: return StaticLexerError.unicodeCurlyQuote
+    case .unprintableAsciiCharacter: return StaticLexerError.unprintableAsciiCharacter
     }
   }
 
-  func diagnostic(in token: TokenSyntax) -> DiagnosticMessage {
-    return self.diagnostic(wholeTextBytes: token.syntaxTextBytes)
+  /// Convenience overload that builds the diagnostic message from the
+  /// `syntaxTextBytes` of `token`.
+  func diagnosticMessage(in token: TokenSyntax) -> DiagnosticMessage {
+    diagnosticMessage(wholeTextBytes: token.syntaxTextBytes)
+  }
+
+  /// Returns the fix-its that can be offered for this lexer error in `token`.
+  ///
+  /// Only `.nonBreakingSpace` and `.unicodeCurlyQuote` produce a fix-it; all
+  /// other kinds yield an empty array.
+  func fixIts(in token: TokenSyntax) -> [FixIt] {
+    switch self.kind {
+    case .nonBreakingSpace:
+      // Rebuild a trivia, turning every piece that consists of exactly one
+      // non-breaking space into a single regular space.
+      func withRegularSpaces(_ trivia: Trivia) -> Trivia {
+        let pieces = trivia.map { (piece: TriviaPiece) -> TriviaPiece in
+          guard piece == .unexpectedText("\u{a0}") else {
+            return piece
+          }
+          return .spaces(1)
+        }
+        return Trivia(pieces: pieces)
+      }
+      let tokenWithFixedLeadingTrivia = token.with(\.leadingTrivia, withRegularSpaces(token.leadingTrivia))
+      let fixedToken = tokenWithFixedLeadingTrivia.with(\.trailingTrivia, withRegularSpaces(token.trailingTrivia))
+      return [
+        FixIt(message: .replaceNonBreakingSpaceBySpace, changes: [[.replace(oldNode: Syntax(token), newNode: Syntax(fixedToken))]])
+      ]
+    case .unicodeCurlyQuote:
+      let (rawKind, optionalText) = token.tokenKind.decomposeToRaw()
+      guard let originalText = optionalText else {
+        // Without token text there is nothing to rewrite.
+        return []
+      }
+      // Only the first opening and the last closing curly quote are replaced.
+      let openingFixed = originalText.replacingFirstOccurence(of: "“", with: #"""#)
+      let replacedText = openingFixed.replacingLastOccurence(of: "”", with: #"""#)
+
+      let fixedToken = token.withKind(TokenKind.fromRaw(kind: rawKind, text: replacedText))
+      return [
+        FixIt(message: .replaceCurlyQuoteByNormalQuote, changes: [[.replace(oldNode: Syntax(token), newNode: Syntax(fixedToken))]])
+      ]
+    default:
+      return []
+    }
+  }
 }
@@ -30,6 +30,19 @@ fileprivate extension TokenSyntax {
   }
 }
 
+fileprivate extension DiagnosticSeverity {
+  /// Whether this diagnostic severity denotes the same level as
+  /// `lexerErrorSeverity`: both are errors or both are warnings.
+  func matches(_ lexerErrorSeverity: SwiftSyntax.LexerError.Severity) -> Bool {
+    switch (self, lexerErrorSeverity) {
+    case (.error, .error), (.warning, .warning):
+      return true
+    default:
+      return false
+    }
+  }
+}
+
 public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
   private var diagnostics: [Diagnostic] = []
 
@@ -101,7 +114,7 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
   /// Whether the node should be skipped for diagnostic emission.
   /// Every visit method must check this at the beginning.
   func shouldSkip<T: SyntaxProtocol>(_ node: T) -> Bool {
-    if !node.hasError {
+    if !node.hasError && !node.hasWarning {
       return true
     }
     return handledNodes.contains(node.id)
@@ -347,7 +360,14 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
       handleMissingToken(token)
     } else {
       if let lexerError = token.lexerError {
-        self.addDiagnostic(token, position: token.position.advanced(by: Int(lexerError.byteOffset)), lexerError.diagnostic(in: token))
+        let message = lexerError.diagnosticMessage(in: token)
+        assert(message.severity.matches(lexerError.severity))
+        self.addDiagnostic(
+          token,
+          position: token.position.advanced(by: Int(lexerError.byteOffset)),
+          message,
+          fixIts: lexerError.fixIts(in: token)
+        )
       }
     }
 
 
@@ -474,6 +474,12 @@ extension FixItMessage where Self == StaticParserFixIt {
   public static var removeOperatorBody: Self {
     .init("remove operator body")
   }
+  /// Fix-it message for replacing unicode curly quotes by a plain `"`.
+  public static var replaceCurlyQuoteByNormalQuote: Self {
+    return StaticParserFixIt(#"replace curly quotes by '"'"#)
+  }
+  /// Fix-it message for replacing a non-breaking space by a regular space.
+  public static var replaceNonBreakingSpaceBySpace: Self {
+    return StaticParserFixIt("replace non-breaking space by ' '")
+  }
   public static var wrapInBackticks: Self {
     .init("if this name is unavoidable, use backticks to escape it")
   }
 
@@ -18,6 +18,20 @@ extension String {
       return self
     }
   }
+
+  /// Returns a copy of this string in which the first occurrence of
+  /// `character` is replaced by `replacement`.
+  ///
+  /// If `character` does not occur, the string is returned unchanged.
+  func replacingFirstOccurence(of character: Character, with replacement: Character) -> String {
+    guard let matchIndex = firstIndex(of: character) else {
+      return self
+    }
+    var result = self
+    result.replaceSubrange(matchIndex...matchIndex, with: String(replacement))
+    return result
+  }
+
+  /// Returns a copy of this string in which the last occurrence of
+  /// `character` is replaced by `replacement`.
+  ///
+  /// If `character` does not occur, the string is returned unchanged.
+  func replacingLastOccurence(of character: Character, with replacement: Character) -> String {
+    guard let matchIndex = lastIndex(of: character) else {
+      return self
+    }
+    let head = String(prefix(upTo: matchIndex))
+    let tail = String(suffix(from: index(after: matchIndex)))
+    return head + String(replacement) + tail
+  }
 }
 
 extension Collection {
 
@@ -14,22 +14,39 @@
 /// `lexerErrorOffset` in the token will specify at which offset the error
 /// occurred.
 public struct LexerError: Hashable {
+  /// The level at which a lexer error is reported: `error` or `warning`.
+  public enum Severity {
+    case error, warning
+  }
+
   public enum Kind {
     // Please order these alphabetically
 
     case expectedBinaryExponentInHexFloatLiteral
+    case expectedClosingBraceInUnicodeEscape
     case expectedDigitInFloatLiteral
+    case expectedHexCodeInUnicodeEscape
+    case expectedHexDigitInHexLiteral
     case insufficientIndentationInMultilineStringLiteral
     case invalidBinaryDigitInIntegerLiteral
+    case invalidCharacter
     case invalidDecimalDigitInIntegerLiteral
-    case invalidFloatingPointCharacter
-    case invalidFloatingPointDigit
+    case invalidEscapeSequenceInStringLiteral
     case invalidFloatingPointExponentCharacter
     case invalidFloatingPointExponentDigit
     case invalidHexDigitInIntegerLiteral
+    case invalidIdentifierStartCharacter
+    case invalidNumberOfHexDigitsInUnicodeEscape
     case invalidOctalDigitInIntegerLiteral
+    case invalidUtf8
     /// The lexer discovered an error but was not able to represent the offset of the error because it would overflow `LexerErrorOffset`.
     case lexerErrorOffsetOverflow
+    case nonBreakingSpace
+    case nulCharacter
+    case sourceConflictMarker
+    case unexpectedBlockCommentEnd
+    case unicodeCurlyQuote
+    case unprintableAsciiCharacter
   }
 
   public let kind: Kind
@@ -54,4 +71,33 @@ public struct LexerError: Hashable {
       self.byteOffset = UInt16(byteOffset)
     }
   }
+
+  /// The severity with which this error is reported, derived from its `kind`.
+  ///
+  /// Every kind is listed explicitly (no `default`) so that adding a new
+  /// `Kind` case fails to compile until a severity has been chosen for it.
+  public var severity: Severity {
+    switch kind {
+    case .nonBreakingSpace,
+      .nulCharacter:
+      return .warning
+    case .expectedBinaryExponentInHexFloatLiteral,
+      .expectedClosingBraceInUnicodeEscape,
+      .expectedDigitInFloatLiteral,
+      .expectedHexCodeInUnicodeEscape,
+      .expectedHexDigitInHexLiteral,
+      .insufficientIndentationInMultilineStringLiteral,
+      .invalidBinaryDigitInIntegerLiteral,
+      .invalidCharacter,
+      .invalidDecimalDigitInIntegerLiteral,
+      .invalidEscapeSequenceInStringLiteral,
+      .invalidFloatingPointExponentCharacter,
+      .invalidFloatingPointExponentDigit,
+      .invalidHexDigitInIntegerLiteral,
+      .invalidIdentifierStartCharacter,
+      .invalidNumberOfHexDigitsInUnicodeEscape,
+      .invalidOctalDigitInIntegerLiteral,
+      .invalidUtf8,
+      .lexerErrorOffsetOverflow,
+      .sourceConflictMarker,
+      .unexpectedBlockCommentEnd,
+      .unicodeCurlyQuote,
+      .unprintableAsciiCharacter:
+      return .error
+    }
+  }
 }
@@ -23,10 +23,16 @@ fileprivate extension SyntaxKind {
 struct RecursiveRawSyntaxFlags: OptionSet {
   let rawValue: UInt8
 
-  /// Whether the tree contained by this layout has any missing or unexpected nodes.
+  /// Whether the tree contained by this layout has any
+  ///  - missing nodes or
+  ///  - unexpected nodes or
+  ///  - tokens with a `LexerError` of severity `error`
   static let hasError = RecursiveRawSyntaxFlags(rawValue: 1 << 0)
-  static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
-  static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
+  /// Whether the tree contained by this layout has any tokens with a `LexerError`
+  /// of severity `warning`.
+  static let hasWarning = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
+  static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
+  static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 3)
 }
 
 /// Node data for RawSyntax tree. Tagged union plus common data.
@@ -227,9 +233,17 @@ extension RawSyntax {
     switch view {
     case .token(let tokenView):
       var recursiveFlags: RecursiveRawSyntaxFlags = []
-      if tokenView.lexerError != nil || tokenView.presence == .missing {
+      if tokenView.presence == .missing {
         recursiveFlags.insert(.hasError)
       }
+      switch tokenView.lexerError?.severity {
+      case .error:
+        recursiveFlags.insert(.hasError)
+      case .warning:
+        recursiveFlags.insert(.hasWarning)
+      case nil:
+        break
+      }
       return recursiveFlags
     case .layout(let layoutView):
       return layoutView.recursiveFlags
 
@@ -283,11 +283,20 @@ public extension SyntaxProtocol {
     return raw.kind.isSyntaxCollection
   }
 
-  /// Whether this tree contains a missing token or unexpected node.
+  /// Whether the tree contained by this layout has any
+  ///  - missing nodes or
+  ///  - unexpected nodes or
+  ///  - tokens with a `LexerError` of severity `error`
   var hasError: Bool {
     return raw.recursiveFlags.contains(.hasError)
   }
 
+  /// Whether any token in the tree rooted at this node carries a `LexerError`
+  /// of severity `warning`.
+  var hasWarning: Bool {
+    raw.recursiveFlags.contains(.hasWarning)
+  }
+
   /// Whether this tree contains a sequence expression node.
   var hasSequenceExpr: Bool {
     return raw.recursiveFlags.contains(.hasSequenceExpr)
Original file line number	Diff line number	Diff line change
`@@ -474,6 +474,12 @@ extension FixItMessage where Self == StaticParserFixIt {`
`474`	`474`	`public static var removeOperatorBody: Self {`
`475`	`475`	`.init("remove operator body")`
`476`	`476`	`}`
	`477`	`+ public static var replaceCurlyQuoteByNormalQuote: Self {`
	`478`	`+ .init(#"replace curly quotes by '"'"#)`
	`479`	`+ }`
	`480`	`+ public static var replaceNonBreakingSpaceBySpace: Self {`
	`481`	`+ .init("replace non-breaking space by ' '")`
	`482`	`+ }`
`477`	`483`	`public static var wrapInBackticks: Self {`
`478`	`484`	`.init("if this name is unavoidable, use backticks to escape it")`
`479`	`485`	`}`