Skip to content

Commit ed6d84d

Browse files
committed
Only emit warnings for nul characters and non-breaking space in the source file
1 parent 3e5c186 commit ed6d84d

File tree

8 files changed

+113
-12
lines changed

8 files changed

+113
-12
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,6 +2161,9 @@ extension Lexer.Cursor {
21612161
// we have in the parser. Classify the entire curly string as an identifier
21622162
// for best recovery.
21632163
return .lexemeContents(Lexer.Result(.identifier, error: (.unicodeCurlyQuote, position: start)))
2164+
} else if codepoint.value == 0 { // Nul character
2165+
self = tmp
2166+
return .trivia(error: (kind: .nulCharacter, position: start))
21642167
}
21652168

21662169
// TODO: Try map confusables to ASCII characters

Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ public enum StaticLexerError: String, DiagnosticMessage {
4949
case invalidNumberOfHexDigitsInUnicodeEscape = #"\u{...} escape sequence expects between 1 and 8 hex digits"#
5050
case invalidUtf8 = "invalid UTF-8 found in source file"
5151
case lexerErrorOffsetOverflow = "the lexer dicovered an error in this token but was not able to represent its offset due to overflow; please split the token"
52-
case nonBreakingSpace = "non-breaking space (U+00A0) used instead of regular space"
53-
case nulCharacter = "nul character embedded in middle of file"
5452
case sourceConflictMarker = "source control conflict marker in source file"
5553
case unexpectedBlockCommentEnd = "unexpected end of block comment"
5654
case unicodeCurlyQuote = #"unicode curly quote found; use '"' instead"#
@@ -65,6 +63,20 @@ public enum StaticLexerError: String, DiagnosticMessage {
6563
public var severity: DiagnosticSeverity { .error }
6664
}
6765

66+
/// Please order the cases in this enum alphabetically by case name.
67+
public enum StaticLexerWarning: String, DiagnosticMessage {
68+
case nonBreakingSpace = "non-breaking space (U+00A0) used instead of regular space"
69+
case nulCharacter = "nul character embedded in middle of file"
70+
71+
public var message: String { self.rawValue }
72+
73+
public var diagnosticID: MessageID {
74+
MessageID(domain: diagnosticDomain, id: "\(type(of: self)).\(self)")
75+
}
76+
77+
public var severity: DiagnosticSeverity { .warning }
78+
}
79+
6880
public struct InvalidFloatingPointExponentDigit: LexerError {
6981
public enum Kind {
7082
case digit(Unicode.Scalar)
@@ -142,8 +154,8 @@ public extension SwiftSyntax.LexerError {
142154
case .invalidOctalDigitInIntegerLiteral: return InvalidDigitInIntegerLiteral(kind: .octal(scalarAtErrorOffset))
143155
case .invalidUtf8: return StaticLexerError.invalidUtf8
144156
case .lexerErrorOffsetOverflow: return StaticLexerError.lexerErrorOffsetOverflow
145-
case .nonBreakingSpace: return StaticLexerError.nonBreakingSpace
146-
case .nulCharacter: return StaticLexerError.nulCharacter
157+
case .nonBreakingSpace: return StaticLexerWarning.nonBreakingSpace
158+
case .nulCharacter: return StaticLexerWarning.nulCharacter
147159
case .sourceConflictMarker: return StaticLexerError.sourceConflictMarker
148160
case .unexpectedBlockCommentEnd: return StaticLexerError.unexpectedBlockCommentEnd
149161
case .unicodeCurlyQuote: return StaticLexerError.unicodeCurlyQuote

Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,19 @@ fileprivate extension TokenSyntax {
3030
}
3131
}
3232

33+
fileprivate extension DiagnosticSeverity {
34+
func matches(_ lexerErorSeverity: SwiftSyntax.LexerError.Severity) -> Bool {
35+
switch (self, lexerErorSeverity) {
36+
case (.error, .error):
37+
return true
38+
case (.warning, .warning):
39+
return true
40+
default:
41+
return false
42+
}
43+
}
44+
}
45+
3346
public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
3447
private var diagnostics: [Diagnostic] = []
3548

@@ -101,7 +114,7 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
101114
/// Whether the node should be skipped for diagnostic emission.
102115
/// Every visit method must check this at the beginning.
103116
func shouldSkip<T: SyntaxProtocol>(_ node: T) -> Bool {
104-
if !node.hasError {
117+
if !node.hasError && !node.hasWarning {
105118
return true
106119
}
107120
return handledNodes.contains(node.id)
@@ -347,10 +360,12 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
347360
handleMissingToken(token)
348361
} else {
349362
if let lexerError = token.lexerError {
363+
let message = lexerError.diagnosticMessage(in: token)
364+
assert(message.severity.matches(lexerError.severity))
350365
self.addDiagnostic(
351366
token,
352367
position: token.position.advanced(by: Int(lexerError.byteOffset)),
353-
lexerError.diagnosticMessage(in: token),
368+
message,
354369
fixIts: lexerError.fixIts(in: token)
355370
)
356371
}

Sources/SwiftSyntax/LexerError.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
/// `lexerErrorOffset` in the token will specify at which offset the error
1515
/// occurred.
1616
public struct LexerError: Hashable {
17+
public enum Severity {
18+
case error
19+
case warning
20+
}
21+
1722
public enum Kind {
1823
// Please order these alphabetically
1924

@@ -66,4 +71,33 @@ public struct LexerError: Hashable {
6671
self.byteOffset = UInt16(byteOffset)
6772
}
6873
}
74+
75+
public var severity: Severity {
76+
switch kind {
77+
case .expectedBinaryExponentInHexFloatLiteral: return .error
78+
case .expectedClosingBraceInUnicodeEscape: return .error
79+
case .expectedDigitInFloatLiteral: return .error
80+
case .expectedHexCodeInUnicodeEscape: return .error
81+
case .expectedHexDigitInHexLiteral: return .error
82+
case .insufficientIndentationInMultilineStringLiteral: return .error
83+
case .invalidBinaryDigitInIntegerLiteral: return .error
84+
case .invalidCharacter: return .error
85+
case .invalidDecimalDigitInIntegerLiteral: return .error
86+
case .invalidEscapeSequenceInStringLiteral: return .error
87+
case .invalidFloatingPointExponentCharacter: return .error
88+
case .invalidFloatingPointExponentDigit: return .error
89+
case .invalidHexDigitInIntegerLiteral: return .error
90+
case .invalidIdentifierStartCharacter: return .error
91+
case .invalidNumberOfHexDigitsInUnicodeEscape: return .error
92+
case .invalidOctalDigitInIntegerLiteral: return .error
93+
case .invalidUtf8: return .error
94+
case .lexerErrorOffsetOverflow: return .error
95+
case .nonBreakingSpace: return .warning
96+
case .nulCharacter: return .warning
97+
case .sourceConflictMarker: return .error
98+
case .unexpectedBlockCommentEnd: return .error
99+
case .unicodeCurlyQuote: return .error
100+
case .unprintableAsciiCharacter: return .error
101+
}
102+
}
69103
}

Sources/SwiftSyntax/Raw/RawSyntax.swift

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,16 @@ fileprivate extension SyntaxKind {
2323
struct RecursiveRawSyntaxFlags: OptionSet {
2424
let rawValue: UInt8
2525

26-
/// Whether the tree contained by this layout has any missing or unexpected nodes.
26+
/// Whether the tree contained by this layout has any
27+
/// - missing nodes or
28+
/// - unexpected nodes or
29+
/// - tokens with a `LexerError` of severity `error`
2730
static let hasError = RecursiveRawSyntaxFlags(rawValue: 1 << 0)
28-
static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
29-
static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
31+
/// Whether the tree contained by this layout has any tokens with a `LexerError`
32+
/// of severity `warning`.
33+
static let hasWarning = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
34+
static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
35+
static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 3)
3036
}
3137

3238
/// Node data for RawSyntax tree. Tagged union plus common data.
@@ -227,9 +233,17 @@ extension RawSyntax {
227233
switch view {
228234
case .token(let tokenView):
229235
var recursiveFlags: RecursiveRawSyntaxFlags = []
230-
if tokenView.lexerError != nil || tokenView.presence == .missing {
236+
if tokenView.presence == .missing {
231237
recursiveFlags.insert(.hasError)
232238
}
239+
switch tokenView.lexerError?.severity {
240+
case .error:
241+
recursiveFlags.insert(.hasError)
242+
case .warning:
243+
recursiveFlags.insert(.hasWarning)
244+
case nil:
245+
break
246+
}
233247
return recursiveFlags
234248
case .layout(let layoutView):
235249
return layoutView.recursiveFlags

Sources/SwiftSyntax/Syntax.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,20 @@ public extension SyntaxProtocol {
283283
return raw.kind.isSyntaxCollection
284284
}
285285

286-
/// Whether this tree contains a missing token or unexpected node.
286+
/// Whether this tree has any
287+
/// - missing nodes or
288+
/// - unexpected nodes or
289+
/// - tokens with a `LexerError` of severity `error`
287290
var hasError: Bool {
288291
return raw.recursiveFlags.contains(.hasError)
289292
}
290293

294+
/// Whether this tree has any tokens with a `LexerError`
295+
/// of severity `warning`.
296+
var hasWarning: Bool {
297+
return raw.recursiveFlags.contains(.hasWarning)
298+
}
299+
291300
/// Whether this tree has the recursive `hasSequenceExpr` flag set (presumably: it contains a sequence expression).
292301
var hasSequenceExpr: Bool {
293302
return raw.recursiveFlags.contains(.hasSequenceExpr)

Tests/SwiftParserTest/Assertions.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,8 @@ struct DiagnosticSpec {
234234
let id: MessageID?
235235
/// If not `nil`, assert that the diagnostic has the given message.
236236
let message: String?
237+
/// Assert that the diagnostic has the given severity.
238+
let severity: DiagnosticSeverity
237239
/// If not `nil`, assert that the highlighted range has this content.
238240
let highlight: String?
239241
/// If not `nil`, assert that the diagnostic contains notes with these messages.
@@ -250,6 +252,7 @@ struct DiagnosticSpec {
250252
locationMarker: String = "1️⃣",
251253
id: MessageID? = nil,
252254
message: String?,
255+
severity: DiagnosticSeverity = .error,
253256
highlight: String? = nil,
254257
notes: [NoteSpec]? = nil,
255258
fixIts: [String]? = nil,
@@ -259,6 +262,7 @@ struct DiagnosticSpec {
259262
self.locationMarker = locationMarker
260263
self.id = id
261264
self.message = message
265+
self.severity = severity
262266
self.highlight = highlight
263267
self.notes = notes
264268
self.fixIts = fixIts
@@ -395,6 +399,7 @@ func AssertDiagnostic<T: SyntaxProtocol>(
395399
if let message = spec.message {
396400
AssertStringsEqualWithDiff(diag.message, message, file: file, line: line)
397401
}
402+
XCTAssertEqual(spec.severity, diag.diagMessage.severity, file: file, line: line)
398403
if diag.message.contains("\n") {
399404
XCTFail(
400405
"""

Tests/SwiftParserTest/ExpressionTests.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1158,7 +1158,7 @@ final class ExpressionTests: XCTestCase {
11581158
AssertParse(
11591159
"a 1️⃣\u{a0}+ 2",
11601160
diagnostics: [
1161-
DiagnosticSpec(message: "non-breaking space (U+00A0) used instead of regular space", fixIts: ["replace non-breaking space by ' '"])
1161+
DiagnosticSpec(message: "non-breaking space (U+00A0) used instead of regular space", severity: .warning, fixIts: ["replace non-breaking space by ' '"])
11621162
],
11631163
fixedSource: "a + 2"
11641164
)
@@ -1183,6 +1183,15 @@ final class ExpressionTests: XCTestCase {
11831183
"""#
11841184
)
11851185
}
1186+
1187+
func testNulCharacterInSourceFile() {
1188+
AssertParse(
1189+
"let a = 1️⃣\u{0}1",
1190+
diagnostics: [
1191+
DiagnosticSpec(message: "nul character embedded in middle of file", severity: .warning)
1192+
]
1193+
)
1194+
}
11861195
}
11871196

11881197
final class MemberExprTests: XCTestCase {

0 commit comments

Comments
 (0)