Skip to content

Commit 3e5c186

Browse files
committed
Diagnose unprintable characters in string literal
1 parent d76d848 commit 3e5c186

File tree

5 files changed: 42 additions and 10 deletions

5 files changed

+42
-10
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,20 +1563,17 @@ extension Lexer.Cursor {
15631563
}
15641564
default:
15651565
_ = self.advance()
1566-
// Normal characters are part of the string.
1567-
// If this is a "high" UTF-8 character, validate it.
1568-
// if ((signed char)(CurPtr[-1]) >= 0) {
1569-
// if (isPrintable(CurPtr[-1]) == 0)
1570-
// if (!(IsMultilineString && (CurPtr[-1] == '\t')))
1571-
// if (EmitDiagnostics)
1572-
// diagnose(CharStart, diag::lex_unprintable_ascii_character)
1573-
// return CurPtr[-1]
1574-
// }
15751566
self = charStart
15761567
guard let charValue = self.advanceValidatingUTF8Character() else {
15771568
return .error(.invalidUtf8)
15781569
}
1579-
return .success(charValue)
1570+
// We disallow non-printable ASCII characters in a string literal, with
1571+
// the exception of \t, which is valid only in multi-line string literals.
1572+
if !charValue.isASCII || charValue.isPrintableASCII || stringLiteralKind == .multiLine && charValue == "\t" {
1573+
return .success(charValue)
1574+
} else {
1575+
return .error(.unprintableAsciiCharacter)
1576+
}
15801577
}
15811578
}
15821579

Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public enum StaticLexerError: String, DiagnosticMessage {
5454
case sourceConflictMarker = "source control conflict marker in source file"
5555
case unexpectedBlockCommentEnd = "unexpected end of block comment"
5656
case unicodeCurlyQuote = #"unicode curly quote found; use '"' instead"#
57+
case unprintableAsciiCharacter = "unprintable ASCII character found in source file"
5758

5859
public var message: String { self.rawValue }
5960

@@ -146,6 +147,7 @@ public extension SwiftSyntax.LexerError {
146147
case .sourceConflictMarker: return StaticLexerError.sourceConflictMarker
147148
case .unexpectedBlockCommentEnd: return StaticLexerError.unexpectedBlockCommentEnd
148149
case .unicodeCurlyQuote: return StaticLexerError.unicodeCurlyQuote
150+
case .unprintableAsciiCharacter: return StaticLexerError.unprintableAsciiCharacter
149151
}
150152
}
151153

Sources/SwiftSyntax/LexerError.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public struct LexerError: Hashable {
4141
case sourceConflictMarker
4242
case unexpectedBlockCommentEnd
4343
case unicodeCurlyQuote
44+
case unprintableAsciiCharacter
4445
}
4546

4647
public let kind: Kind

Tests/SwiftParserTest/ExpressionTests.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,26 @@ final class ExpressionTests: XCTestCase {
11631163
fixedSource: "a + 2"
11641164
)
11651165
}
1166+
1167+
func testTabsIndentationInMultilineStringLiteral() {
1168+
AssertParse(
1169+
#"""
1170+
_ = """
1171+
\#taq
1172+
\#t"""
1173+
"""#
1174+
)
1175+
}
1176+
1177+
func testMixedIndentationInMultilineStringLiteral() {
1178+
AssertParse(
1179+
#"""
1180+
_ = """
1181+
\#t aq
1182+
\#t """
1183+
"""#
1184+
)
1185+
}
11661186
}
11671187

11681188
final class MemberExprTests: XCTestCase {

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,4 +1226,16 @@ public class LexerTests: XCTestCase {
12261226
)
12271227
}
12281228

1229+
func testUnprintableAsciiCharactersInStringLiteral() {
1230+
AssertLexemes(
1231+
"""
1232+
"1️⃣\u{7}"
1233+
""",
1234+
lexemes: [
1235+
LexemeSpec(.stringQuote, text: #"""#),
1236+
LexemeSpec(.stringSegment, text: "\u{7}", error: "unprintable ASCII character found in source file"),
1237+
LexemeSpec(.stringQuote, text: #"""#),
1238+
]
1239+
)
1240+
}
12291241
}

0 commit comments

Comments
 (0)