Skip to content

Commit 6e8b0b1

Browse files
committed
Diagnose invalid escape sequences in string literals
1 parent cc8fc77 commit 6e8b0b1

File tree

7 files changed

36 additions and 31 deletions

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,7 +1601,7 @@ extension Lexer.Cursor {
16011601
}
16021602
return cv
16031603

1604-
case UInt8(ascii: " "), UInt8(ascii: "\t"), UInt8(ascii: "\n"), UInt8(ascii: "\r"):
1604+
case UInt8(ascii: "\n"), UInt8(ascii: "\r"):
16051605
if isMultilineString && self.maybeConsumeNewlineEscape() {
16061606
return UInt32(UInt8(ascii: "\n"))
16071607
}
@@ -1780,12 +1780,15 @@ extension Lexer.Cursor {
17801780
.fixItInsert(Lexer::getSourceLoc(CurPtr), "\n")
17811781
}
17821782
*/
1783+
var error: (LexerError.Kind, Lexer.Cursor)? = nil
1784+
17831785
while true {
17841786
switch self.peek() {
17851787
case UInt8(ascii: "\\"):
17861788
if self.isAtStringInterpolationAnchor(delimiterLength: delimiterLength) {
17871789
return Lexer.Result(
17881790
.stringSegment,
1791+
error: error,
17891792
stateTransition: .push(newState: .inStringInterpolationStart(stringLiteralKind: stringLiteralKind))
17901793
)
17911794
} else if self.isAtEscapedNewline(delimiterLength: delimiterLength) {
@@ -1802,16 +1805,17 @@ extension Lexer.Cursor {
18021805
if character == UInt8(ascii: "\r") {
18031806
_ = self.advance(matching: "\n")
18041807
}
1805-
return Lexer.Result(.stringSegment)
1808+
return Lexer.Result(.stringSegment, error: error)
18061809
} else {
18071810
// Single line literals cannot span multiple lines.
18081811
// Terminate the string here and go back to normal lexing (instead of `afterStringLiteral`)
18091812
// since we aren't looking for the closing quote anymore.
1810-
return Lexer.Result(.stringSegment, stateTransition: .pop)
1813+
return Lexer.Result(.stringSegment, error: error, stateTransition: .pop)
18111814
}
18121815
case nil:
18131816
return Lexer.Result(
18141817
.stringSegment,
1818+
error: error,
18151819
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
18161820
)
18171821
default:
@@ -1829,14 +1833,15 @@ extension Lexer.Cursor {
18291833
if escapedCharacter == "\n" || escapedCharacter == "\r" {
18301834
// Make sure each line starts a new string segment so the parser can
18311835
// validate the multi-line string literal's indentation.
1832-
return Lexer.Result(.stringSegment)
1836+
return Lexer.Result(.stringSegment, error: error)
18331837
}
18341838
case .error:
1835-
// TODO: Diagnose error
1839+
error = (.invalidEscapeSequenceInStringLiteral, self)
18361840
self = clone
18371841
case .endOfString:
18381842
return Lexer.Result(
18391843
.stringSegment,
1844+
error: error,
18401845
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
18411846
)
18421847
}

Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public extension LexerError {
4040
public enum StaticLexerError: String, DiagnosticMessage {
4141
case expectedBinaryExponentInHexFloatLiteral = "hexadecimal floating point literal must end with an exponent"
4242
case expectedDigitInFloatLiteral = "expected a digit in floating point exponent"
43+
case invalidEscapeSequenceInStringLiteral = "invalid escape sequence in literal"
4344
case lexerErrorOffsetOverflow = "the lexer dicovered an error in this token but was not able to represent its offset due to overflow; please split the token"
4445

4546
public var message: String { self.rawValue }
@@ -119,10 +120,8 @@ public extension SwiftSyntax.LexerError {
119120
return InvalidDigitInIntegerLiteral(kind: .binary(scalarAtErrorOffset))
120121
case .invalidDecimalDigitInIntegerLiteral:
121122
return InvalidDigitInIntegerLiteral(kind: .decimal(scalarAtErrorOffset))
122-
case .invalidFloatingPointCharacter:
123-
fatalError()
124-
case .invalidFloatingPointDigit:
125-
fatalError()
123+
case .invalidEscapeSequenceInStringLiteral:
124+
return StaticLexerError.invalidEscapeSequenceInStringLiteral
126125
case .invalidFloatingPointExponentCharacter:
127126
return InvalidFloatingPointExponentDigit(kind: .character(scalarAtErrorOffset))
128127
case .invalidFloatingPointExponentDigit:

Sources/SwiftSyntax/LexerError.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ public struct LexerError: Hashable {
2222
case insufficientIndentationInMultilineStringLiteral
2323
case invalidBinaryDigitInIntegerLiteral
2424
case invalidDecimalDigitInIntegerLiteral
25-
case invalidFloatingPointCharacter
26-
case invalidFloatingPointDigit
25+
case invalidEscapeSequenceInStringLiteral
2726
case invalidFloatingPointExponentCharacter
2827
case invalidFloatingPointExponentDigit
2928
case invalidHexDigitInIntegerLiteral

Tests/SwiftParserTest/ExpressionTests.swift

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -611,9 +611,10 @@ final class ExpressionTests: XCTestCase {
611611
)
612612

613613
AssertParse(
614-
###""\1️⃣"###,
614+
###""1️⃣\2️⃣"###,
615615
diagnostics: [
616-
DiagnosticSpec(message: #"expected '"' to end string literal"#)
616+
DiagnosticSpec(locationMarker: "1️⃣", message: "invalid escape sequence in literal"),
617+
DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#),
617618
]
618619
)
619620
}
@@ -699,9 +700,12 @@ final class ExpressionTests: XCTestCase {
699700
func testPoundsInStringInterpolationWhereNotNecessary() {
700701
AssertParse(
701702
##"""
702-
"\#(1)"
703+
"1️⃣\#(1)"
703704
"""##,
704-
substructure: Syntax(StringSegmentSyntax(content: .stringSegment(##"\#(1)"##)))
705+
substructure: Syntax(StringSegmentSyntax(content: .stringSegment(##"\#(1)"##))),
706+
diagnostics: [
707+
DiagnosticSpec(message: "invalid escape sequence in literal")
708+
]
705709
)
706710
}
707711

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,11 @@ public class LexerTests: XCTestCase {
130130

131131
AssertLexemes(
132132
#"""
133-
"\u{12341234}"
133+
"1️⃣\u{12341234}"
134134
"""#,
135135
lexemes: [
136-
// FIXME: We should diagnose invalid unicode characters in string literals
137136
LexemeSpec(.stringQuote, text: #"""#),
138-
LexemeSpec(.stringSegment, text: #"\u{12341234}"#),
137+
LexemeSpec(.stringSegment, text: #"\u{12341234}"#, error: "invalid escape sequence in literal"),
139138
LexemeSpec(.stringQuote, text: #"""#),
140139
]
141140
)

Tests/SwiftParserTest/translated/MultilineErrorsTests.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -476,13 +476,13 @@ final class MultilineErrorsTests: XCTestCase {
476476
func testMultilineErrors20() {
477477
AssertParseWithAllNewlineEndings(
478478
##"""
479-
_ = 1️⃣"""
480-
line one \ non-whitespace
479+
_ = """
480+
line one 1️⃣\ non-whitespace
481481
line two
482482
"""
483483
"""##,
484484
diagnostics: [
485-
// TODO: Old parser expected error on line 2: invalid escape sequence in literal
485+
DiagnosticSpec(message: "invalid escape sequence in literal")
486486
]
487487
)
488488
}
@@ -583,10 +583,11 @@ final class MultilineErrorsTests: XCTestCase {
583583
AssertParseWithAllNewlineEndings(
584584
##"""
585585
_ = """
586-
foo\1️⃣
586+
foo1️⃣\2️⃣
587587
"""##,
588588
diagnostics: [
589-
DiagnosticSpec(message: #"expected '"""' to end string literal"#)
589+
DiagnosticSpec(locationMarker: "1️⃣", message: "invalid escape sequence in literal"),
590+
DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"""' to end string literal"#),
590591
]
591592
)
592593
}
@@ -600,7 +601,6 @@ final class MultilineErrorsTests: XCTestCase {
600601
"""#,
601602
diagnostics: [
602603
DiagnosticSpec(message: "escaped newline at the last line of a multi-line string literal is not allowed")
603-
// TODO: Old parser expected error on line 2: escaped newline at the last line is not allowed, Fix-It replacements: 1 - 2 = ''
604604
],
605605
fixedSource: #"""
606606
_ = """

Tests/SwiftParserTest/translated/StringLiteralEofTests.swift

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ final class StringLiteralEofTests: XCTestCase {
5252
func testStringLiteralEof3() {
5353
AssertParse(
5454
##"""
55-
_ = "foo \1️⃣
55+
_ = "foo 1️⃣\2️⃣
5656
"""##,
5757
diagnostics: [
58-
// TODO: Old parser expected error on line 1: invalid escape sequence in literal
59-
DiagnosticSpec(message: #"expected '"' to end string literal"#)
58+
DiagnosticSpec(locationMarker: "1️⃣", message: "invalid escape sequence in literal"),
59+
DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#),
6060
]
6161
)
6262
}
@@ -65,12 +65,11 @@ final class StringLiteralEofTests: XCTestCase {
6565
AssertParse(
6666
##"""
6767
// NOTE: DO NOT add a newline at EOF.
68-
_ = "foo \1️⃣
68+
_ = "foo 1️⃣\2️⃣
6969
"""##,
7070
diagnostics: [
71-
// TODO: Old parser expected error on line 2: unterminated string literal
72-
// TODO: Old parser expected error on line 2: invalid escape sequence in literal
73-
DiagnosticSpec(message: #"expected '"' to end string literal"#)
71+
DiagnosticSpec(locationMarker: "1️⃣", message: "invalid escape sequence in literal"),
72+
DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#),
7473
]
7574
)
7675
}

0 commit comments

Comments
 (0)