Skip to content

Commit fac654a

Browse files
committed
Diagnose incorrect quotes in multiline string literals
1 parent 72f9740 commit fac654a

File tree

14 files changed

+680
-73
lines changed

14 files changed

+680
-73
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,7 +1731,12 @@ extension Lexer.Cursor {
17311731
)
17321732
}
17331733
case UInt8(ascii: "\r"), UInt8(ascii: "\n"):
1734-
if stringLiteralKind != .multiLine {
1734+
if stringLiteralKind == .multiLine {
1735+
// Make sure each line starts a new string segment so the parser can
1736+
// validate the multi-line string literal's indentation.
1737+
_ = self.advance()
1738+
return Lexer.Result(.stringSegment)
1739+
} else {
17351740
// Single line literals cannot span multiple lines.
17361741
// Terminate the string here and go back to normal lexing (instead of `afterStringLiteral`)
17371742
// since we aren't looking for the closing quote anymore.
@@ -1750,8 +1755,15 @@ extension Lexer.Cursor {
17501755
var clone = self
17511756
let charValue = clone.lexCharacterInStringLiteral(stringLiteralKind: stringLiteralKind, delimiterLength: delimiterLength)
17521757
switch charValue {
1753-
case .success, .validatedEscapeSequence:
1758+
case .success:
17541759
self = clone
1760+
case .validatedEscapeSequence(let escapedCharacter):
1761+
self = clone
1762+
if escapedCharacter == "\n" || escapedCharacter == "\r" {
1763+
// Make sure each line starts a new string segment so the parser can
1764+
// validate the multi-line string literal's indentation.
1765+
return Lexer.Result(.stringSegment)
1766+
}
17551767
case .error:
17561768
// TODO: Diagnose error
17571769
self = clone

Sources/SwiftParser/StringLiterals.swift

Lines changed: 196 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,176 @@ extension Parser {
4444
}
4545
}
4646

47+
/// Parses `text` as leading trivia and returns the resulting pieces only if
48+
/// every piece is indentation whitespace (`.spaces` or `.tabs`); otherwise
/// returns `nil`.
///
/// - Parameter text: The raw text to interpret as indentation.
/// - Returns: The trivia pieces produced by `TriviaParser.parseTrivia`, or
///   `nil` if any piece is something other than spaces or tabs.
///
/// NOTE(review): `allSatisfy` is vacuously true for an empty collection, so if
/// the trivia parser yields no pieces the (empty) result is returned rather
/// than `nil` — presumably this covers empty `text`; confirm.
49+
private func parseIndentationTrivia(text: SyntaxText) -> [RawTriviaPiece]? {
50+
let trivia = TriviaParser.parseTrivia(text, position: .leading)
51+
if trivia.allSatisfy({ $0.isIndentationWhitespace }) {
52+
return trivia
53+
} else {
54+
return nil
55+
}
56+
}
57+
58+
// FIXME: Handle \r and \r\n if needed in here
/// Post-processes the tokens of a multi-line string literal as produced by the
/// lexer: validates that the open quote is followed by a newline and that the
/// close quote starts on its own line, extracts the indentation that precedes
/// the close quote, and reclassifies that indentation (and the relevant
/// newlines) from string content into trivia.
///
/// When a quote is misplaced, the real quote token is returned in the
/// corresponding `unexpectedBefore…` array and a missing quote token carrying
/// the required newline (and indentation) trivia is returned in its place.
///
/// - Parameters:
///   - openQuote: The lexed open quote; asserted to have no trailing trivia.
///   - allSegments: The lexed segments of the literal; string segments are
///     asserted to carry no unexpected nodes and no trivia.
///   - closeQuote: The lexed close quote; asserted to have no leading trivia.
/// - Returns: The (possibly replaced) quotes, the tokens that were marked as
///   unexpected before each quote, and the rewritten segment list.
59+
private func postProcessMultilineStringLiteral(
60+
openQuote: RawTokenSyntax,
61+
segments allSegments: [RawStringLiteralSegmentsSyntax.Element],
62+
closeQuote: RawTokenSyntax
63+
) -> (
64+
unexpectedBeforeOpenQuote: [RawTokenSyntax],
65+
openQuote: RawTokenSyntax,
66+
segments: [RawStringLiteralSegmentsSyntax.Element],
67+
unexpectedBeforeCloseQuote: [RawTokenSyntax],
68+
closeQuote: RawTokenSyntax
69+
) {
70+
// -------------------------------------------------------------------------
71+
// Precondition
72+
73+
assert(openQuote.trailingTriviaByteLength == 0, "Open quote produced by the lexer should not have trailing trivia because we would drop it during post-processing")
74+
assert(closeQuote.leadingTriviaByteLength == 0, "Closing quote produced by the lexer should not have leading trivia because we would drop it during post-processing")
75+
assert(
76+
allSegments.allSatisfy {
77+
if case .stringSegment(let segment) = $0 {
78+
return segment.unexpectedBeforeContent == nil
79+
&& segment.unexpectedAfterContent == nil
80+
&& segment.content.leadingTriviaByteLength == 0
81+
&& segment.content.trailingTriviaByteLength == 0
82+
} else {
83+
return true
84+
}
85+
},
// NOTE(review): the message below misspells "segment" as "segement".
86+
"String segement produced by the lexer should not have unexpected text or trivia because we would drop it during post-processing"
87+
)
88+
89+
// -------------------------------------------------------------------------
90+
// Variables
91+
92+
var middleSegments = allSegments
// The last segment is removed before the first one, so a literal with a
// single segment ends up with `lastSegment` set and `firstSegment == nil`.
// NOTE(review): `.as(RawStringSegmentSyntax.self)` presumably yields `nil`
// for an expression segment, in which case that removed segment is not
// re-added anywhere below — confirm the lexer never produces that shape.
93+
let lastSegment = !middleSegments.isEmpty ? middleSegments.removeLast().as(RawStringSegmentSyntax.self) : nil
94+
let firstSegment = !middleSegments.isEmpty ? middleSegments.removeFirst().as(RawStringSegmentSyntax.self) : nil
95+
96+
// Both are assigned exactly once in the "Parse indentation" section below.
let indentation: SyntaxText
97+
let indentationTrivia: [RawTriviaPiece]
98+
99+
var unexpectedBeforeOpenQuote: [RawTokenSyntax] = []
100+
var openQuote = openQuote
101+
var unexpectedBeforeCloseQuote: [RawTokenSyntax] = []
102+
var closeQuote = closeQuote
103+
104+
// -------------------------------------------------------------------------
105+
// Check close quote is on new line
106+
107+
let closeDelimiterOnNewLine: Bool
108+
switch middleSegments.last {
109+
case .stringSegment(let lastMiddleSegment):
110+
if lastMiddleSegment.content.tokenText.hasSuffix("\n") {
111+
// The newline at the end of the last line in the string literal is not part of the represented string.
112+
// Mark it as trivia.
113+
middleSegments[middleSegments.count - 1] = .stringSegment(
114+
RawStringSegmentSyntax(
115+
content: lastMiddleSegment.content.reclassifyAsTrailingTrivia([.newlines(1)], arena: self.arena),
116+
arena: self.arena
117+
)
118+
)
119+
closeDelimiterOnNewLine = true
120+
} else {
121+
closeDelimiterOnNewLine = false
122+
}
123+
case .expressionSegment:
124+
closeDelimiterOnNewLine = false
125+
case nil:
// No middle segments remain: decide based on whether the first segment
// ends in a newline (`false` when there is no first segment either).
126+
closeDelimiterOnNewLine = firstSegment?.content.tokenText.hasSuffix("\n") ?? false
127+
}
128+
129+
if !closeDelimiterOnNewLine {
// Keep the real close quote as an unexpected token and substitute a
// missing close quote that carries the required leading newline.
130+
unexpectedBeforeCloseQuote = [closeQuote]
131+
closeQuote = RawTokenSyntax(missing: closeQuote.tokenKind, leadingTriviaPieces: [.newlines(1)], arena: self.arena)
132+
133+
// The closing delimiter doesn't start on a new line and thus it doesn't
134+
// make sense to try and extract indentation from it.
// The untouched `allSegments` are returned; `middleSegments` was not
// modified on this path because the reclassification above only happens
// when the close delimiter is on a new line.
135+
return (unexpectedBeforeOpenQuote, openQuote, allSegments, unexpectedBeforeCloseQuote, closeQuote)
136+
}
137+
138+
// -------------------------------------------------------------------------
139+
// Parse indentation
140+
141+
if let lastSegment = lastSegment,
142+
let parsedTrivia = parseIndentationTrivia(text: lastSegment.content.tokenText)
143+
{
// The whole last segment is indentation: it becomes the close quote's
// leading trivia and is not re-added to `middleSegments`.
144+
indentationTrivia = parsedTrivia
145+
indentation = lastSegment.content.tokenText
146+
closeQuote = closeQuote.extendingLeadingTrivia(by: parsedTrivia, arena: self.arena)
147+
} else {
148+
if let lastSegment = lastSegment {
// The last segment contains more than indentation: use only its leading
// run of spaces/tabs as the indentation and keep the segment as content.
149+
indentationTrivia = TriviaParser.parseTrivia(lastSegment.content.tokenText, position: .leading).prefix(while: { $0.isIndentationWhitespace })
150+
let indentationByteLength = indentationTrivia.reduce(0, { $0 + $1.byteLength })
151+
indentation = SyntaxText(rebasing: lastSegment.content.tokenText[0..<indentationByteLength])
152+
middleSegments.append(.stringSegment(lastSegment))
153+
} else {
154+
indentationTrivia = []
155+
indentation = ""
156+
}
157+
158+
// Mark the real close quote as unexpected and substitute a missing close
// quote placed on a new line at the inferred indentation.
unexpectedBeforeCloseQuote = [closeQuote]
159+
closeQuote = RawTokenSyntax(missing: closeQuote.tokenKind, leadingTriviaPieces: [.newlines(1)] + indentationTrivia, arena: self.arena)
160+
}
161+
162+
// -------------------------------------------------------------------------
163+
// Check open quote followed by newline
164+
165+
if firstSegment?.content.tokenText == "\n" {
// The first segment is exactly a newline: it is dropped from the segments
// and represented as trailing trivia of the open quote instead.
166+
openQuote = openQuote.extendingTrailingTrivia(by: [.newlines(1)], arena: self.arena)
167+
} else {
// Content starts on the same line as the open quote (or there is no first
// segment): keep the first segment as content, mark the real open quote as
// unexpected and substitute a missing one followed by newline + indentation.
168+
if let firstSegment = firstSegment {
169+
middleSegments.insert(.stringSegment(firstSegment), at: 0)
170+
}
171+
unexpectedBeforeOpenQuote = [openQuote]
172+
openQuote = RawTokenSyntax(missing: openQuote.tokenKind, trailingTriviaPieces: [.newlines(1)] + indentationTrivia, arena: self.arena)
173+
}
174+
175+
// -------------------------------------------------------------------------
176+
// Check indentation of segments
177+
178+
for (index, segment) in middleSegments.enumerated() {
179+
switch segment {
180+
case .stringSegment(var segment):
181+
assert(segment.unexpectedBeforeContent == nil, "Segment should not have unexpected before content")
182+
assert(segment.content.leadingTriviaByteLength == 0, "Segment should not have leading trivia")
183+
if segment.content.tokenText.hasPrefix(indentation) {
// The segment starts with the literal's indentation: move that prefix
// out of the token text and into leading trivia.
184+
segment = RawStringSegmentSyntax(
185+
content: segment.content.reclassifyAsLeadingTrivia(indentationTrivia, arena: self.arena),
186+
arena: self.arena
187+
)
188+
} else {
189+
// TODO: Diagnose
190+
}
// A segment ending in a backslash followed by a newline has those two
// characters moved into trailing trivia (the backslash as unexpected text).
191+
if segment.content.tokenText.hasSuffix("\\\n") {
192+
// TODO: Add a backslash trivia kind
193+
segment = RawStringSegmentSyntax(
194+
content: segment.content.reclassifyAsTrailingTrivia([.unexpectedText("\\"), .newlines(1)], arena: self.arena),
195+
arena: self.arena
196+
)
197+
}
198+
middleSegments[index] = .stringSegment(segment)
199+
case .expressionSegment:
200+
// TODO: Check indentation
201+
break
202+
}
203+
}
204+
205+
// -------------------------------------------------------------------------
206+
// Done
207+
208+
return (
209+
unexpectedBeforeOpenQuote,
210+
openQuote,
211+
middleSegments,
212+
unexpectedBeforeCloseQuote,
213+
closeQuote
214+
)
215+
}
216+
47217
/// Parse a string literal expression.
48218
@_spi(RawSyntax)
49219
public mutating func parseStringLiteral() -> RawStringLiteralExprSyntax {
@@ -106,21 +276,34 @@ extension Parser {
106276
}
107277

108278
/// Parse close quote.
109-
let (unexpectedBeforeCloseQuote, closeQuote) = self.expect(openQuote.tokenKind)
279+
let closeQuote = self.expectWithoutRecovery(openQuote.tokenKind)
110280

111281
let (unexpectedBeforeCloseDelimiter, closeDelimiter) = self.parseStringDelimiter(openDelimiter: openDelimiter)
112282

113-
/// Construct the literal expression.
114-
return RawStringLiteralExprSyntax(
115-
openDelimiter: openDelimiter,
116-
unexpectedBeforeOpenQuote,
117-
openQuote: openQuote,
118-
segments: RawStringLiteralSegmentsSyntax(elements: segments, arena: self.arena),
119-
unexpectedBeforeCloseQuote,
120-
closeQuote: closeQuote,
121-
unexpectedBeforeCloseDelimiter,
122-
closeDelimiter: closeDelimiter,
123-
arena: self.arena
124-
)
283+
if openQuote.tokenKind == .multilineStringQuote, !openQuote.isMissing, !closeQuote.isMissing {
284+
let postProcessed = postProcessMultilineStringLiteral(openQuote: openQuote, segments: segments, closeQuote: closeQuote)
285+
return RawStringLiteralExprSyntax(
286+
openDelimiter: openDelimiter,
287+
RawUnexpectedNodesSyntax(combining: unexpectedBeforeOpenQuote, postProcessed.unexpectedBeforeOpenQuote, arena: self.arena),
288+
openQuote: postProcessed.openQuote,
289+
segments: RawStringLiteralSegmentsSyntax(elements: postProcessed.segments, arena: self.arena),
290+
RawUnexpectedNodesSyntax(postProcessed.unexpectedBeforeCloseQuote, arena: self.arena),
291+
closeQuote: postProcessed.closeQuote,
292+
unexpectedBeforeCloseDelimiter,
293+
closeDelimiter: closeDelimiter,
294+
arena: self.arena
295+
)
296+
} else {
297+
return RawStringLiteralExprSyntax(
298+
openDelimiter: openDelimiter,
299+
unexpectedBeforeOpenQuote,
300+
openQuote: openQuote,
301+
segments: RawStringLiteralSegmentsSyntax(elements: segments, arena: self.arena),
302+
closeQuote: closeQuote,
303+
unexpectedBeforeCloseDelimiter,
304+
closeDelimiter: closeDelimiter,
305+
arena: self.arena
306+
)
307+
}
125308
}
126309
}

Sources/SwiftParser/SyntaxUtils.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,12 @@ extension RawTokenKind {
9595
return kinds.contains(self)
9696
}
9797
}
98+
99+
extension RawTriviaPiece {
100+
/// `true` if this trivia piece is `.spaces` or `.tabs`, i.e. whitespace that
/// can make up indentation; `false` for every other kind of trivia piece.
var isIndentationWhitespace: Bool {
101+
switch self {
102+
case .spaces, .tabs: return true
103+
default: return false
104+
}
105+
}
106+
}

Sources/SwiftParserDiagnostics/MissingTokenError.swift

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,25 @@ extension ParseDiagnosticsGenerator {
2626
// The previous token is unexpected, assume that it was intended to be
2727
// this token.
2828

29+
let handled: Bool
2930
switch (missingToken.rawTokenKind, invalidToken.rawTokenKind) {
3031
case (.identifier, _):
31-
handleInvalidIdentifier(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
32+
handled = handleInvalidIdentifier(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
33+
case (.multilineStringQuote, .multilineStringQuote):
34+
handled = handleInvalidMultilineStringQuote(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
3235
case (.period, .period):
33-
handleInvalidPeriod(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
36+
handled = handleInvalidPeriod(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
3437
case (.rawStringDelimiter, .rawStringDelimiter):
35-
handleInvalidRawStringDelimiter(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
38+
handled = handleInvalidRawStringDelimiter(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer)
3639
default:
40+
handled = false
41+
}
42+
if !handled {
3743
_ = handleMissingSyntax(missingToken)
3844
}
3945
}
4046

41-
private func handleInvalidIdentifier(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) {
47+
private func handleInvalidIdentifier(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) -> Bool {
4248
let fixIts: [FixIt]
4349
if invalidToken.tokenKind.isLexerClassifiedKeyword || invalidToken.tokenKind.isDollarIdentifier {
4450
// TODO: Should the parser add the text with backticks to the missing
@@ -63,9 +69,33 @@ extension ParseDiagnosticsGenerator {
6369
fixIts: fixIts,
6470
handledNodes: [invalidTokenContainer.id]
6571
)
72+
return true
73+
}
74+
75+
/// Diagnoses a multi-line string quote that the parser marked as unexpected
/// and replaced with a missing quote carrying trivia.
///
/// - If the unexpected quote has no trailing trivia but the missing quote
///   does, reports `multiLineStringLiteralMustBeginOnNewLine` at the end of the
///   unexpected quote, with a fix-it that gives it the missing quote's
///   trailing trivia.
/// - Otherwise, if the unexpected quote has no leading trivia but the missing
///   quote does, reports `multiLineStringLiteralMustHaveClosingDelimiterOnNewLine`
///   with a fix-it that gives it the missing quote's leading trivia.
///
/// In both cases `invalidTokenContainer` is recorded as handled.
///
/// - Returns: `true` if a diagnostic was emitted; `false` if neither case
///   applied, so the caller can fall back to its generic handling.
private func handleInvalidMultilineStringQuote(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) -> Bool {
76+
if invalidToken.trailingTrivia.isEmpty && !missingToken.trailingTrivia.isEmpty {
77+
addDiagnostic(
78+
invalidToken,
79+
position: invalidToken.endPositionBeforeTrailingTrivia,
80+
.multiLineStringLiteralMustBeginOnNewLine,
81+
fixIts: [FixIt(message: .insertNewline, changes: [.replaceTrailingTrivia(token: invalidToken, newTrivia: missingToken.trailingTrivia)])],
82+
handledNodes: [invalidTokenContainer.id]
83+
)
84+
return true
85+
} else if invalidToken.leadingTrivia.isEmpty && !missingToken.leadingTrivia.isEmpty {
86+
addDiagnostic(
87+
invalidToken,
88+
.multiLineStringLiteralMustHaveClosingDelimiterOnNewLine,
89+
fixIts: [FixIt(message: .insertNewline, changes: [.replaceLeadingTrivia(token: invalidToken, newTrivia: missingToken.leadingTrivia)])],
90+
handledNodes: [invalidTokenContainer.id]
91+
)
92+
return true
93+
}
94+
95+
// Neither trivia mismatch applies; nothing was diagnosed here.
return false
6696
}
6797

68-
private func handleInvalidPeriod(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) {
98+
private func handleInvalidPeriod(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) -> Bool {
6999
// Trailing trivia is the cause of this diagnostic, don't transfer it.
70100
let changes: [FixIt.Changes] = [
71101
.makeMissing(invalidToken, transferTrivia: false),
@@ -89,9 +119,10 @@ extension ParseDiagnosticsGenerator {
89119
let fixIt = FixIt(message: .removeExtraneousWhitespace, changes: changes)
90120
addDiagnostic(invalidToken, .invalidWhitespaceAfterPeriod, fixIts: [fixIt], handledNodes: [invalidTokenContainer.id])
91121
}
122+
return true
92123
}
93124

94-
private func handleInvalidRawStringDelimiter(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) {
125+
private func handleInvalidRawStringDelimiter(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) -> Bool {
95126
let message: DiagnosticMessage
96127
if missingToken.parent?.is(ExpressionSegmentSyntax.self) == true {
97128
message = .tooManyRawStringDelimitersToStartInterpolation
@@ -116,5 +147,6 @@ extension ParseDiagnosticsGenerator {
116147
fixIts: [fixIt],
117148
handledNodes: [invalidTokenContainer.id]
118149
)
150+
return true
119151
}
120152
}

Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ extension DiagnosticMessage where Self == StaticParserError {
149149
/// Parser error with the message "expected ':' after '? ...' in ternary expression".
public static var missingColonInTernaryExpr: Self {
150150
.init("expected ':' after '? ...' in ternary expression")
151151
}
152+
/// Parser error with the message "multi-line string literal content must
/// begin on a new line".
public static var multiLineStringLiteralMustBeginOnNewLine: Self {
153+
.init("multi-line string literal content must begin on a new line")
154+
}
155+
/// Parser error with the message "multi-line string literal closing delimiter
/// must begin on a new line".
public static var multiLineStringLiteralMustHaveClosingDelimiterOnNewLine: Self {
156+
.init("multi-line string literal closing delimiter must begin on a new line")
157+
}
152158
/// Parser error with the message "operator should not be declared with body".
public static var operatorShouldBeDeclaredWithoutBody: Self {
153159
.init("operator should not be declared with body")
154160
}
@@ -345,6 +351,9 @@ extension FixItMessage where Self == StaticParserFixIt {
345351
/// Fix-it message "insert attribute argument".
public static var insertAttributeArguments: Self {
346352
.init("insert attribute argument")
347353
}
354+
/// Fix-it message "insert newline".
public static var insertNewline: Self {
355+
.init("insert newline")
356+
}
348357
/// Fix-it message "join the identifiers together".
public static var joinIdentifiers: Self {
349358
.init("join the identifiers together")
350359
}

Sources/SwiftParserDiagnostics/PresenceUtils.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class PresentMaker: SyntaxRewriter {
5353
presentToken = TokenSyntax(token.tokenKind, presence: .present)
5454
} else {
5555
let newKind = TokenKind.fromRaw(kind: rawKind, text: rawKind.defaultText.map(String.init) ?? "<#\(rawKind.nameForDiagnostics)#>")
56-
presentToken = TokenSyntax(newKind, presence: .present)
56+
presentToken = TokenSyntax(newKind, leadingTrivia: token.leadingTrivia, trailingTrivia: token.trailingTrivia, presence: .present)
5757
}
5858
return BasicFormat().visit(presentToken)
5959
} else {

Sources/SwiftSyntax/Raw/RawSyntax.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,8 @@ extension RawSyntax {
471471
wholeText: SyntaxText,
472472
textRange: Range<SyntaxText.Index>,
473473
presence: SourcePresence,
474-
arena: SyntaxArena,
475-
lexerError: LexerError?
474+
lexerError: LexerError?,
475+
arena: SyntaxArena
476476
) -> RawSyntax {
477477
assert(
478478
arena.contains(text: wholeText),

0 commit comments

Comments
 (0)