Skip to content

Commit 4eac7ef

Browse files
committed
Move classification of newline after multi-line string quote and escaped newline to lexer
This simplifies parsing of string literals while only making the lexer slightly more complex. It also fixes two bugs where we incorrectly identified a trailing `\` as escaped even if it wasn’t.
1 parent 00b5095 commit 4eac7ef

File tree

7 files changed

+162
-111
lines changed

7 files changed

+162
-111
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -257,17 +257,22 @@ extension Lexer {
257257
let flags: Lexer.Lexeme.Flags
258258
let error: LexerError?
259259
let stateTransition: StateTransition?
260+
/// If set, overrides the trailing trivia lexing mode of the current state
261+
/// for this lexeme.
262+
let trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?
260263

261264
init(
262265
_ tokenKind: RawTokenKind,
263266
flags: Lexer.Lexeme.Flags = [],
264267
error: LexerError? = nil,
265-
stateTransition: StateTransition? = nil
268+
stateTransition: StateTransition? = nil,
269+
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
266270
) {
267271
self.tokenKind = tokenKind
268272
self.flags = flags
269273
self.error = error
270274
self.stateTransition = stateTransition
275+
self.trailingTriviaLexingMode = trailingTriviaLexingMode
271276
}
272277
}
273278
}
@@ -315,16 +320,9 @@ extension Lexer.Cursor {
315320

316321
// Trailing trivia.
317322
let trailingTriviaStart = self
318-
let newlineInTrailingTrivia: NewlinePresence
319-
if let trailingTriviaMode = currentState.trailingTriviaLexingMode(cursor: self) {
320-
newlineInTrailingTrivia = self.lexTrivia(mode: trailingTriviaMode)
321-
} else {
322-
newlineInTrailingTrivia = .absent
323+
if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) {
324+
_ = self.lexTrivia(mode: trailingTriviaMode)
323325
}
324-
assert(
325-
newlineInTrailingTrivia == .absent,
326-
"trailingTrivia should not have a newline"
327-
)
328326

329327
if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") {
330328
self.stateStack.perform(stateTransition: .pop, stateAllocator: stateAllocator)
@@ -902,7 +900,7 @@ extension Lexer.Cursor {
902900
UInt8(ascii: "9"):
903901
return self.lexNumber()
904902
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
905-
return self.lexStringQuote(leadingDelimiterLength: 0)
903+
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: 0)
906904

907905
case UInt8(ascii: "`"):
908906
return self.lexEscapedIdentifier()
@@ -927,7 +925,7 @@ extension Lexer.Cursor {
927925
private mutating func lexAfterRawStringDelimiter(delimiterLength: Int) -> Lexer.Result {
928926
switch self.peek() {
929927
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
930-
return self.lexStringQuote(leadingDelimiterLength: delimiterLength)
928+
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: delimiterLength)
931929
case nil:
932930
return Lexer.Result(.eof)
933931
default:
@@ -938,7 +936,7 @@ extension Lexer.Cursor {
938936
private mutating func lexAfterStringLiteral() -> Lexer.Result {
939937
switch self.peek() {
940938
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
941-
return self.lexStringQuote(leadingDelimiterLength: 0)
939+
return self.lexStringQuote(isOpening: false, leadingDelimiterLength: 0)
942940
case nil:
943941
return Lexer.Result(.eof)
944942
default:
@@ -1025,9 +1023,28 @@ extension Lexer.Cursor {
10251023
case normal
10261024
/// Don't lex newlines (`\r` and `\n`) as trivia
10271025
case noNewlines
1026+
/// Lex the characters that escape a newline in a multi-line string literal
1027+
/// as trivia.
1028+
///
1029+
/// Matches the following regex: `\\?#*[ \t]*(\r\n|\r|\n)`
1030+
case escapedNewlineInMultiLineStringLiteral
10281031
}
10291032

10301033
fileprivate mutating func lexTrivia(mode: TriviaLexingMode) -> NewlinePresence {
1034+
if mode == .escapedNewlineInMultiLineStringLiteral {
1035+
_ = self.advance(matching: "\\")
1036+
self.advance(while: { $0 == "#" })
1037+
self.advance(while: { $0 == " " || $0 == "\t" })
1038+
if self.advance(matching: "\r") {
1039+
_ = self.advance(matching: "\n")
1040+
return .present
1041+
} else if self.advance(matching: "\n") {
1042+
return .present
1043+
} else {
1044+
return .absent
1045+
}
1046+
}
1047+
10311048
var hasNewline = false
10321049
while true {
10331050
let start = self
@@ -1701,7 +1718,9 @@ extension Lexer.Cursor {
17011718
}
17021719
}
17031720

1704-
mutating func lexStringQuote(leadingDelimiterLength: Int) -> Lexer.Result {
1721+
/// `isOpening` is `true` if this string quote is the opening quote of a string
1722+
/// literal and `false` if we are lexing the closing quote of a string literal.
1723+
mutating func lexStringQuote(isOpening: Bool, leadingDelimiterLength: Int) -> Lexer.Result {
17051724
if self.advance(matching: "'") {
17061725
return Lexer.Result(.singleQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleQuote))
17071726
}
@@ -1735,7 +1754,20 @@ extension Lexer.Cursor {
17351754
}
17361755

17371756
self = lookingForMultilineString
1738-
return Lexer.Result(.multilineStringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine))
1757+
let trailingTriviaLexingMode: TriviaLexingMode?
1758+
if isOpening && self.is(at: "\n", "\r") {
1759+
// The opening quote of a multi-line string literal must be followed by
1760+
// a newline that's not part of the represented string.
1761+
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
1762+
} else {
1763+
trailingTriviaLexingMode = nil
1764+
}
1765+
1766+
return Lexer.Result(
1767+
.multilineStringQuote,
1768+
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
1769+
trailingTriviaLexingMode: trailingTriviaLexingMode
1770+
)
17391771
} else {
17401772
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
17411773
}
@@ -1753,6 +1785,23 @@ extension Lexer.Cursor {
17531785
return tmp.advanceIfStringDelimiter(delimiterLength: delimiterLength) && tmp.is(at: "(")
17541786
}
17551787

1788+
/// Returns `true` if we are positioned at a backslash that escapes the newline
1789+
/// character in a multi-line string literal.
1790+
private func isAtEscapedNewline(delimiterLength: Int) -> Bool {
1791+
guard self.is(at: "\\") else {
1792+
return false
1793+
}
1794+
1795+
var tmp = self
1796+
let backslashConsumed = tmp.advance(matching: "\\") // Skip over the '\' to look for '#' and '('
1797+
assert(backslashConsumed)
1798+
guard tmp.advanceIfStringDelimiter(delimiterLength: delimiterLength) else {
1799+
return false
1800+
}
1801+
tmp.advance(while: { $0 == " " || $0 == "\t" })
1802+
return tmp.is(at: "\r", "\n")
1803+
}
1804+
17561805
mutating func lexInStringLiteral(stringLiteralKind: StringLiteralKind, delimiterLength: Int) -> Lexer.Result {
17571806
/*
17581807
if IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r' {
@@ -1768,6 +1817,11 @@ extension Lexer.Cursor {
17681817
.stringSegment,
17691818
stateTransition: .push(newState: .inStringInterpolationStart(stringLiteralKind: stringLiteralKind))
17701819
)
1820+
} else if self.isAtEscapedNewline(delimiterLength: delimiterLength) {
1821+
return Lexer.Result(
1822+
.stringSegment,
1823+
trailingTriviaLexingMode: .escapedNewlineInMultiLineStringLiteral
1824+
)
17711825
}
17721826
case UInt8(ascii: "\r"), UInt8(ascii: "\n"):
17731827
if stringLiteralKind == .multiLine {

0 commit comments

Comments
 (0)