Skip to content

Commit eb2a70b

Browse files
committed
Move classification of newline after multi-line string quote and escaped newline to lexer
This simplifies parsing of string literals while only making the lexer slightly more complex. It also fixes two bugs where we incorrectly identified a trailing `\` as escaped even if it wasn’t.
1 parent f5c0823 commit eb2a70b

File tree

7 files changed

+162
-111
lines changed

7 files changed

+162
-111
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -259,17 +259,22 @@ extension Lexer {
259259
/// error occurred
260260
let error: (kind: LexerError.Kind, position: Lexer.Cursor)?
261261
let stateTransition: StateTransition?
262+
/// If set, overrides the trailing trivia lexing mode of the current state
263+
/// for this lexeme.
264+
let trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?
262265

263266
init(
264267
_ tokenKind: RawTokenKind,
265268
flags: Lexer.Lexeme.Flags = [],
266269
error: (kind: LexerError.Kind, position: Cursor)? = nil,
267-
stateTransition: StateTransition? = nil
270+
stateTransition: StateTransition? = nil,
271+
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
268272
) {
269273
self.tokenKind = tokenKind
270274
self.flags = flags
271275
self.error = error
272276
self.stateTransition = stateTransition
277+
self.trailingTriviaLexingMode = trailingTriviaLexingMode
273278
}
274279
}
275280
}
@@ -317,16 +322,9 @@ extension Lexer.Cursor {
317322

318323
// Trailing trivia.
319324
let trailingTriviaStart = self
320-
let newlineInTrailingTrivia: NewlinePresence
321-
if let trailingTriviaMode = currentState.trailingTriviaLexingMode(cursor: self) {
322-
newlineInTrailingTrivia = self.lexTrivia(mode: trailingTriviaMode)
323-
} else {
324-
newlineInTrailingTrivia = .absent
325+
if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) {
326+
_ = self.lexTrivia(mode: trailingTriviaMode)
325327
}
326-
assert(
327-
newlineInTrailingTrivia == .absent,
328-
"trailingTrivia should not have a newline"
329-
)
330328

331329
if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") {
332330
self.stateStack.perform(stateTransition: .pop, stateAllocator: stateAllocator)
@@ -853,7 +851,7 @@ extension Lexer.Cursor {
853851
UInt8(ascii: "9"):
854852
return self.lexNumber()
855853
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
856-
return self.lexStringQuote(leadingDelimiterLength: 0)
854+
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: 0)
857855

858856
case UInt8(ascii: "`"):
859857
return self.lexEscapedIdentifier()
@@ -878,7 +876,7 @@ extension Lexer.Cursor {
878876
private mutating func lexAfterRawStringDelimiter(delimiterLength: Int) -> Lexer.Result {
879877
switch self.peek() {
880878
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
881-
return self.lexStringQuote(leadingDelimiterLength: delimiterLength)
879+
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: delimiterLength)
882880
case nil:
883881
return Lexer.Result(.eof)
884882
default:
@@ -889,7 +887,7 @@ extension Lexer.Cursor {
889887
private mutating func lexAfterStringLiteral() -> Lexer.Result {
890888
switch self.peek() {
891889
case UInt8(ascii: #"'"#), UInt8(ascii: #"""#):
892-
return self.lexStringQuote(leadingDelimiterLength: 0)
890+
return self.lexStringQuote(isOpening: false, leadingDelimiterLength: 0)
893891
case nil:
894892
return Lexer.Result(.eof)
895893
default:
@@ -984,9 +982,28 @@ extension Lexer.Cursor {
984982
case normal
985983
/// Don't lex newlines (`\n` and `\r`) as trivia
986984
case noNewlines
985+
/// Lex the characters that escape a newline in a multi-line string literal
986+
/// as trivia.
987+
///
988+
/// Matches the following regex: `\\?#*[ \t]*(\r\n|\r|\n)`
989+
case escapedNewlineInMultiLineStringLiteral
987990
}
988991

989992
fileprivate mutating func lexTrivia(mode: TriviaLexingMode) -> NewlinePresence {
993+
if mode == .escapedNewlineInMultiLineStringLiteral {
994+
_ = self.advance(matching: "\\")
995+
self.advance(while: { $0 == "#" })
996+
self.advance(while: { $0 == " " || $0 == "\t" })
997+
if self.advance(matching: "\r") {
998+
_ = self.advance(matching: "\n")
999+
return .present
1000+
} else if self.advance(matching: "\n") {
1001+
return .present
1002+
} else {
1003+
return .absent
1004+
}
1005+
}
1006+
9901007
var hasNewline = false
9911008
while true {
9921009
let start = self
@@ -1662,7 +1679,9 @@ extension Lexer.Cursor {
16621679
}
16631680
}
16641681

1665-
mutating func lexStringQuote(leadingDelimiterLength: Int) -> Lexer.Result {
1682+
/// `isOpening` is `true` if this string quote is the opening quote of a string
1683+
/// literal and `false` if we are lexing the closing quote of a string literal.
1684+
mutating func lexStringQuote(isOpening: Bool, leadingDelimiterLength: Int) -> Lexer.Result {
16661685
if self.advance(matching: "'") {
16671686
return Lexer.Result(.singleQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleQuote))
16681687
}
@@ -1696,7 +1715,20 @@ extension Lexer.Cursor {
16961715
}
16971716

16981717
self = lookingForMultilineString
1699-
return Lexer.Result(.multilineStringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine))
1718+
let trailingTriviaLexingMode: TriviaLexingMode?
1719+
if isOpening && self.is(at: "\n", "\r") {
1720+
// The opening quote of a multi-line string literal must be followed by
1721+
// a newline that's not part of the represented string.
1722+
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
1723+
} else {
1724+
trailingTriviaLexingMode = nil
1725+
}
1726+
1727+
return Lexer.Result(
1728+
.multilineStringQuote,
1729+
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
1730+
trailingTriviaLexingMode: trailingTriviaLexingMode
1731+
)
17001732
} else {
17011733
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
17021734
}
@@ -1714,6 +1746,23 @@ extension Lexer.Cursor {
17141746
return tmp.advanceIfStringDelimiter(delimiterLength: delimiterLength) && tmp.is(at: "(")
17151747
}
17161748

1749+
/// Returns `true` if we are positioned at a backslash that escapes the newline
1750+
/// character in a multi-line string literal.
1751+
private func isAtEscapedNewline(delimiterLength: Int) -> Bool {
1752+
guard self.is(at: "\\") else {
1753+
return false
1754+
}
1755+
1756+
var tmp = self
1757+
let backslashConsumed = tmp.advance(matching: "\\") // Skip over the '\' to look for '#' and the newline
1758+
assert(backslashConsumed)
1759+
guard tmp.advanceIfStringDelimiter(delimiterLength: delimiterLength) else {
1760+
return false
1761+
}
1762+
tmp.advance(while: { $0 == " " || $0 == "\t" })
1763+
return tmp.is(at: "\r", "\n")
1764+
}
1765+
17171766
mutating func lexInStringLiteral(stringLiteralKind: StringLiteralKind, delimiterLength: Int) -> Lexer.Result {
17181767
/*
17191768
if IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r' {
@@ -1729,6 +1778,11 @@ extension Lexer.Cursor {
17291778
.stringSegment,
17301779
stateTransition: .push(newState: .inStringInterpolationStart(stringLiteralKind: stringLiteralKind))
17311780
)
1781+
} else if self.isAtEscapedNewline(delimiterLength: delimiterLength) {
1782+
return Lexer.Result(
1783+
.stringSegment,
1784+
trailingTriviaLexingMode: .escapedNewlineInMultiLineStringLiteral
1785+
)
17321786
}
17331787
case UInt8(ascii: "\r"), UInt8(ascii: "\n"):
17341788
if stringLiteralKind == .multiLine {

0 commit comments

Comments
 (0)