Skip to content

Commit 5a6c6bd

Browse files
committed
Produce separate tokens for raw string delimiters and string quotes in the lexer
The eventual goal of this change is that we no longer need to re-lex string literals from the parser to separate them into their components. Instead, the lexer should just produce the lexemes that will later be put into the syntax tree as tokens. The downside of this is that the lexer now needs to carry state and know whether it is lexing a string literal. On the upside, the string literal parser could be significantly simplified and the diagnostics got better without any further changes.
1 parent 80b28f4 commit 5a6c6bd

38 files changed

+460
-394
lines changed

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AttributeNodes.swift

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ public let ATTRIBUTE_NODES: [Node] = [
8383
kind: "Token",
8484
tokenChoices: [
8585
"Identifier",
86-
"StringLiteral",
8786
"IntegerLiteral"
8887
]),
8988
Child(name: "StringExpr",
@@ -252,10 +251,7 @@ public let ATTRIBUTE_NODES: [Node] = [
252251
kind: "Syntax",
253252
nodeChoices: [
254253
Child(name: "String",
255-
kind: "StringLiteralToken",
256-
tokenChoices: [
257-
"StringLiteral"
258-
]),
254+
kind: "StringLiteralExpr"),
259255
Child(name: "Declname",
260256
kind: "DeclName")
261257
])
@@ -578,11 +574,8 @@ public let ATTRIBUTE_NODES: [Node] = [
578574
kind: "Syntax",
579575
children: [
580576
Child(name: "MangledName",
581-
kind: "StringLiteralToken",
582-
description: "The mangled name of a declaration.",
583-
tokenChoices: [
584-
"StringLiteral"
585-
]),
577+
kind: "StringLiteralExpr",
578+
description: "The mangled name of a declaration."),
586579
Child(name: "Comma",
587580
kind: "CommaToken",
588581
tokenChoices: [
@@ -636,11 +629,8 @@ public let ATTRIBUTE_NODES: [Node] = [
636629
"Colon"
637630
]),
638631
Child(name: "CTypeString",
639-
kind: "StringLiteralToken",
640-
isOptional: true,
641-
tokenChoices: [
642-
"StringLiteral"
643-
])
632+
kind: "StringLiteralExpr",
633+
isOptional: true)
644634
]),
645635

646636
Node(name: "ConventionWitnessMethodAttributeArguments",

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AvailabilityNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,7 @@ public let AVAILABILITY_NODES: [Node] = [
7171
description: "The value of this labeled argument",
7272
nodeChoices: [
7373
Child(name: "String",
74-
kind: "StringLiteralToken",
75-
tokenChoices: [
76-
"StringLiteral"
77-
]),
74+
kind: "StringLiteralExpr"),
7875
Child(name: "Version",
7976
kind: "VersionTuple")
8077
])

CodeGeneration/Sources/SyntaxSupport/gyb_generated/DeclNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,7 @@ public let DECL_NODES: [Node] = [
318318
"Colon"
319319
]),
320320
Child(name: "FileName",
321-
kind: "StringLiteralToken",
322-
tokenChoices: [
323-
"StringLiteral"
324-
]),
321+
kind: "StringLiteralExpr"),
325322
Child(name: "Comma",
326323
kind: "CommaToken",
327324
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/StmtNodes.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -685,12 +685,9 @@ public let STMT_NODES: [Node] = [
685685
"Comma"
686686
]),
687687
Child(name: "Message",
688-
kind: "StringLiteralToken",
688+
kind: "StringLiteralExpr",
689689
description: "The assertion message.",
690-
isOptional: true,
691-
tokenChoices: [
692-
"StringLiteral"
693-
]),
690+
isOptional: true),
694691
Child(name: "RightParen",
695692
kind: "RightParenToken",
696693
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/TokenSpec.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
309309
PoundConfigSpec(name: "PoundHasSymbol", kind: "pound__hasSymbol", text: "#_hasSymbol"),
310310
LiteralSpec(name: "IntegerLiteral", kind: "integer_literal", nameForDiagnostics: "integer literal", classification: "IntegerLiteral"),
311311
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
312-
LiteralSpec(name: "StringLiteral", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
312+
LiteralSpec(name: "StringLiteralContents", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
313313
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
314314
MiscSpec(name: "Unknown", kind: "unknown", nameForDiagnostics: "token"),
315315
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ extension RawTokenKind {
350350
return .integerLiteral
351351
case .floatingLiteral:
352352
return .floatingLiteral
353-
case .stringLiteral:
353+
case .stringLiteralContents:
354354
return .stringLiteral
355355
case .regexLiteral:
356356
return .none

Sources/SwiftParser/Attributes.swift

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ extension Parser {
764764
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
765765
let (unexpectedBeforeLabel, label) = self.expectIdentifier(keywordRecovery: true)
766766
let (unexpectedBeforeColon, colon) = self.expect(.colon)
767-
let filename = self.consumeAnyToken()
767+
let filename = self.parseStringLiteral()
768768
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
769769
return RawAttributeSyntax(
770770
unexpectedBeforeAtSign,
@@ -900,11 +900,10 @@ extension Parser {
900900

901901
extension Parser {
902902
mutating func parseOpaqueReturnTypeOfAttributeArguments() -> RawOpaqueReturnTypeOfAttributeArgumentsSyntax {
903-
let (unexpectedBeforeString, mangledName) = self.expect(.stringLiteral)
903+
let mangledName = self.parseStringLiteral()
904904
let (unexpectedBeforeComma, comma) = self.expect(.comma)
905905
let (unexpectedBeforeOrdinal, ordinal) = self.expect(.integerLiteral)
906906
return RawOpaqueReturnTypeOfAttributeArgumentsSyntax(
907-
unexpectedBeforeString,
908907
mangledName: mangledName,
909908
unexpectedBeforeComma,
910909
comma: comma,
@@ -959,20 +958,18 @@ extension Parser {
959958
let cTypeLabel: RawTokenSyntax?
960959
let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
961960
let colon: RawTokenSyntax?
962-
let unexpectedBeforeCTypeString: RawUnexpectedNodesSyntax?
963-
let cTypeString: RawTokenSyntax?
961+
let cTypeString: RawStringLiteralExprSyntax?
964962
if self.at(.comma) {
965963
(unexpectedBeforeComma, comma) = self.expect(.comma)
966964
cTypeLabel = self.consumeAnyToken()
967965
(unexpectedBeforeColon, colon) = self.expect(.colon)
968-
(unexpectedBeforeCTypeString, cTypeString) = self.expect(.stringLiteral)
966+
cTypeString = self.parseStringLiteral()
969967
} else {
970968
unexpectedBeforeComma = nil
971969
comma = nil
972970
cTypeLabel = nil
973971
unexpectedBeforeColon = nil
974972
colon = nil
975-
unexpectedBeforeCTypeString = nil
976973
cTypeString = nil
977974
}
978975
return .conventionArguments(
@@ -983,7 +980,6 @@ extension Parser {
983980
cTypeLabel: cTypeLabel,
984981
unexpectedBeforeColon,
985982
colon: colon,
986-
unexpectedBeforeCTypeString,
987983
cTypeString: cTypeString,
988984
arena: self.arena
989985
)

Sources/SwiftParser/Availability.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ extension Parser {
137137
let argumentLabel = self.eat(handle)
138138
let (unexpectedBeforeColon, colon) = self.expect(.colon)
139139
// FIXME: Make sure this is a string literal with no interpolation.
140-
let stringValue = self.consumeAnyToken()
140+
let stringValue = self.parseStringLiteral()
141141

142142
entry = .availabilityLabeledArgument(
143143
RawAvailabilityLabeledArgumentSyntax(

Sources/SwiftParser/Declarations.swift

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,7 @@ extension Parser {
12411241
/// consumes the entire regex literal, we're done.
12421242
return self.currentToken.tokenText.withBuffer {
12431243
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1244-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1244+
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12451245
guard buffer[0] == UInt8(ascii: "/") else { return false }
12461246
switch cursor.lexOperatorIdentifier(cursor, cursor).tokenKind {
12471247
case .unknown:
@@ -2124,19 +2124,7 @@ extension Parser {
21242124
}
21252125

21262126
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
2127-
let stringLiteral: RawStringLiteralExprSyntax
2128-
if self.at(.stringLiteral) {
2129-
stringLiteral = self.parseStringLiteral()
2130-
} else {
2131-
stringLiteral = RawStringLiteralExprSyntax(
2132-
openDelimiter: nil,
2133-
openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2134-
segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena),
2135-
closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2136-
closeDelimiter: nil,
2137-
arena: self.arena
2138-
)
2139-
}
2127+
let stringLiteral = self.parseStringLiteral()
21402128
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
21412129

21422130
switch directive {

Sources/SwiftParser/Directives.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension Parser {
154154
if !self.at(.rightParen) {
155155
let (unexpectedBeforeFile, file) = self.expectIdentifier()
156156
let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon)
157-
let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral)
157+
let fileName = self.parseStringLiteral()
158158
let (unexpectedBeforeComma, comma) = self.expect(.comma)
159159

160160
let (unexpectedBeforeLine, line) = self.expectIdentifier()
@@ -166,7 +166,6 @@ extension Parser {
166166
fileArgLabel: file,
167167
unexpectedBeforeFileColon,
168168
fileArgColon: fileColon,
169-
unexpectedBeforeFileName,
170169
fileName: fileName,
171170
unexpectedBeforeComma,
172171
comma: comma,

Sources/SwiftParser/Expressions.swift

Lines changed: 47 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,7 +1136,7 @@ extension Parser {
11361136
arena: self.arena
11371137
)
11381138
)
1139-
case (.stringLiteral, _)?:
1139+
case (.stringLiteralContents, _)?, (.rawStringDelimiter, _)?, (.stringQuote, _)?, (.multilineStringQuote, _)?, (.singleQuote, _)?:
11401140
return RawExprSyntax(self.parseStringLiteral())
11411141
case (.regexLiteral, _)?:
11421142
return RawExprSyntax(self.parseRegexLiteral())
@@ -1428,83 +1428,62 @@ extension Parser {
14281428
/// escaped-newline → escape-sequence inline-spaces? line-break
14291429
@_spi(RawSyntax)
14301430
public mutating func parseStringLiteral() -> RawStringLiteralExprSyntax {
1431-
var text = self.currentToken.wholeText[self.currentToken.textRange]
1432-
14331431
/// Parse the opening raw string delimiter, if it exists.
1434-
let openDelimiter = self.parseStringLiteralDelimiter(at: .leading, text: text)
1435-
if let openDelimiter = openDelimiter {
1436-
text = text.dropFirst(openDelimiter.tokenText.count)
1437-
}
1432+
let openDelimiter = self.consume(if: .rawStringDelimiter)
14381433

14391434
/// Parse open quote.
1440-
let openQuote =
1441-
self.parseStringLiteralQuote(
1442-
at: openDelimiter != nil ? .leadingRaw : .leading,
1443-
text: text,
1444-
wantsMultiline: self.currentToken.isMultilineStringLiteral
1445-
) ?? RawTokenSyntax(missing: .stringQuote, arena: arena)
1446-
if !openQuote.isMissing {
1447-
text = text.dropFirst(openQuote.tokenText.count)
1448-
}
1435+
let (unexpectedBeforeOpenQuote, openQuote) = self.expectAny([.stringQuote, .multilineStringQuote, .singleQuote], default: .stringQuote)
14491436

14501437
/// Parse segments.
1451-
let (segments, closeStart) = self.parseStringLiteralSegments(
1452-
text,
1453-
openQuote,
1454-
openDelimiter?.tokenText ?? ""
1455-
)
1456-
text = text[closeStart...]
1457-
1458-
/// Parse close quote.
1459-
let closeQuote =
1460-
self.parseStringLiteralQuote(
1461-
at: openDelimiter != nil ? .trailingRaw : .trailing,
1438+
let text = self.currentToken.wholeText[...]
1439+
let segments: RawStringLiteralSegmentsSyntax
1440+
if self.at(.stringLiteralContents) {
1441+
(segments, _) = self.parseStringLiteralSegments(
14621442
text: text,
1463-
wantsMultiline: self.currentToken.isMultilineStringLiteral
1464-
) ?? RawTokenSyntax(missing: openQuote.tokenKind, arena: arena)
1465-
if !closeQuote.isMissing {
1466-
text = text.dropFirst(closeQuote.tokenText.count)
1467-
}
1468-
/// Parse closing raw string delimiter if exist.
1469-
let closeDelimiter: RawTokenSyntax?
1470-
if let delimiter = self.parseStringLiteralDelimiter(
1471-
at: .trailing,
1472-
text: text
1473-
) {
1474-
closeDelimiter = delimiter
1475-
} else if let openDelimiter = openDelimiter {
1476-
closeDelimiter = RawTokenSyntax(
1477-
missing: .rawStringDelimiter,
1478-
text: openDelimiter.tokenText,
1479-
arena: arena
1443+
allowsMultiline: openQuote.tokenKind == .multilineStringQuote,
1444+
delimiter: openDelimiter?.tokenText ?? ""
14801445
)
1446+
/// Discard the raw string literal token and create the structured string
1447+
/// literal expression.
1448+
/// FIXME: We should not instantiate `RawTokenSyntax` and discard it here.
1449+
_ = self.consumeAnyToken()
14811450
} else {
1482-
closeDelimiter = nil
1483-
}
1484-
assert(
1485-
(openDelimiter == nil) == (closeDelimiter == nil),
1486-
"existence of open/close delimiter should match"
1487-
)
1488-
if let closeDelimiter = closeDelimiter, !closeDelimiter.isMissing {
1489-
text = text.dropFirst(closeDelimiter.byteLength)
1451+
segments = RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena)
14901452
}
14911453

1492-
assert(
1493-
text.isEmpty,
1494-
"string literal parsing should consume all the literal text"
1495-
)
1454+
/// Parse close quote.
1455+
let (unexpectedBeforeCloseQuote, closeQuote) = self.expect(openQuote.tokenKind)
14961456

1497-
/// Discard the raw string literal token and create the structed string
1498-
/// literal expression.
1499-
/// FIXME: We should not instantiate `RawTokenSyntax` and discard it here.
1500-
_ = self.consumeAnyToken()
1457+
/// Parse the closing raw string delimiter, if it exists.
1458+
var unexpectedCloseDelimiter: RawTokenSyntax? = nil
1459+
var closeDelimiter = self.consume(if: .rawStringDelimiter)
1460+
1461+
switch (openDelimiter, closeDelimiter) {
1462+
case (nil, nil):
1463+
break // good, no raw delimiters on either side
1464+
case (let open?, nil):
1465+
closeDelimiter = missingToken(.rawStringDelimiter, text: open.tokenText)
1466+
case (nil, .some):
1467+
unexpectedCloseDelimiter = closeDelimiter
1468+
closeDelimiter = nil
1469+
case (let open?, let close?):
1470+
if open.tokenText == close.tokenText {
1471+
break // good, same delimiter on both sides
1472+
} else {
1473+
unexpectedCloseDelimiter = closeDelimiter
1474+
closeDelimiter = missingToken(.rawStringDelimiter, text: open.tokenText)
1475+
}
1476+
}
15011477

15021478
/// Construct the literal expression.
15031479
return RawStringLiteralExprSyntax(
15041480
openDelimiter: openDelimiter,
1481+
unexpectedBeforeOpenQuote,
15051482
openQuote: openQuote,
15061483
segments: segments,
1484+
unexpectedBeforeCloseQuote,
15071485
closeQuote: closeQuote,
1486+
RawUnexpectedNodesSyntax([unexpectedCloseDelimiter], arena: self.arena),
15081487
closeDelimiter: closeDelimiter,
15091488
arena: self.arena
15101489
)
@@ -1679,12 +1658,10 @@ extension Parser {
16791658
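/// - allowsMultiline: whether the opening quote is a multiline string quote (replaces the former `closer` opening-quote token parameter).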
16801659
/// - delimiter: opening custom string delimiter or empty string.
16811660
mutating func parseStringLiteralSegments(
1682-
_ text: Slice<SyntaxText>,
1683-
_ closer: RawTokenSyntax,
1684-
_ delimiter: SyntaxText
1661+
text: Slice<SyntaxText>,
1662+
allowsMultiline: Bool,
1663+
delimiter: SyntaxText
16851664
) -> (RawStringLiteralSegmentsSyntax, SyntaxText.Index) {
1686-
let allowsMultiline = closer.tokenKind == .multilineStringQuote
1687-
16881665
var segments = [RawStringLiteralSegmentsSyntax.Element]()
16891666
var segment = text
16901667
var stringLiteralSegmentStart = segment.startIndex
@@ -1728,7 +1705,10 @@ extension Parser {
17281705
let contentSize = content.withBuffer { buf in
17291706
Lexer.lexToEndOfInterpolatedExpression(buf, allowsMultiline)
17301707
}
1731-
let contentEnd = text.index(contentStart, offsetBy: contentSize)
1708+
var contentEnd = text.index(contentStart, offsetBy: contentSize)
1709+
if contentEnd == text.endIndex {
1710+
contentEnd = text.index(before: text.endIndex)
1711+
}
17321712

17331713
do {
17341714
// `\`
@@ -1809,13 +1789,7 @@ extension Parser {
18091789
// trim `##`.
18101790
segment = text[stringLiteralSegmentStart..<text.index(segment.endIndex, offsetBy: -delimiter.count)]
18111791

1812-
if (SyntaxText(rebasing: segment).hasSuffix(closer.tokenText)) {
1813-
// trim `"`.
1814-
segment = text[stringLiteralSegmentStart..<text.index(segment.endIndex, offsetBy: -closer.tokenText.count)]
1815-
} else {
1816-
// If `"` is not found, eat the rest.
1817-
segment = text[stringLiteralSegmentStart...]
1818-
}
1792+
segment = text[stringLiteralSegmentStart...]
18191793
}
18201794

18211795
assert(segments.count % 2 == 0)

0 commit comments

Comments
 (0)