Skip to content

Commit 5735bd2

Browse files
committed
Produce separate tokens for raw string delimiters and string quotes in the lexer
The eventual goal of this change is that we no longer need to re-lex string literals from the parser to separate them into their components. Instead, the lexer should directly produce the lexemes that will later be put into the syntax tree as tokens. The downside is that the lexer now needs to carry state and know whether it is currently lexing a string literal. On the upside, the string literal parser has been significantly simplified, and the diagnostics have improved without any further changes.
1 parent 80cd97b commit 5735bd2

38 files changed

+466
-395
lines changed

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AttributeNodes.swift

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ public let ATTRIBUTE_NODES: [Node] = [
8989
kind: "Token",
9090
tokenChoices: [
9191
"Identifier",
92-
"StringLiteral",
9392
"IntegerLiteral"
9493
]),
9594
Child(name: "StringExpr",
@@ -258,10 +257,7 @@ public let ATTRIBUTE_NODES: [Node] = [
258257
kind: "Syntax",
259258
nodeChoices: [
260259
Child(name: "String",
261-
kind: "StringLiteralToken",
262-
tokenChoices: [
263-
"StringLiteral"
264-
]),
260+
kind: "StringLiteralExpr"),
265261
Child(name: "Declname",
266262
kind: "DeclName")
267263
])
@@ -604,11 +600,8 @@ public let ATTRIBUTE_NODES: [Node] = [
604600
kind: "Syntax",
605601
children: [
606602
Child(name: "MangledName",
607-
kind: "StringLiteralToken",
608-
description: "The mangled name of a declaration.",
609-
tokenChoices: [
610-
"StringLiteral"
611-
]),
603+
kind: "StringLiteralExpr",
604+
description: "The mangled name of a declaration."),
612605
Child(name: "Comma",
613606
kind: "CommaToken",
614607
tokenChoices: [
@@ -662,11 +655,8 @@ public let ATTRIBUTE_NODES: [Node] = [
662655
"Colon"
663656
]),
664657
Child(name: "CTypeString",
665-
kind: "StringLiteralToken",
666-
isOptional: true,
667-
tokenChoices: [
668-
"StringLiteral"
669-
])
658+
kind: "StringLiteralExpr",
659+
isOptional: true)
670660
]),
671661

672662
Node(name: "ConventionWitnessMethodAttributeArguments",

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AvailabilityNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,7 @@ public let AVAILABILITY_NODES: [Node] = [
7171
description: "The value of this labeled argument",
7272
nodeChoices: [
7373
Child(name: "String",
74-
kind: "StringLiteralToken",
75-
tokenChoices: [
76-
"StringLiteral"
77-
]),
74+
kind: "StringLiteralExpr"),
7875
Child(name: "Version",
7976
kind: "VersionTuple")
8077
])

CodeGeneration/Sources/SyntaxSupport/gyb_generated/DeclNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,10 +316,7 @@ public let DECL_NODES: [Node] = [
316316
"Colon"
317317
]),
318318
Child(name: "FileName",
319-
kind: "StringLiteralToken",
320-
tokenChoices: [
321-
"StringLiteral"
322-
]),
319+
kind: "StringLiteralExpr"),
323320
Child(name: "Comma",
324321
kind: "CommaToken",
325322
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/StmtNodes.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -701,12 +701,9 @@ public let STMT_NODES: [Node] = [
701701
"Comma"
702702
]),
703703
Child(name: "Message",
704-
kind: "StringLiteralToken",
704+
kind: "StringLiteralExpr",
705705
description: "The assertion message.",
706-
isOptional: true,
707-
tokenChoices: [
708-
"StringLiteral"
709-
]),
706+
isOptional: true),
710707
Child(name: "RightParen",
711708
kind: "RightParenToken",
712709
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/TokenSpec.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
306306
PoundConfigSpec(name: "PoundHasSymbol", kind: "pound__hasSymbol", text: "#_hasSymbol"),
307307
LiteralSpec(name: "IntegerLiteral", kind: "integer_literal", nameForDiagnostics: "integer literal", classification: "IntegerLiteral"),
308308
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
309-
LiteralSpec(name: "StringLiteral", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
309+
LiteralSpec(name: "StringLiteralContents", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
310310
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
311311
MiscSpec(name: "Unknown", kind: "unknown", nameForDiagnostics: "token"),
312312
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ extension RawTokenKind {
350350
return .integerLiteral
351351
case .floatingLiteral:
352352
return .floatingLiteral
353-
case .stringLiteral:
353+
case .stringLiteralContents:
354354
return .stringLiteral
355355
case .regexLiteral:
356356
return .none

Sources/SwiftParser/Attributes.swift

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ extension Parser {
722722
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
723723
let (unexpectedBeforeLabel, label) = self.expectIdentifier(keywordRecovery: true)
724724
let (unexpectedBeforeColon, colon) = self.expect(.colon)
725-
let filename = self.consumeAnyToken()
725+
let filename = self.parseStringLiteral()
726726
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
727727
return RawAttributeSyntax(
728728
unexpectedBeforeAtSign,
@@ -858,11 +858,10 @@ extension Parser {
858858

859859
extension Parser {
860860
mutating func parseOpaqueReturnTypeOfAttributeArguments() -> RawOpaqueReturnTypeOfAttributeArgumentsSyntax {
861-
let (unexpectedBeforeString, mangledName) = self.expect(.stringLiteral)
861+
let mangledName = self.parseStringLiteral()
862862
let (unexpectedBeforeComma, comma) = self.expect(.comma)
863863
let (unexpectedBeforeOrdinal, ordinal) = self.expect(.integerLiteral)
864864
return RawOpaqueReturnTypeOfAttributeArgumentsSyntax(
865-
unexpectedBeforeString,
866865
mangledName: mangledName,
867866
unexpectedBeforeComma,
868867
comma: comma,
@@ -917,20 +916,18 @@ extension Parser {
917916
let cTypeLabel: RawTokenSyntax?
918917
let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
919918
let colon: RawTokenSyntax?
920-
let unexpectedBeforeCTypeString: RawUnexpectedNodesSyntax?
921-
let cTypeString: RawTokenSyntax?
919+
let cTypeString: RawStringLiteralExprSyntax?
922920
if self.at(.comma) {
923921
(unexpectedBeforeComma, comma) = self.expect(.comma)
924922
cTypeLabel = self.consumeAnyToken()
925923
(unexpectedBeforeColon, colon) = self.expect(.colon)
926-
(unexpectedBeforeCTypeString, cTypeString) = self.expect(.stringLiteral)
924+
cTypeString = self.parseStringLiteral()
927925
} else {
928926
unexpectedBeforeComma = nil
929927
comma = nil
930928
cTypeLabel = nil
931929
unexpectedBeforeColon = nil
932930
colon = nil
933-
unexpectedBeforeCTypeString = nil
934931
cTypeString = nil
935932
}
936933
return .conventionArguments(
@@ -941,7 +938,6 @@ extension Parser {
941938
cTypeLabel: cTypeLabel,
942939
unexpectedBeforeColon,
943940
colon: colon,
944-
unexpectedBeforeCTypeString,
945941
cTypeString: cTypeString,
946942
arena: self.arena
947943
)

Sources/SwiftParser/Availability.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ extension Parser {
112112
let argumentLabel = self.eat(handle)
113113
let (unexpectedBeforeColon, colon) = self.expect(.colon)
114114
// FIXME: Make sure this is a string literal with no interpolation.
115-
let stringValue = self.consumeAnyToken()
115+
let stringValue = self.parseStringLiteral()
116116

117117
entry = .availabilityLabeledArgument(
118118
RawAvailabilityLabeledArgumentSyntax(

Sources/SwiftParser/Declarations.swift

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,7 @@ extension Parser {
12201220
/// consumes the entire regex literal, we're done.
12211221
return self.currentToken.tokenText.withBuffer {
12221222
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1223-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1223+
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12241224
guard buffer[0] == UInt8(ascii: "/") else { return false }
12251225
switch cursor.lexOperatorIdentifier(cursor, cursor).tokenKind {
12261226
case .unknown:
@@ -2084,19 +2084,7 @@ extension Parser {
20842084
}
20852085

20862086
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
2087-
let stringLiteral: RawStringLiteralExprSyntax
2088-
if self.at(.stringLiteral) {
2089-
stringLiteral = self.parseStringLiteral()
2090-
} else {
2091-
stringLiteral = RawStringLiteralExprSyntax(
2092-
openDelimiter: nil,
2093-
openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2094-
segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena),
2095-
closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2096-
closeDelimiter: nil,
2097-
arena: self.arena
2098-
)
2099-
}
2087+
let stringLiteral = self.parseStringLiteral()
21002088
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
21012089

21022090
switch directive {

Sources/SwiftParser/Directives.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension Parser {
154154
if !self.at(.rightParen) {
155155
let (unexpectedBeforeFile, file) = self.expectIdentifier()
156156
let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon)
157-
let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral)
157+
let fileName = self.parseStringLiteral()
158158
let (unexpectedBeforeComma, comma) = self.expect(.comma)
159159

160160
let (unexpectedBeforeLine, line) = self.expectIdentifier()
@@ -166,7 +166,6 @@ extension Parser {
166166
fileArgLabel: file,
167167
unexpectedBeforeFileColon,
168168
fileArgColon: fileColon,
169-
unexpectedBeforeFileName,
170169
fileName: fileName,
171170
unexpectedBeforeComma,
172171
comma: comma,

Sources/SwiftParser/Expressions.swift

Lines changed: 47 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,7 @@ extension Parser {
11271127
arena: self.arena
11281128
)
11291129
)
1130-
case (.stringLiteral, _)?:
1130+
case (.stringLiteralContents, _)?, (.rawStringDelimiter, _)?, (.stringQuote, _)?, (.multilineStringQuote, _)?, (.singleQuote, _)?:
11311131
return RawExprSyntax(self.parseStringLiteral())
11321132
case (.regexLiteral, _)?:
11331133
return RawExprSyntax(self.parseRegexLiteral())
@@ -1394,83 +1394,62 @@ extension Parser {
13941394
/// escaped-newline → escape-sequence inline-spaces? line-break
13951395
@_spi(RawSyntax)
13961396
public mutating func parseStringLiteral() -> RawStringLiteralExprSyntax {
1397-
var text = self.currentToken.wholeText[self.currentToken.textRange]
1398-
13991397
/// Parse the opening raw string delimiter, if it exists.
1400-
let openDelimiter = self.parseStringLiteralDelimiter(at: .leading, text: text)
1401-
if let openDelimiter = openDelimiter {
1402-
text = text.dropFirst(openDelimiter.tokenText.count)
1403-
}
1398+
let openDelimiter = self.consume(if: .rawStringDelimiter)
14041399

14051400
/// Parse open quote.
1406-
let openQuote =
1407-
self.parseStringLiteralQuote(
1408-
at: openDelimiter != nil ? .leadingRaw : .leading,
1409-
text: text,
1410-
wantsMultiline: self.currentToken.isMultilineStringLiteral
1411-
) ?? RawTokenSyntax(missing: .stringQuote, arena: arena)
1412-
if !openQuote.isMissing {
1413-
text = text.dropFirst(openQuote.tokenText.count)
1414-
}
1401+
let (unexpectedBeforeOpenQuote, openQuote) = self.expectAny([.stringQuote, .multilineStringQuote, .singleQuote], default: .stringQuote)
14151402

14161403
/// Parse segments.
1417-
let (segments, closeStart) = self.parseStringLiteralSegments(
1418-
text,
1419-
openQuote,
1420-
openDelimiter?.tokenText ?? ""
1421-
)
1422-
text = text[closeStart...]
1423-
1424-
/// Parse close quote.
1425-
let closeQuote =
1426-
self.parseStringLiteralQuote(
1427-
at: openDelimiter != nil ? .trailingRaw : .trailing,
1404+
let text = self.currentToken.wholeText[...]
1405+
let segments: RawStringLiteralSegmentsSyntax
1406+
if self.at(.stringLiteralContents) {
1407+
(segments, _) = self.parseStringLiteralSegments(
14281408
text: text,
1429-
wantsMultiline: self.currentToken.isMultilineStringLiteral
1430-
) ?? RawTokenSyntax(missing: openQuote.tokenKind, arena: arena)
1431-
if !closeQuote.isMissing {
1432-
text = text.dropFirst(closeQuote.tokenText.count)
1433-
}
1434-
/// Parse closing raw string delimiter if exist.
1435-
let closeDelimiter: RawTokenSyntax?
1436-
if let delimiter = self.parseStringLiteralDelimiter(
1437-
at: .trailing,
1438-
text: text
1439-
) {
1440-
closeDelimiter = delimiter
1441-
} else if let openDelimiter = openDelimiter {
1442-
closeDelimiter = RawTokenSyntax(
1443-
missing: .rawStringDelimiter,
1444-
text: openDelimiter.tokenText,
1445-
arena: arena
1409+
allowsMultiline: openQuote.tokenKind == .multilineStringQuote,
1410+
delimiter: openDelimiter?.tokenText ?? ""
14461411
)
1412+
/// Discard the raw string literal token and create the structured string
1413+
/// literal expression.
1414+
/// FIXME: We should not instantiate `RawTokenSyntax` and discard it here.
1415+
_ = self.consumeAnyToken()
14471416
} else {
1448-
closeDelimiter = nil
1449-
}
1450-
assert(
1451-
(openDelimiter == nil) == (closeDelimiter == nil),
1452-
"existence of open/close delimiter should match"
1453-
)
1454-
if let closeDelimiter = closeDelimiter, !closeDelimiter.isMissing {
1455-
text = text.dropFirst(closeDelimiter.byteLength)
1417+
segments = RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena)
14561418
}
14571419

1458-
assert(
1459-
text.isEmpty,
1460-
"string literal parsing should consume all the literal text"
1461-
)
1420+
/// Parse close quote.
1421+
let (unexpectedBeforeCloseQuote, closeQuote) = self.expect(openQuote.tokenKind)
14621422

1463-
/// Discard the raw string literal token and create the structed string
1464-
/// literal expression.
1465-
/// FIXME: We should not instantiate `RawTokenSyntax` and discard it here.
1466-
_ = self.consumeAnyToken()
1423+
/// Parse the closing raw string delimiter, if it exists.
1424+
var unexpectedCloseDelimiter: RawTokenSyntax? = nil
1425+
var closeDelimiter = self.consume(if: .rawStringDelimiter)
1426+
1427+
switch (openDelimiter, closeDelimiter) {
1428+
case (nil, nil):
1429+
break // good, no raw delimiters on either side
1430+
case (let open?, nil):
1431+
closeDelimiter = missingToken(.rawStringDelimiter, text: open.tokenText)
1432+
case (nil, .some):
1433+
unexpectedCloseDelimiter = closeDelimiter
1434+
closeDelimiter = nil
1435+
case (let open?, let close?):
1436+
if open.tokenText == close.tokenText {
1437+
break // good, same delimiter on both sides
1438+
} else {
1439+
unexpectedCloseDelimiter = closeDelimiter
1440+
closeDelimiter = missingToken(.rawStringDelimiter, text: open.tokenText)
1441+
}
1442+
}
14671443

14681444
/// Construct the literal expression.
14691445
return RawStringLiteralExprSyntax(
14701446
openDelimiter: openDelimiter,
1447+
unexpectedBeforeOpenQuote,
14711448
openQuote: openQuote,
14721449
segments: segments,
1450+
unexpectedBeforeCloseQuote,
14731451
closeQuote: closeQuote,
1452+
RawUnexpectedNodesSyntax([unexpectedCloseDelimiter], arena: self.arena),
14741453
closeDelimiter: closeDelimiter,
14751454
arena: self.arena
14761455
)
@@ -1646,12 +1625,10 @@ extension Parser {
16461625
/// - allowsMultiline: whether the literal was opened with a multiline string quote.
16471626
/// - delimiter: opening custom string delimiter or empty string.
16481627
mutating func parseStringLiteralSegments(
1649-
_ text: Slice<SyntaxText>,
1650-
_ closer: RawTokenSyntax,
1651-
_ delimiter: SyntaxText
1628+
text: Slice<SyntaxText>,
1629+
allowsMultiline: Bool,
1630+
delimiter: SyntaxText
16521631
) -> (RawStringLiteralSegmentsSyntax, SyntaxText.Index) {
1653-
let allowsMultiline = closer.tokenKind == .multilineStringQuote
1654-
16551632
var segments = [RawStringLiteralSegmentsSyntax.Element]()
16561633
var segment = text
16571634
var stringLiteralSegmentStart = segment.startIndex
@@ -1695,7 +1672,10 @@ extension Parser {
16951672
let contentSize = content.withBuffer { buf in
16961673
Lexer.lexToEndOfInterpolatedExpression(buf, allowsMultiline)
16971674
}
1698-
let contentEnd = text.index(contentStart, offsetBy: contentSize)
1675+
var contentEnd = text.index(contentStart, offsetBy: contentSize)
1676+
if contentEnd == text.endIndex {
1677+
contentEnd = text.index(before: text.endIndex)
1678+
}
16991679

17001680
do {
17011681
// `\`
@@ -1776,13 +1756,7 @@ extension Parser {
17761756
// trim `##`.
17771757
segment = text[stringLiteralSegmentStart..<text.index(segment.endIndex, offsetBy: -delimiter.count)]
17781758

1779-
if (SyntaxText(rebasing: segment).hasSuffix(closer.tokenText)) {
1780-
// trim `"`.
1781-
segment = text[stringLiteralSegmentStart..<text.index(segment.endIndex, offsetBy: -closer.tokenText.count)]
1782-
} else {
1783-
// If `"` is not found, eat the rest.
1784-
segment = text[stringLiteralSegmentStart...]
1785-
}
1759+
segment = text[stringLiteralSegmentStart...]
17861760
}
17871761

17881762
assert(segments.count % 2 == 0)

0 commit comments

Comments
 (0)