Skip to content

Commit f126e6c

Browse files
committed
Produce separate tokens for raw string delimiters and string quotes in the lexer
The eventual goal of this change is that we no longer need to re-lex string literals from the parser to separate them into their components. Instead, the lexer should directly produce the lexemes that will later be put into the syntax tree as tokens. The downside is that the lexer now needs to carry state and know whether it is currently lexing a string literal. On the upside, the string literal parser is significantly simplified, and the diagnostics improved without any further changes.
1 parent c5fa09c commit f126e6c

40 files changed

+649
-501
lines changed

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AttributeNodes.swift

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ public let ATTRIBUTE_NODES: [Node] = [
5050
kind: "TupleExprElementList"),
5151
Child(name: "Token",
5252
kind: "Token"),
53+
Child(name: "String",
54+
kind: "StringLiteralExpr"),
5355
Child(name: "Availability",
5456
kind: "AvailabilitySpecList"),
5557
Child(name: "SpecializeArguments",
@@ -518,11 +520,8 @@ public let ATTRIBUTE_NODES: [Node] = [
518520
kind: "Syntax",
519521
children: [
520522
Child(name: "MangledName",
521-
kind: "StringLiteralToken",
522-
description: "The mangled name of a declaration.",
523-
tokenChoices: [
524-
"StringLiteral"
525-
]),
523+
kind: "StringLiteralExpr",
524+
description: "The mangled name of a declaration."),
526525
Child(name: "Comma",
527526
kind: "CommaToken",
528527
tokenChoices: [
@@ -576,11 +575,8 @@ public let ATTRIBUTE_NODES: [Node] = [
576575
"Colon"
577576
]),
578577
Child(name: "CTypeString",
579-
kind: "StringLiteralToken",
580-
isOptional: true,
581-
tokenChoices: [
582-
"StringLiteral"
583-
])
578+
kind: "StringLiteralExpr",
579+
isOptional: true)
584580
]),
585581

586582
Node(name: "ConventionWitnessMethodAttributeArguments",
@@ -619,11 +615,8 @@ public let ATTRIBUTE_NODES: [Node] = [
619615
"Comma"
620616
]),
621617
Child(name: "CxxName",
622-
kind: "StringLiteralToken",
623-
isOptional: true,
624-
tokenChoices: [
625-
"StringLiteral"
626-
])
618+
kind: "StringLiteralExpr",
619+
isOptional: true)
627620
]),
628621

629622
Node(name: "OriginallyDefinedInArguments",
@@ -645,10 +638,7 @@ public let ATTRIBUTE_NODES: [Node] = [
645638
"Colon"
646639
]),
647640
Child(name: "ModuleName",
648-
kind: "StringLiteralToken",
649-
tokenChoices: [
650-
"StringLiteral"
651-
]),
641+
kind: "StringLiteralExpr"),
652642
Child(name: "Comma",
653643
kind: "CommaToken",
654644
tokenChoices: [
@@ -678,10 +668,7 @@ public let ATTRIBUTE_NODES: [Node] = [
678668
"Colon"
679669
]),
680670
Child(name: "Filename",
681-
kind: "StringLiteralToken",
682-
tokenChoices: [
683-
"StringLiteral"
684-
])
671+
kind: "StringLiteralExpr")
685672
]),
686673

687674
Node(name: "DynamicReplacementArguments",
@@ -725,10 +712,7 @@ public let ATTRIBUTE_NODES: [Node] = [
725712
"Colon"
726713
]),
727714
Child(name: "Message",
728-
kind: "StringLiteralToken",
729-
tokenChoices: [
730-
"StringLiteral"
731-
])
715+
kind: "StringLiteralExpr")
732716
]),
733717

734718
Node(name: "EffectsArguments",
@@ -759,11 +743,16 @@ public let ATTRIBUTE_NODES: [Node] = [
759743
"Colon"
760744
]),
761745
Child(name: "Value",
762-
kind: "Token",
763-
tokenChoices: [
764-
"Identifier",
765-
"Keyword",
766-
"StringLiteral"
746+
kind: "Syntax",
747+
nodeChoices: [
748+
Child(name: "Token",
749+
kind: "Token",
750+
tokenChoices: [
751+
"Identifier",
752+
"Keyword"
753+
]),
754+
Child(name: "String",
755+
kind: "StringLiteralExpr")
767756
]),
768757
Child(name: "TrailingComma",
769758
kind: "CommaToken",

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AvailabilityNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,7 @@ public let AVAILABILITY_NODES: [Node] = [
7171
description: "The value of this labeled argument",
7272
nodeChoices: [
7373
Child(name: "String",
74-
kind: "StringLiteralToken",
75-
tokenChoices: [
76-
"StringLiteral"
77-
]),
74+
kind: "StringLiteralExpr"),
7875
Child(name: "Version",
7976
kind: "VersionTuple")
8077
])

CodeGeneration/Sources/SyntaxSupport/gyb_generated/DeclNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,10 +327,7 @@ public let DECL_NODES: [Node] = [
327327
"Colon"
328328
]),
329329
Child(name: "FileName",
330-
kind: "StringLiteralToken",
331-
tokenChoices: [
332-
"StringLiteral"
333-
]),
330+
kind: "StringLiteralExpr"),
334331
Child(name: "Comma",
335332
kind: "CommaToken",
336333
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/StmtNodes.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -720,12 +720,9 @@ public let STMT_NODES: [Node] = [
720720
"Comma"
721721
]),
722722
Child(name: "Message",
723-
kind: "StringLiteralToken",
723+
kind: "StringLiteralExpr",
724724
description: "The assertion message.",
725-
isOptional: true,
726-
tokenChoices: [
727-
"StringLiteral"
728-
]),
725+
isOptional: true),
729726
Child(name: "RightParen",
730727
kind: "RightParenToken",
731728
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/TokenSpec.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
221221
PoundConfigSpec(name: "PoundHasSymbol", kind: "pound__hasSymbol", text: "#_hasSymbol"),
222222
LiteralSpec(name: "IntegerLiteral", kind: "integer_literal", nameForDiagnostics: "integer literal", classification: "IntegerLiteral"),
223223
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
224-
LiteralSpec(name: "StringLiteral", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
224+
LiteralSpec(name: "StringLiteralContents", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
225225
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
226226
MiscSpec(name: "Unknown", kind: "unknown", nameForDiagnostics: "token"),
227227
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ extension RawTokenKind {
242242
return .integerLiteral
243243
case .floatingLiteral:
244244
return .floatingLiteral
245-
case .stringLiteral:
245+
case .stringLiteralContents:
246246
return .stringLiteral
247247
case .regexLiteral:
248248
return .none

Sources/SwiftParser/Attributes.swift

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -288,19 +288,15 @@ extension Parser {
288288
}
289289
case ._cdecl, ._silgen_name:
290290
return parseAttribute(argumentMode: .required) { parser in
291-
return .token(parser.consume(if: .stringLiteral) ?? parser.missingToken(.stringLiteral))
291+
return .string(parser.parseStringLiteral())
292292
}
293293
case ._implements:
294294
return parseAttribute(argumentMode: .required) { parser in
295295
return .implementsArguments(parser.parseImplementsAttributeArguments())
296296
}
297297
case ._semantics:
298298
return parseAttribute(argumentMode: .required) { parser in
299-
if let value = parser.consume(if: .stringLiteral) {
300-
return .token(value)
301-
} else {
302-
return .token(parser.missingToken(.stringLiteral))
303-
}
299+
return .string(parser.parseStringLiteral())
304300
}
305301
case ._backDeploy:
306302
return parseAttribute(argumentMode: .required) { parser in
@@ -858,11 +854,10 @@ extension Parser {
858854

859855
extension Parser {
860856
mutating func parseOpaqueReturnTypeOfAttributeArguments() -> RawOpaqueReturnTypeOfAttributeArgumentsSyntax {
861-
let (unexpectedBeforeString, mangledName) = self.expect(.stringLiteral)
857+
let mangledName = self.parseStringLiteral()
862858
let (unexpectedBeforeComma, comma) = self.expect(.comma)
863859
let (unexpectedBeforeOrdinal, ordinal) = self.expect(.integerLiteral)
864860
return RawOpaqueReturnTypeOfAttributeArgumentsSyntax(
865-
unexpectedBeforeString,
866861
mangledName: mangledName,
867862
unexpectedBeforeComma,
868863
comma: comma,
@@ -894,20 +889,18 @@ extension Parser {
894889
let cTypeLabel: RawTokenSyntax?
895890
let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
896891
let colon: RawTokenSyntax?
897-
let unexpectedBeforeCTypeString: RawUnexpectedNodesSyntax?
898-
let cTypeString: RawTokenSyntax?
892+
let cTypeString: RawStringLiteralExprSyntax?
899893
if self.at(.comma) {
900894
(unexpectedBeforeComma, comma) = self.expect(.comma)
901895
cTypeLabel = self.consumeAnyToken()
902896
(unexpectedBeforeColon, colon) = self.expect(.colon)
903-
(unexpectedBeforeCTypeString, cTypeString) = self.expect(.stringLiteral)
897+
cTypeString = self.parseStringLiteral()
904898
} else {
905899
unexpectedBeforeComma = nil
906900
comma = nil
907901
cTypeLabel = nil
908902
unexpectedBeforeColon = nil
909903
colon = nil
910-
unexpectedBeforeCTypeString = nil
911904
cTypeString = nil
912905
}
913906
return .conventionArguments(
@@ -918,7 +911,6 @@ extension Parser {
918911
cTypeLabel: cTypeLabel,
919912
unexpectedBeforeColon,
920913
colon: colon,
921-
unexpectedBeforeCTypeString,
922914
cTypeString: cTypeString,
923915
arena: self.arena
924916
)
@@ -965,22 +957,19 @@ extension Parser {
965957
}
966958
let unexpectedBeforeComma: RawUnexpectedNodesSyntax?
967959
let comma: RawTokenSyntax?
968-
let unexpectedBeforeCxxName: RawUnexpectedNodesSyntax?
969-
let cxxName: RawTokenSyntax?
960+
let cxxName: RawStringLiteralExprSyntax?
970961
if self.at(.comma) {
971962
(unexpectedBeforeComma, comma) = self.expect(.comma)
972-
(unexpectedBeforeCxxName, cxxName) = self.expect(.stringLiteral)
963+
cxxName = self.parseStringLiteral()
973964
} else {
974965
unexpectedBeforeComma = nil
975966
comma = nil
976-
unexpectedBeforeCxxName = nil
977967
cxxName = nil
978968
}
979969
return RawExposeAttributeArgumentsSyntax(
980970
language: language,
981971
unexpectedBeforeComma,
982972
comma: comma,
983-
unexpectedBeforeCxxName,
984973
cxxName: cxxName,
985974
arena: self.arena
986975
)
@@ -991,7 +980,7 @@ extension Parser {
991980
mutating func parseOriginallyDefinedInArguments() -> RawOriginallyDefinedInArgumentsSyntax {
992981
let (unexpectedBeforeModuleLabel, moduleLabel) = self.expect(.keyword(.module), remapping: .identifier)
993982
let (unexpectedBeforeColon, colon) = self.expect(.colon)
994-
let (unexpectedBeforeModuleName, moduleName) = self.expect(.stringLiteral)
983+
let moduleName = self.parseStringLiteral()
995984
let (unexpectedBeforeComma, comma) = self.expect(.comma)
996985

997986
var platforms: [RawAvailabilityVersionRestrictionListEntrySyntax] = []
@@ -1013,7 +1002,6 @@ extension Parser {
10131002
moduleLabel: moduleLabel,
10141003
unexpectedBeforeColon,
10151004
colon: colon,
1016-
unexpectedBeforeModuleName,
10171005
moduleName: moduleName,
10181006
unexpectedBeforeComma,
10191007
comma: comma,
@@ -1027,13 +1015,12 @@ extension Parser {
10271015
mutating func parseUnderscorePrivateAttributeArguments() -> RawUnderscorePrivateAttributeArgumentsSyntax {
10281016
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.sourceFile), remapping: .identifier)
10291017
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1030-
let (unexpectedBeforeFilename, filename) = self.expect(.stringLiteral)
1018+
let filename = self.parseStringLiteral()
10311019
return RawUnderscorePrivateAttributeArgumentsSyntax(
10321020
unexpectedBeforeLabel,
10331021
sourceFileLabel: label,
10341022
unexpectedBeforeColon,
10351023
colon: colon,
1036-
unexpectedBeforeFilename,
10371024
filename: filename,
10381025
arena: self.arena
10391026
)
@@ -1070,13 +1057,12 @@ extension Parser {
10701057
mutating func parseUnavailableFromAsyncArguments() -> RawUnavailableFromAsyncArgumentsSyntax {
10711058
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.message), remapping: .identifier)
10721059
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1073-
let (unexpectedBeforeMessage, message) = self.expect(.stringLiteral)
1060+
let message = self.parseStringLiteral()
10741061
return RawUnavailableFromAsyncArgumentsSyntax(
10751062
unexpectedBeforeLabel,
10761063
messageLabel: label,
10771064
unexpectedBeforeColon,
10781065
colon: colon,
1079-
unexpectedBeforeMessage,
10801066
message: message,
10811067
arena: self.arena
10821068
)
@@ -1092,15 +1078,22 @@ extension Parser {
10921078
let (unexpectedBeforeLabel, label) = self.expectAny([.keyword(.visibility), .keyword(.metadata)], default: .keyword(.visibility))
10931079
let (unexpectedBeforeColon, colon) = self.expect(.colon)
10941080
let unexpectedBeforeValue: RawUnexpectedNodesSyntax?
1095-
let value: RawTokenSyntax
1081+
let value: RawDocumentationAttributeArgumentSyntax.Value
10961082
switch label.tokenText {
10971083
case "visibility":
1098-
(unexpectedBeforeValue, value) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1084+
let (unexpected, token) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1085+
unexpectedBeforeValue = unexpected
1086+
value = .token(token)
10991087
case "metadata":
1100-
(unexpectedBeforeValue, value) = self.expectAny([.stringLiteral, .identifier], default: .stringLiteral)
1088+
unexpectedBeforeValue = nil
1089+
if let identifier = self.consume(if: .identifier) {
1090+
value = .token(identifier)
1091+
} else {
1092+
value = .string(self.parseStringLiteral())
1093+
}
11011094
default:
11021095
unexpectedBeforeValue = nil
1103-
value = missingToken(.identifier)
1096+
value = .token(missingToken(.identifier))
11041097
}
11051098
keepGoing = self.consume(if: .comma)
11061099
arguments.append(

Sources/SwiftParser/Availability.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ extension Parser {
135135
let argumentLabel = self.eat(handle)
136136
let (unexpectedBeforeColon, colon) = self.expect(.colon)
137137
// FIXME: Make sure this is a string literal with no interpolation.
138-
let stringValue = self.consumeAnyToken()
138+
let stringValue = self.parseStringLiteral()
139139

140140
entry = .availabilityLabeledArgument(
141141
RawAvailabilityLabeledArgumentSyntax(

Sources/SwiftParser/Declarations.swift

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,7 @@ extension Parser {
12411241
/// consumes the entire regex literal, we're done.
12421242
return self.currentToken.tokenText.withBuffer {
12431243
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1244-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1244+
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12451245
guard buffer[0] == UInt8(ascii: "/") else { return false }
12461246
switch cursor.lexOperatorIdentifier(cursor, cursor).tokenKind {
12471247
case .unknown:
@@ -2141,19 +2141,7 @@ extension Parser {
21412141
}
21422142

21432143
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
2144-
let stringLiteral: RawStringLiteralExprSyntax
2145-
if self.at(.stringLiteral) {
2146-
stringLiteral = self.parseStringLiteral()
2147-
} else {
2148-
stringLiteral = RawStringLiteralExprSyntax(
2149-
openDelimiter: nil,
2150-
openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2151-
segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena),
2152-
closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2153-
closeDelimiter: nil,
2154-
arena: self.arena
2155-
)
2156-
}
2144+
let stringLiteral = self.parseStringLiteral()
21572145
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
21582146

21592147
switch directive {

Sources/SwiftParser/Directives.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension Parser {
154154
if !self.at(.rightParen) {
155155
let (unexpectedBeforeFile, file) = self.expectIdentifier()
156156
let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon)
157-
let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral)
157+
let fileName = self.parseStringLiteral()
158158
let (unexpectedBeforeComma, comma) = self.expect(.comma)
159159

160160
let (unexpectedBeforeLine, line) = self.expectIdentifier()
@@ -166,7 +166,6 @@ extension Parser {
166166
fileArgLabel: file,
167167
unexpectedBeforeFileColon,
168168
fileArgColon: fileColon,
169-
unexpectedBeforeFileName,
170169
fileName: fileName,
171170
unexpectedBeforeComma,
172171
comma: comma,

0 commit comments

Comments
 (0)