Skip to content

Commit 70cb210

Browse files
committed
Produce separate tokens for raw string delimiters and string quotes in the lexer
The eventual goal of this change is that we no longer need to re-lex string literals in the parser to separate them into their components. Instead, the lexer should directly produce the lexemes that will later be put into the syntax tree as tokens. The downside of this is that the lexer now needs to carry state and know whether it is currently lexing a string literal. On the upside, the string literal parser can be significantly simplified, and the diagnostics improve without any further changes.
1 parent 28b27c1 commit 70cb210

40 files changed

+715
-502
lines changed

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AttributeNodes.swift

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ public let ATTRIBUTE_NODES: [Node] = [
5050
kind: "TupleExprElementList"),
5151
Child(name: "Token",
5252
kind: "Token"),
53+
Child(name: "String",
54+
kind: "StringLiteralExpr"),
5355
Child(name: "Availability",
5456
kind: "AvailabilitySpecList"),
5557
Child(name: "SpecializeArguments",
@@ -518,11 +520,8 @@ public let ATTRIBUTE_NODES: [Node] = [
518520
kind: "Syntax",
519521
children: [
520522
Child(name: "MangledName",
521-
kind: "StringLiteralToken",
522-
description: "The mangled name of a declaration.",
523-
tokenChoices: [
524-
"StringLiteral"
525-
]),
523+
kind: "StringLiteralExpr",
524+
description: "The mangled name of a declaration."),
526525
Child(name: "Comma",
527526
kind: "CommaToken",
528527
tokenChoices: [
@@ -576,11 +575,8 @@ public let ATTRIBUTE_NODES: [Node] = [
576575
"Colon"
577576
]),
578577
Child(name: "CTypeString",
579-
kind: "StringLiteralToken",
580-
isOptional: true,
581-
tokenChoices: [
582-
"StringLiteral"
583-
])
578+
kind: "StringLiteralExpr",
579+
isOptional: true)
584580
]),
585581

586582
Node(name: "ConventionWitnessMethodAttributeArguments",
@@ -619,11 +615,8 @@ public let ATTRIBUTE_NODES: [Node] = [
619615
"Comma"
620616
]),
621617
Child(name: "CxxName",
622-
kind: "StringLiteralToken",
623-
isOptional: true,
624-
tokenChoices: [
625-
"StringLiteral"
626-
])
618+
kind: "StringLiteralExpr",
619+
isOptional: true)
627620
]),
628621

629622
Node(name: "OriginallyDefinedInArguments",
@@ -645,10 +638,7 @@ public let ATTRIBUTE_NODES: [Node] = [
645638
"Colon"
646639
]),
647640
Child(name: "ModuleName",
648-
kind: "StringLiteralToken",
649-
tokenChoices: [
650-
"StringLiteral"
651-
]),
641+
kind: "StringLiteralExpr"),
652642
Child(name: "Comma",
653643
kind: "CommaToken",
654644
tokenChoices: [
@@ -678,10 +668,7 @@ public let ATTRIBUTE_NODES: [Node] = [
678668
"Colon"
679669
]),
680670
Child(name: "Filename",
681-
kind: "StringLiteralToken",
682-
tokenChoices: [
683-
"StringLiteral"
684-
])
671+
kind: "StringLiteralExpr")
685672
]),
686673

687674
Node(name: "DynamicReplacementArguments",
@@ -725,10 +712,7 @@ public let ATTRIBUTE_NODES: [Node] = [
725712
"Colon"
726713
]),
727714
Child(name: "Message",
728-
kind: "StringLiteralToken",
729-
tokenChoices: [
730-
"StringLiteral"
731-
])
715+
kind: "StringLiteralExpr")
732716
]),
733717

734718
Node(name: "EffectsArguments",
@@ -759,11 +743,16 @@ public let ATTRIBUTE_NODES: [Node] = [
759743
"Colon"
760744
]),
761745
Child(name: "Value",
762-
kind: "Token",
763-
tokenChoices: [
764-
"Identifier",
765-
"Keyword",
766-
"StringLiteral"
746+
kind: "Syntax",
747+
nodeChoices: [
748+
Child(name: "Token",
749+
kind: "Token",
750+
tokenChoices: [
751+
"Identifier",
752+
"Keyword"
753+
]),
754+
Child(name: "String",
755+
kind: "StringLiteralExpr")
767756
]),
768757
Child(name: "TrailingComma",
769758
kind: "CommaToken",

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AvailabilityNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,7 @@ public let AVAILABILITY_NODES: [Node] = [
6969
description: "The value of this labeled argument",
7070
nodeChoices: [
7171
Child(name: "String",
72-
kind: "StringLiteralToken",
73-
tokenChoices: [
74-
"StringLiteral"
75-
]),
72+
kind: "StringLiteralExpr"),
7673
Child(name: "Version",
7774
kind: "VersionTuple")
7875
])

CodeGeneration/Sources/SyntaxSupport/gyb_generated/DeclNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,10 +327,7 @@ public let DECL_NODES: [Node] = [
327327
"Colon"
328328
]),
329329
Child(name: "FileName",
330-
kind: "StringLiteralToken",
331-
tokenChoices: [
332-
"StringLiteral"
333-
]),
330+
kind: "StringLiteralExpr"),
334331
Child(name: "Comma",
335332
kind: "CommaToken",
336333
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/StmtNodes.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -720,12 +720,9 @@ public let STMT_NODES: [Node] = [
720720
"Comma"
721721
]),
722722
Child(name: "Message",
723-
kind: "StringLiteralToken",
723+
kind: "StringLiteralExpr",
724724
description: "The assertion message.",
725-
isOptional: true,
726-
tokenChoices: [
727-
"StringLiteral"
728-
]),
725+
isOptional: true),
729726
Child(name: "RightParen",
730727
kind: "RightParenToken",
731728
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/TokenSpec.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
221221
PoundConfigSpec(name: "PoundHasSymbol", kind: "pound__hasSymbol", text: "#_hasSymbol"),
222222
LiteralSpec(name: "IntegerLiteral", kind: "integer_literal", nameForDiagnostics: "integer literal", classification: "IntegerLiteral"),
223223
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
224-
LiteralSpec(name: "StringLiteral", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
224+
LiteralSpec(name: "StringLiteralContents", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
225225
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
226226
MiscSpec(name: "Unknown", kind: "unknown", nameForDiagnostics: "token"),
227227
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ extension RawTokenKind {
242242
return .integerLiteral
243243
case .floatingLiteral:
244244
return .floatingLiteral
245-
case .stringLiteral:
245+
case .stringLiteralContents:
246246
return .stringLiteral
247247
case .regexLiteral:
248248
return .none

Sources/SwiftParser/Attributes.swift

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -284,19 +284,15 @@ extension Parser {
284284
}
285285
case ._cdecl, ._silgen_name:
286286
return parseAttribute(argumentMode: .required) { parser in
287-
return .token(parser.consume(if: .stringLiteral) ?? parser.missingToken(.stringLiteral))
287+
return .string(parser.parseStringLiteral())
288288
}
289289
case ._implements:
290290
return parseAttribute(argumentMode: .required) { parser in
291291
return .implementsArguments(parser.parseImplementsAttributeArguments())
292292
}
293293
case ._semantics:
294294
return parseAttribute(argumentMode: .required) { parser in
295-
if let value = parser.consume(if: .stringLiteral) {
296-
return .token(value)
297-
} else {
298-
return .token(parser.missingToken(.stringLiteral))
299-
}
295+
return .string(parser.parseStringLiteral())
300296
}
301297
case ._backDeploy:
302298
return parseAttribute(argumentMode: .required) { parser in
@@ -854,11 +850,10 @@ extension Parser {
854850

855851
extension Parser {
856852
mutating func parseOpaqueReturnTypeOfAttributeArguments() -> RawOpaqueReturnTypeOfAttributeArgumentsSyntax {
857-
let (unexpectedBeforeString, mangledName) = self.expect(.stringLiteral)
853+
let mangledName = self.parseStringLiteral()
858854
let (unexpectedBeforeComma, comma) = self.expect(.comma)
859855
let (unexpectedBeforeOrdinal, ordinal) = self.expect(.integerLiteral)
860856
return RawOpaqueReturnTypeOfAttributeArgumentsSyntax(
861-
unexpectedBeforeString,
862857
mangledName: mangledName,
863858
unexpectedBeforeComma,
864859
comma: comma,
@@ -890,20 +885,18 @@ extension Parser {
890885
let cTypeLabel: RawTokenSyntax?
891886
let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
892887
let colon: RawTokenSyntax?
893-
let unexpectedBeforeCTypeString: RawUnexpectedNodesSyntax?
894-
let cTypeString: RawTokenSyntax?
888+
let cTypeString: RawStringLiteralExprSyntax?
895889
if self.at(.comma) {
896890
(unexpectedBeforeComma, comma) = self.expect(.comma)
897891
cTypeLabel = self.consumeAnyToken()
898892
(unexpectedBeforeColon, colon) = self.expect(.colon)
899-
(unexpectedBeforeCTypeString, cTypeString) = self.expect(.stringLiteral)
893+
cTypeString = self.parseStringLiteral()
900894
} else {
901895
unexpectedBeforeComma = nil
902896
comma = nil
903897
cTypeLabel = nil
904898
unexpectedBeforeColon = nil
905899
colon = nil
906-
unexpectedBeforeCTypeString = nil
907900
cTypeString = nil
908901
}
909902
return .conventionArguments(
@@ -914,7 +907,6 @@ extension Parser {
914907
cTypeLabel: cTypeLabel,
915908
unexpectedBeforeColon,
916909
colon: colon,
917-
unexpectedBeforeCTypeString,
918910
cTypeString: cTypeString,
919911
arena: self.arena
920912
)
@@ -961,22 +953,19 @@ extension Parser {
961953
}
962954
let unexpectedBeforeComma: RawUnexpectedNodesSyntax?
963955
let comma: RawTokenSyntax?
964-
let unexpectedBeforeCxxName: RawUnexpectedNodesSyntax?
965-
let cxxName: RawTokenSyntax?
956+
let cxxName: RawStringLiteralExprSyntax?
966957
if self.at(.comma) {
967958
(unexpectedBeforeComma, comma) = self.expect(.comma)
968-
(unexpectedBeforeCxxName, cxxName) = self.expect(.stringLiteral)
959+
cxxName = self.parseStringLiteral()
969960
} else {
970961
unexpectedBeforeComma = nil
971962
comma = nil
972-
unexpectedBeforeCxxName = nil
973963
cxxName = nil
974964
}
975965
return RawExposeAttributeArgumentsSyntax(
976966
language: language,
977967
unexpectedBeforeComma,
978968
comma: comma,
979-
unexpectedBeforeCxxName,
980969
cxxName: cxxName,
981970
arena: self.arena
982971
)
@@ -987,7 +976,7 @@ extension Parser {
987976
mutating func parseOriginallyDefinedInArguments() -> RawOriginallyDefinedInArgumentsSyntax {
988977
let (unexpectedBeforeModuleLabel, moduleLabel) = self.expect(.keyword(.module), remapping: .identifier)
989978
let (unexpectedBeforeColon, colon) = self.expect(.colon)
990-
let (unexpectedBeforeModuleName, moduleName) = self.expect(.stringLiteral)
979+
let moduleName = self.parseStringLiteral()
991980
let (unexpectedBeforeComma, comma) = self.expect(.comma)
992981

993982
var platforms: [RawAvailabilityVersionRestrictionListEntrySyntax] = []
@@ -1009,7 +998,6 @@ extension Parser {
1009998
moduleLabel: moduleLabel,
1010999
unexpectedBeforeColon,
10111000
colon: colon,
1012-
unexpectedBeforeModuleName,
10131001
moduleName: moduleName,
10141002
unexpectedBeforeComma,
10151003
comma: comma,
@@ -1023,13 +1011,12 @@ extension Parser {
10231011
mutating func parseUnderscorePrivateAttributeArguments() -> RawUnderscorePrivateAttributeArgumentsSyntax {
10241012
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.sourceFile), remapping: .identifier)
10251013
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1026-
let (unexpectedBeforeFilename, filename) = self.expect(.stringLiteral)
1014+
let filename = self.parseStringLiteral()
10271015
return RawUnderscorePrivateAttributeArgumentsSyntax(
10281016
unexpectedBeforeLabel,
10291017
sourceFileLabel: label,
10301018
unexpectedBeforeColon,
10311019
colon: colon,
1032-
unexpectedBeforeFilename,
10331020
filename: filename,
10341021
arena: self.arena
10351022
)
@@ -1066,13 +1053,12 @@ extension Parser {
10661053
mutating func parseUnavailableFromAsyncArguments() -> RawUnavailableFromAsyncArgumentsSyntax {
10671054
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.message), remapping: .identifier)
10681055
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1069-
let (unexpectedBeforeMessage, message) = self.expect(.stringLiteral)
1056+
let message = self.parseStringLiteral()
10701057
return RawUnavailableFromAsyncArgumentsSyntax(
10711058
unexpectedBeforeLabel,
10721059
messageLabel: label,
10731060
unexpectedBeforeColon,
10741061
colon: colon,
1075-
unexpectedBeforeMessage,
10761062
message: message,
10771063
arena: self.arena
10781064
)
@@ -1088,15 +1074,22 @@ extension Parser {
10881074
let (unexpectedBeforeLabel, label) = self.expectAny([.keyword(.visibility), .keyword(.metadata)], default: .keyword(.visibility))
10891075
let (unexpectedBeforeColon, colon) = self.expect(.colon)
10901076
let unexpectedBeforeValue: RawUnexpectedNodesSyntax?
1091-
let value: RawTokenSyntax
1077+
let value: RawDocumentationAttributeArgumentSyntax.Value
10921078
switch label.tokenText {
10931079
case "visibility":
1094-
(unexpectedBeforeValue, value) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1080+
let (unexpected, token) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1081+
unexpectedBeforeValue = unexpected
1082+
value = .token(token)
10951083
case "metadata":
1096-
(unexpectedBeforeValue, value) = self.expectAny([.stringLiteral, .identifier], default: .stringLiteral)
1084+
unexpectedBeforeValue = nil
1085+
if let identifier = self.consume(if: .identifier) {
1086+
value = .token(identifier)
1087+
} else {
1088+
value = .string(self.parseStringLiteral())
1089+
}
10971090
default:
10981091
unexpectedBeforeValue = nil
1099-
value = missingToken(.identifier)
1092+
value = .token(missingToken(.identifier))
11001093
}
11011094
keepGoing = self.consume(if: .comma)
11021095
arguments.append(

Sources/SwiftParser/Availability.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ extension Parser {
108108
let argumentLabel = self.eat(handle)
109109
let (unexpectedBeforeColon, colon) = self.expect(.colon)
110110
// FIXME: Make sure this is a string literal with no interpolation.
111-
let stringValue = self.consumeAnyToken()
111+
let stringValue = self.parseStringLiteral()
112112

113113
entry = .availabilityLabeledArgument(
114114
RawAvailabilityLabeledArgumentSyntax(

Sources/SwiftParser/Declarations.swift

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,7 @@ extension Parser {
12411241
/// consumes the entire regex literal, we're done.
12421242
return self.currentToken.tokenText.withBuffer {
12431243
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1244-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1244+
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12451245
guard buffer[0] == UInt8(ascii: "/") else { return false }
12461246
switch cursor.lexOperatorIdentifier(cursor, cursor).tokenKind {
12471247
case .unknown:
@@ -2141,19 +2141,7 @@ extension Parser {
21412141
}
21422142

21432143
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
2144-
let stringLiteral: RawStringLiteralExprSyntax
2145-
if self.at(.stringLiteral) {
2146-
stringLiteral = self.parseStringLiteral()
2147-
} else {
2148-
stringLiteral = RawStringLiteralExprSyntax(
2149-
openDelimiter: nil,
2150-
openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2151-
segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena),
2152-
closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2153-
closeDelimiter: nil,
2154-
arena: self.arena
2155-
)
2156-
}
2144+
let stringLiteral = self.parseStringLiteral()
21572145
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
21582146

21592147
switch directive {

Sources/SwiftParser/Directives.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension Parser {
154154
if !self.at(.rightParen) {
155155
let (unexpectedBeforeFile, file) = self.expectIdentifier()
156156
let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon)
157-
let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral)
157+
let fileName = self.parseStringLiteral()
158158
let (unexpectedBeforeComma, comma) = self.expect(.comma)
159159

160160
let (unexpectedBeforeLine, line) = self.expectIdentifier()
@@ -166,7 +166,6 @@ extension Parser {
166166
fileArgLabel: file,
167167
unexpectedBeforeFileColon,
168168
fileArgColon: fileColon,
169-
unexpectedBeforeFileName,
170169
fileName: fileName,
171170
unexpectedBeforeComma,
172171
comma: comma,

0 commit comments

Comments
 (0)