Skip to content

Commit 8042f82

Browse files
committed
Produce separate tokens for raw string delimiters and string quotes in the lexer
The eventual goal of this change is that the parser no longer needs to re-lex string literals to separate them into their components. Instead, the lexer should directly produce the lexemes that will later be put into the syntax tree as tokens. The downside is that the lexer now needs to carry state and know whether it is lexing a string literal. On the upside, the string literal parser could be significantly simplified, and the diagnostics improved without any further changes.
1 parent 9345009 commit 8042f82

40 files changed

+715
-502
lines changed

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AttributeNodes.swift

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ public let ATTRIBUTE_NODES: [Node] = [
4545
kind: "TupleExprElementList"),
4646
Child(name: "Token",
4747
kind: "Token"),
48+
Child(name: "String",
49+
kind: "StringLiteralExpr"),
4850
Child(name: "Availability",
4951
kind: "AvailabilitySpecList"),
5052
Child(name: "SpecializeArguments",
@@ -513,11 +515,8 @@ public let ATTRIBUTE_NODES: [Node] = [
513515
kind: "Syntax",
514516
children: [
515517
Child(name: "MangledName",
516-
kind: "StringLiteralToken",
517-
description: "The mangled name of a declaration.",
518-
tokenChoices: [
519-
"StringLiteral"
520-
]),
518+
kind: "StringLiteralExpr",
519+
description: "The mangled name of a declaration."),
521520
Child(name: "Comma",
522521
kind: "CommaToken",
523522
tokenChoices: [
@@ -571,11 +570,8 @@ public let ATTRIBUTE_NODES: [Node] = [
571570
"Colon"
572571
]),
573572
Child(name: "CTypeString",
574-
kind: "StringLiteralToken",
575-
isOptional: true,
576-
tokenChoices: [
577-
"StringLiteral"
578-
])
573+
kind: "StringLiteralExpr",
574+
isOptional: true)
579575
]),
580576

581577
Node(name: "ConventionWitnessMethodAttributeArguments",
@@ -614,11 +610,8 @@ public let ATTRIBUTE_NODES: [Node] = [
614610
"Comma"
615611
]),
616612
Child(name: "CxxName",
617-
kind: "StringLiteralToken",
618-
isOptional: true,
619-
tokenChoices: [
620-
"StringLiteral"
621-
])
613+
kind: "StringLiteralExpr",
614+
isOptional: true)
622615
]),
623616

624617
Node(name: "OriginallyDefinedInArguments",
@@ -640,10 +633,7 @@ public let ATTRIBUTE_NODES: [Node] = [
640633
"Colon"
641634
]),
642635
Child(name: "ModuleName",
643-
kind: "StringLiteralToken",
644-
tokenChoices: [
645-
"StringLiteral"
646-
]),
636+
kind: "StringLiteralExpr"),
647637
Child(name: "Comma",
648638
kind: "CommaToken",
649639
tokenChoices: [
@@ -673,10 +663,7 @@ public let ATTRIBUTE_NODES: [Node] = [
673663
"Colon"
674664
]),
675665
Child(name: "Filename",
676-
kind: "StringLiteralToken",
677-
tokenChoices: [
678-
"StringLiteral"
679-
])
666+
kind: "StringLiteralExpr")
680667
]),
681668

682669
Node(name: "DynamicReplacementArguments",
@@ -720,10 +707,7 @@ public let ATTRIBUTE_NODES: [Node] = [
720707
"Colon"
721708
]),
722709
Child(name: "Message",
723-
kind: "StringLiteralToken",
724-
tokenChoices: [
725-
"StringLiteral"
726-
])
710+
kind: "StringLiteralExpr")
727711
]),
728712

729713
Node(name: "EffectsArguments",
@@ -754,11 +738,16 @@ public let ATTRIBUTE_NODES: [Node] = [
754738
"Colon"
755739
]),
756740
Child(name: "Value",
757-
kind: "Token",
758-
tokenChoices: [
759-
"Identifier",
760-
"Keyword",
761-
"StringLiteral"
741+
kind: "Syntax",
742+
nodeChoices: [
743+
Child(name: "Token",
744+
kind: "Token",
745+
tokenChoices: [
746+
"Identifier",
747+
"Keyword"
748+
]),
749+
Child(name: "String",
750+
kind: "StringLiteralExpr")
762751
]),
763752
Child(name: "TrailingComma",
764753
kind: "CommaToken",

CodeGeneration/Sources/SyntaxSupport/gyb_generated/AvailabilityNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,7 @@ public let AVAILABILITY_NODES: [Node] = [
6969
description: "The value of this labeled argument",
7070
nodeChoices: [
7171
Child(name: "String",
72-
kind: "StringLiteralToken",
73-
tokenChoices: [
74-
"StringLiteral"
75-
]),
72+
kind: "StringLiteralExpr"),
7673
Child(name: "Version",
7774
kind: "VersionTuple")
7875
])

CodeGeneration/Sources/SyntaxSupport/gyb_generated/DeclNodes.swift

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,10 +327,7 @@ public let DECL_NODES: [Node] = [
327327
"Colon"
328328
]),
329329
Child(name: "FileName",
330-
kind: "StringLiteralToken",
331-
tokenChoices: [
332-
"StringLiteral"
333-
]),
330+
kind: "StringLiteralExpr"),
334331
Child(name: "Comma",
335332
kind: "CommaToken",
336333
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/StmtNodes.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -720,12 +720,9 @@ public let STMT_NODES: [Node] = [
720720
"Comma"
721721
]),
722722
Child(name: "Message",
723-
kind: "StringLiteralToken",
723+
kind: "StringLiteralExpr",
724724
description: "The assertion message.",
725-
isOptional: true,
726-
tokenChoices: [
727-
"StringLiteral"
728-
]),
725+
isOptional: true),
729726
Child(name: "RightParen",
730727
kind: "RightParenToken",
731728
tokenChoices: [

CodeGeneration/Sources/SyntaxSupport/gyb_generated/TokenSpec.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
221221
PoundConfigSpec(name: "PoundHasSymbol", kind: "pound__hasSymbol", text: "#_hasSymbol"),
222222
LiteralSpec(name: "IntegerLiteral", kind: "integer_literal", nameForDiagnostics: "integer literal", classification: "IntegerLiteral"),
223223
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
224-
LiteralSpec(name: "StringLiteral", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
224+
LiteralSpec(name: "StringLiteralContents", kind: "string_literal", nameForDiagnostics: "string literal", classification: "StringLiteral"),
225225
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
226226
MiscSpec(name: "Unknown", kind: "unknown", nameForDiagnostics: "token"),
227227
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ extension RawTokenKind {
242242
return .integerLiteral
243243
case .floatingLiteral:
244244
return .floatingLiteral
245-
case .stringLiteral:
245+
case .stringLiteralContents:
246246
return .stringLiteral
247247
case .regexLiteral:
248248
return .none

Sources/SwiftParser/Attributes.swift

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -284,19 +284,15 @@ extension Parser {
284284
}
285285
case ._cdecl, ._silgen_name:
286286
return parseAttribute(argumentMode: .required) { parser in
287-
return .token(parser.consume(if: .stringLiteral) ?? parser.missingToken(.stringLiteral))
287+
return .string(parser.parseStringLiteral())
288288
}
289289
case ._implements:
290290
return parseAttribute(argumentMode: .required) { parser in
291291
return .implementsArguments(parser.parseImplementsAttributeArguments())
292292
}
293293
case ._semantics:
294294
return parseAttribute(argumentMode: .required) { parser in
295-
if let value = parser.consume(if: .stringLiteral) {
296-
return .token(value)
297-
} else {
298-
return .token(parser.missingToken(.stringLiteral))
299-
}
295+
return .string(parser.parseStringLiteral())
300296
}
301297
case ._backDeploy:
302298
return parseAttribute(argumentMode: .required) { parser in
@@ -854,11 +850,10 @@ extension Parser {
854850

855851
extension Parser {
856852
mutating func parseOpaqueReturnTypeOfAttributeArguments() -> RawOpaqueReturnTypeOfAttributeArgumentsSyntax {
857-
let (unexpectedBeforeString, mangledName) = self.expect(.stringLiteral)
853+
let mangledName = self.parseStringLiteral()
858854
let (unexpectedBeforeComma, comma) = self.expect(.comma)
859855
let (unexpectedBeforeOrdinal, ordinal) = self.expect(.integerLiteral)
860856
return RawOpaqueReturnTypeOfAttributeArgumentsSyntax(
861-
unexpectedBeforeString,
862857
mangledName: mangledName,
863858
unexpectedBeforeComma,
864859
comma: comma,
@@ -890,20 +885,18 @@ extension Parser {
890885
let cTypeLabel: RawTokenSyntax?
891886
let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
892887
let colon: RawTokenSyntax?
893-
let unexpectedBeforeCTypeString: RawUnexpectedNodesSyntax?
894-
let cTypeString: RawTokenSyntax?
888+
let cTypeString: RawStringLiteralExprSyntax?
895889
if self.at(.comma) {
896890
(unexpectedBeforeComma, comma) = self.expect(.comma)
897891
cTypeLabel = self.consumeAnyToken()
898892
(unexpectedBeforeColon, colon) = self.expect(.colon)
899-
(unexpectedBeforeCTypeString, cTypeString) = self.expect(.stringLiteral)
893+
cTypeString = self.parseStringLiteral()
900894
} else {
901895
unexpectedBeforeComma = nil
902896
comma = nil
903897
cTypeLabel = nil
904898
unexpectedBeforeColon = nil
905899
colon = nil
906-
unexpectedBeforeCTypeString = nil
907900
cTypeString = nil
908901
}
909902
return .conventionArguments(
@@ -914,7 +907,6 @@ extension Parser {
914907
cTypeLabel: cTypeLabel,
915908
unexpectedBeforeColon,
916909
colon: colon,
917-
unexpectedBeforeCTypeString,
918910
cTypeString: cTypeString,
919911
arena: self.arena
920912
)
@@ -961,22 +953,19 @@ extension Parser {
961953
}
962954
let unexpectedBeforeComma: RawUnexpectedNodesSyntax?
963955
let comma: RawTokenSyntax?
964-
let unexpectedBeforeCxxName: RawUnexpectedNodesSyntax?
965-
let cxxName: RawTokenSyntax?
956+
let cxxName: RawStringLiteralExprSyntax?
966957
if self.at(.comma) {
967958
(unexpectedBeforeComma, comma) = self.expect(.comma)
968-
(unexpectedBeforeCxxName, cxxName) = self.expect(.stringLiteral)
959+
cxxName = self.parseStringLiteral()
969960
} else {
970961
unexpectedBeforeComma = nil
971962
comma = nil
972-
unexpectedBeforeCxxName = nil
973963
cxxName = nil
974964
}
975965
return RawExposeAttributeArgumentsSyntax(
976966
language: language,
977967
unexpectedBeforeComma,
978968
comma: comma,
979-
unexpectedBeforeCxxName,
980969
cxxName: cxxName,
981970
arena: self.arena
982971
)
@@ -987,7 +976,7 @@ extension Parser {
987976
mutating func parseOriginallyDefinedInArguments() -> RawOriginallyDefinedInArgumentsSyntax {
988977
let (unexpectedBeforeModuleLabel, moduleLabel) = self.expect(.keyword(.module), remapping: .identifier)
989978
let (unexpectedBeforeColon, colon) = self.expect(.colon)
990-
let (unexpectedBeforeModuleName, moduleName) = self.expect(.stringLiteral)
979+
let moduleName = self.parseStringLiteral()
991980
let (unexpectedBeforeComma, comma) = self.expect(.comma)
992981

993982
var platforms: [RawAvailabilityVersionRestrictionListEntrySyntax] = []
@@ -1009,7 +998,6 @@ extension Parser {
1009998
moduleLabel: moduleLabel,
1010999
unexpectedBeforeColon,
10111000
colon: colon,
1012-
unexpectedBeforeModuleName,
10131001
moduleName: moduleName,
10141002
unexpectedBeforeComma,
10151003
comma: comma,
@@ -1023,13 +1011,12 @@ extension Parser {
10231011
mutating func parseUnderscorePrivateAttributeArguments() -> RawUnderscorePrivateAttributeArgumentsSyntax {
10241012
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.sourceFile), remapping: .identifier)
10251013
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1026-
let (unexpectedBeforeFilename, filename) = self.expect(.stringLiteral)
1014+
let filename = self.parseStringLiteral()
10271015
return RawUnderscorePrivateAttributeArgumentsSyntax(
10281016
unexpectedBeforeLabel,
10291017
sourceFileLabel: label,
10301018
unexpectedBeforeColon,
10311019
colon: colon,
1032-
unexpectedBeforeFilename,
10331020
filename: filename,
10341021
arena: self.arena
10351022
)
@@ -1066,13 +1053,12 @@ extension Parser {
10661053
mutating func parseUnavailableFromAsyncArguments() -> RawUnavailableFromAsyncArgumentsSyntax {
10671054
let (unexpectedBeforeLabel, label) = self.expect(.keyword(.message), remapping: .identifier)
10681055
let (unexpectedBeforeColon, colon) = self.expect(.colon)
1069-
let (unexpectedBeforeMessage, message) = self.expect(.stringLiteral)
1056+
let message = self.parseStringLiteral()
10701057
return RawUnavailableFromAsyncArgumentsSyntax(
10711058
unexpectedBeforeLabel,
10721059
messageLabel: label,
10731060
unexpectedBeforeColon,
10741061
colon: colon,
1075-
unexpectedBeforeMessage,
10761062
message: message,
10771063
arena: self.arena
10781064
)
@@ -1088,15 +1074,22 @@ extension Parser {
10881074
let (unexpectedBeforeLabel, label) = self.expectAny([.keyword(.visibility), .keyword(.metadata)], default: .keyword(.visibility))
10891075
let (unexpectedBeforeColon, colon) = self.expect(.colon)
10901076
let unexpectedBeforeValue: RawUnexpectedNodesSyntax?
1091-
let value: RawTokenSyntax
1077+
let value: RawDocumentationAttributeArgumentSyntax.Value
10921078
switch label.tokenText {
10931079
case "visibility":
1094-
(unexpectedBeforeValue, value) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1080+
let (unexpected, token) = self.expectAny([.keyword(.open), .keyword(.public), .keyword(.internal), .keyword(.fileprivate), .keyword(.private)], default: .keyword(.internal))
1081+
unexpectedBeforeValue = unexpected
1082+
value = .token(token)
10951083
case "metadata":
1096-
(unexpectedBeforeValue, value) = self.expectAny([.stringLiteral, .identifier], default: .stringLiteral)
1084+
unexpectedBeforeValue = nil
1085+
if let identifier = self.consume(if: .identifier) {
1086+
value = .token(identifier)
1087+
} else {
1088+
value = .string(self.parseStringLiteral())
1089+
}
10971090
default:
10981091
unexpectedBeforeValue = nil
1099-
value = missingToken(.identifier)
1092+
value = .token(missingToken(.identifier))
11001093
}
11011094
keepGoing = self.consume(if: .comma)
11021095
arguments.append(

Sources/SwiftParser/Availability.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ extension Parser {
108108
let argumentLabel = self.eat(handle)
109109
let (unexpectedBeforeColon, colon) = self.expect(.colon)
110110
// FIXME: Make sure this is a string literal with no interpolation.
111-
let stringValue = self.consumeAnyToken()
111+
let stringValue = self.parseStringLiteral()
112112

113113
entry = .availabilityLabeledArgument(
114114
RawAvailabilityLabeledArgumentSyntax(

Sources/SwiftParser/Declarations.swift

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ extension Parser {
12511251
/// consumes the entire regex literal, we're done.
12521252
return self.currentToken.tokenText.withBuffer {
12531253
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1254-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1254+
var cursor = Lexer.Cursor(input: buffer, previous: 0, state: .normal)
12551255
guard buffer[0] == UInt8(ascii: "/") else { return false }
12561256
switch cursor.lexOperatorIdentifier(cursor, cursor).tokenKind {
12571257
case .unknown:
@@ -2151,19 +2151,7 @@ extension Parser {
21512151
}
21522152

21532153
let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
2154-
let stringLiteral: RawStringLiteralExprSyntax
2155-
if self.at(.stringLiteral) {
2156-
stringLiteral = self.parseStringLiteral()
2157-
} else {
2158-
stringLiteral = RawStringLiteralExprSyntax(
2159-
openDelimiter: nil,
2160-
openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2161-
segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena),
2162-
closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena),
2163-
closeDelimiter: nil,
2164-
arena: self.arena
2165-
)
2166-
}
2154+
let stringLiteral = self.parseStringLiteral()
21672155
let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
21682156

21692157
switch directive {

Sources/SwiftParser/Directives.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension Parser {
154154
if !self.at(.rightParen) {
155155
let (unexpectedBeforeFile, file) = self.expectIdentifier()
156156
let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon)
157-
let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral)
157+
let fileName = self.parseStringLiteral()
158158
let (unexpectedBeforeComma, comma) = self.expect(.comma)
159159

160160
let (unexpectedBeforeLine, line) = self.expectIdentifier()
@@ -166,7 +166,6 @@ extension Parser {
166166
fileArgLabel: file,
167167
unexpectedBeforeFileColon,
168168
fileArgColon: fileColon,
169-
unexpectedBeforeFileName,
170169
fileName: fileName,
171170
unexpectedBeforeComma,
172171
comma: comma,

0 commit comments

Comments
 (0)