Skip to content

Commit 049c3cd

Browse files
authored
Merge pull request #21955 from akyrtzi/syntax-parser-opt
[Parser] Optimize syntactic parsing for libSyntax
2 parents c04a7de + 5b1aab1 commit 049c3cd

31 files changed

+629
-360
lines changed

include/swift/Parse/Lexer.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
#include "swift/Basic/SourceManager.h"
2323
#include "swift/Parse/LexerState.h"
2424
#include "swift/Parse/Token.h"
25-
#include "swift/Syntax/References.h"
26-
#include "swift/Syntax/Trivia.h"
25+
#include "swift/Parse/ParsedTrivia.h"
2726
#include "llvm/ADT/SmallVector.h"
2827
#include "llvm/Support/SaveAndRestore.h"
2928

@@ -118,13 +117,13 @@ class Lexer {
118117
///
119118
/// This is only preserved if this Lexer was constructed with
120119
/// `TriviaRetentionMode::WithTrivia`.
121-
syntax::Trivia LeadingTrivia;
120+
ParsedTrivia LeadingTrivia;
122121

123122
/// The current trailing trivia for the next token.
124123
///
125124
/// This is only preserved if this Lexer was constructed with
126125
/// `TriviaRetentionMode::WithTrivia`.
127-
syntax::Trivia TrailingTrivia;
126+
ParsedTrivia TrailingTrivia;
128127

129128
Lexer(const Lexer&) = delete;
130129
void operator=(const Lexer&) = delete;
@@ -185,8 +184,8 @@ class Lexer {
185184

186185
/// Lex a token. If \c TriviaRetentionMode is \c WithTrivia, the passed
187186
/// trivia references are populated.
188-
void lex(Token &Result, syntax::Trivia &LeadingTriviaResult,
189-
syntax::Trivia &TrailingTriviaResult) {
187+
void lex(Token &Result, ParsedTrivia &LeadingTriviaResult,
188+
ParsedTrivia &TrailingTriviaResult) {
190189
Result = NextToken;
191190
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
192191
LeadingTriviaResult = {LeadingTrivia};
@@ -197,7 +196,7 @@ class Lexer {
197196
}
198197

199198
void lex(Token &Result) {
200-
syntax::Trivia LeadingTrivia, TrailingTrivia;
199+
ParsedTrivia LeadingTrivia, TrailingTrivia;
201200
lex(Result, LeadingTrivia, TrailingTrivia);
202201
}
203202

@@ -229,7 +228,7 @@ class Lexer {
229228
/// After restoring the state, lexer will return this token and continue from
230229
/// there.
231230
State getStateForBeginningOfToken(const Token &Tok,
232-
const syntax::Trivia &LeadingTrivia = {}) const {
231+
const ParsedTrivia &LeadingTrivia = {}) const {
233232

234233
// If the token has a comment attached to it, rewind to before the comment,
235234
// not just the start of the token. This ensures that we will re-lex and
@@ -529,7 +528,7 @@ class Lexer {
529528
void lexOperatorIdentifier();
530529
void lexHexNumber();
531530
void lexNumber();
532-
void lexTrivia(syntax::Trivia &T, bool IsForTrailingTrivia);
531+
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
533532
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
534533

535534
unsigned lexCharacter(const char *&CurPtr, char StopQuote,

include/swift/Parse/LexerState.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include "llvm/ADT/Optional.h"
2121
#include "swift/Basic/SourceLoc.h"
22-
#include "swift/Syntax/Trivia.h"
22+
#include "swift/Parse/ParsedTrivia.h"
2323

2424
namespace swift {
2525
class Lexer;
@@ -39,7 +39,7 @@ class LexerState {
3939
private:
4040
explicit LexerState(SourceLoc Loc) : Loc(Loc) {}
4141
SourceLoc Loc;
42-
llvm::Optional<syntax::Trivia> LeadingTrivia;
42+
llvm::Optional<ParsedTrivia> LeadingTrivia;
4343
friend class Lexer;
4444
};
4545

include/swift/Parse/ParsedRawSyntaxNode.h

Lines changed: 41 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
#define SWIFT_PARSE_PARSEDRAWSYNTAXNODE_H
1515

1616
#include "swift/Basic/SourceLoc.h"
17+
#include "swift/Parse/ParsedTrivia.h"
1718
#include "swift/Parse/Token.h"
1819
#include "swift/Syntax/SyntaxKind.h"
19-
#include "swift/Syntax/Trivia.h"
2020
#include <vector>
2121

2222
namespace swift {
2323

2424
typedef void *OpaqueSyntaxNode;
25+
class SyntaxParsingContext;
2526

2627
/// Represents a raw syntax node formed by the parser.
2728
///
@@ -50,12 +51,14 @@ class ParsedRawSyntaxNode {
5051
CharSourceRange Range;
5152
};
5253
struct DeferredLayoutNode {
53-
std::vector<ParsedRawSyntaxNode> Children;
54+
ArrayRef<ParsedRawSyntaxNode> Children;
5455
};
5556
struct DeferredTokenNode {
56-
Token Tok;
57-
syntax::Trivia LeadingTrivia;
58-
syntax::Trivia TrailingTrivia;
57+
const ParsedTriviaPiece *TriviaPieces;
58+
SourceLoc TokLoc;
59+
unsigned TokLength;
60+
uint16_t NumLeadingTrivia;
61+
uint16_t NumTrailingTrivia;
5962
};
6063

6164
union {
@@ -77,16 +80,22 @@ class ParsedRawSyntaxNode {
7780
assert(getKind() == k && "Syntax kind with too large value!");
7881
}
7982

80-
ParsedRawSyntaxNode(Token tok,
81-
syntax::Trivia leadingTrivia,
82-
syntax::Trivia trailingTrivia)
83-
: DeferredToken{std::move(tok),
84-
std::move(leadingTrivia),
85-
std::move(trailingTrivia)},
83+
ParsedRawSyntaxNode(tok tokKind, SourceLoc tokLoc, unsigned tokLength,
84+
const ParsedTriviaPiece *triviaPieces,
85+
unsigned numLeadingTrivia,
86+
unsigned numTrailingTrivia)
87+
: DeferredToken{triviaPieces,
88+
tokLoc, tokLength,
89+
uint16_t(numLeadingTrivia),
90+
uint16_t(numTrailingTrivia)},
8691
SynKind(uint16_t(syntax::SyntaxKind::Token)),
87-
TokKind(uint16_t(tok.getKind())),
92+
TokKind(uint16_t(tokKind)),
8893
DK(DataKind::DeferredToken) {
89-
assert(getTokenKind() == tok.getKind() && "Token kind is too large value!");
94+
assert(getTokenKind() == tokKind && "Token kind is too large value!");
95+
assert(DeferredToken.NumLeadingTrivia == numLeadingTrivia &&
96+
"numLeadingTrivia is too large value!");
97+
assert(DeferredToken.NumTrailingTrivia == numTrailingTrivia &&
98+
"numTrailingTrivia is too large value!");
9099
}
91100

92101
public:
@@ -106,63 +115,6 @@ class ParsedRawSyntaxNode {
106115
assert(getTokenKind() == tokKind && "Token kind with too large value!");
107116
}
108117

109-
ParsedRawSyntaxNode(const ParsedRawSyntaxNode &other) {
110-
switch (other.DK) {
111-
case DataKind::Null:
112-
break;
113-
case DataKind::Recorded:
114-
new(&this->RecordedData)RecordedSyntaxNode(other.RecordedData);
115-
break;
116-
case DataKind::DeferredLayout:
117-
new(&this->DeferredLayout)DeferredLayoutNode(other.DeferredLayout);
118-
break;
119-
case DataKind::DeferredToken:
120-
new(&this->DeferredToken)DeferredTokenNode(other.DeferredToken);
121-
break;
122-
}
123-
this->SynKind = other.SynKind;
124-
this->TokKind = other.TokKind;
125-
this->DK = other.DK;
126-
}
127-
128-
ParsedRawSyntaxNode(ParsedRawSyntaxNode &&other) {
129-
switch (other.DK) {
130-
case DataKind::Null:
131-
break;
132-
case DataKind::Recorded:
133-
new(&this->RecordedData)RecordedSyntaxNode(
134-
std::move(other.RecordedData));
135-
break;
136-
case DataKind::DeferredLayout:
137-
new(&this->DeferredLayout)DeferredLayoutNode(
138-
std::move(other.DeferredLayout));
139-
break;
140-
case DataKind::DeferredToken:
141-
new(&this->DeferredToken)DeferredTokenNode(
142-
std::move(other.DeferredToken));
143-
break;
144-
}
145-
this->SynKind = other.SynKind;
146-
this->TokKind = other.TokKind;
147-
this->DK = other.DK;
148-
}
149-
150-
~ParsedRawSyntaxNode() {
151-
releaseMemory();
152-
}
153-
154-
ParsedRawSyntaxNode &operator=(const ParsedRawSyntaxNode &other) {
155-
releaseMemory();
156-
new (this)ParsedRawSyntaxNode(other);
157-
return *this;
158-
}
159-
160-
ParsedRawSyntaxNode &operator=(ParsedRawSyntaxNode &&other) {
161-
releaseMemory();
162-
new (this)ParsedRawSyntaxNode(std::move(other));
163-
return *this;
164-
}
165-
166118
syntax::SyntaxKind getKind() const { return syntax::SyntaxKind(SynKind); }
167119
tok getTokenKind() const { return tok(TokKind); }
168120

@@ -201,44 +153,44 @@ class ParsedRawSyntaxNode {
201153
assert(DK == DataKind::DeferredLayout);
202154
return DeferredLayout.Children;
203155
}
204-
void addDeferredChild(ParsedRawSyntaxNode subnode) {
205-
assert(DK == DataKind::DeferredLayout);
206-
DeferredLayout.Children.push_back(std::move(subnode));
207-
}
208156

209157
// Deferred Token Data =====================================================//
210158

211-
const Token &getToken() const {
159+
CharSourceRange getDeferredTokenRangeWithoutBackticks() const {
212160
assert(DK == DataKind::DeferredToken);
213-
return DeferredToken.Tok;
161+
return CharSourceRange{DeferredToken.TokLoc, DeferredToken.TokLength};
214162
}
215-
const syntax::Trivia &getLeadingTrivia() const {
163+
ArrayRef<ParsedTriviaPiece> getDeferredLeadingTriviaPieces() const {
216164
assert(DK == DataKind::DeferredToken);
217-
return DeferredToken.LeadingTrivia;
165+
return ArrayRef<ParsedTriviaPiece>(DeferredToken.TriviaPieces,
166+
DeferredToken.NumLeadingTrivia);
218167
}
219-
const syntax::Trivia &getTrailingTrivia() const {
168+
ArrayRef<ParsedTriviaPiece> getDeferredTrailingTriviaPieces() const {
220169
assert(DK == DataKind::DeferredToken);
221-
return DeferredToken.TrailingTrivia;
170+
return ArrayRef<ParsedTriviaPiece>(
171+
DeferredToken.TriviaPieces + DeferredToken.NumLeadingTrivia,
172+
DeferredToken.NumTrailingTrivia);
222173
}
223174

224175
//==========================================================================//
225176

226177
/// Form a deferred syntax layout node.
227178
static ParsedRawSyntaxNode makeDeferred(syntax::SyntaxKind k,
228-
ArrayRef<ParsedRawSyntaxNode> deferredNodes) {
229-
return ParsedRawSyntaxNode{k, deferredNodes};
230-
}
179+
ArrayRef<ParsedRawSyntaxNode> deferredNodes,
180+
SyntaxParsingContext &ctx);
231181

232182
/// Form a deferred token node.
233183
static ParsedRawSyntaxNode makeDeferred(Token tok,
234-
syntax::Trivia leadingTrivia,
235-
syntax::Trivia trailingTrivia) {
236-
return ParsedRawSyntaxNode{std::move(tok), std::move(leadingTrivia),
237-
std::move(trailingTrivia)};
238-
}
184+
const ParsedTrivia &leadingTrivia,
185+
const ParsedTrivia &trailingTrivia,
186+
SyntaxParsingContext &ctx);
239187

240188
/// Form a deferred missing token node.
241-
static ParsedRawSyntaxNode makeDeferredMissing(tok tokKind, SourceLoc loc);
189+
static ParsedRawSyntaxNode makeDeferredMissing(tok tokKind, SourceLoc loc) {
190+
auto raw = ParsedRawSyntaxNode(tokKind, loc, 0, nullptr, 0, 0);
191+
raw.IsMissing = true;
192+
return raw;
193+
}
242194

243195
/// Dump this piece of syntax recursively for debugging or testing.
244196
LLVM_ATTRIBUTE_DEPRECATED(
@@ -251,20 +203,6 @@ class ParsedRawSyntaxNode {
251203
static ParsedRawSyntaxNode null() {
252204
return ParsedRawSyntaxNode{};
253205
}
254-
255-
private:
256-
void releaseMemory() {
257-
switch (DK) {
258-
case DataKind::Null:
259-
break;
260-
case DataKind::Recorded:
261-
RecordedData.~RecordedSyntaxNode(); break;
262-
case DataKind::DeferredLayout:
263-
DeferredLayout.~DeferredLayoutNode(); break;
264-
case DataKind::DeferredToken:
265-
DeferredToken.~DeferredTokenNode(); break;
266-
}
267-
}
268206
};
269207

270208
} // end namespace swift

include/swift/Parse/ParsedRawSyntaxRecorder.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,17 @@
2424

2525
namespace swift {
2626

27+
class CharSourceRange;
2728
class ParsedRawSyntaxNode;
29+
struct ParsedTrivia;
30+
class ParsedTriviaPiece;
2831
class SyntaxParseActions;
2932
class SourceLoc;
3033
class Token;
3134
enum class tok;
3235

3336
namespace syntax {
3437
enum class SyntaxKind;
35-
struct Trivia;
3638
}
3739

3840
class ParsedRawSyntaxRecorder {
@@ -43,8 +45,12 @@ class ParsedRawSyntaxRecorder {
4345
: SPActions(std::move(spActions)) {}
4446

4547
ParsedRawSyntaxNode recordToken(const Token &tok,
46-
const syntax::Trivia &leadingTrivia,
47-
const syntax::Trivia &trailingTrivia);
48+
const ParsedTrivia &leadingTrivia,
49+
const ParsedTrivia &trailingTrivia);
50+
51+
ParsedRawSyntaxNode recordToken(tok tokenKind, CharSourceRange tokenRange,
52+
ArrayRef<ParsedTriviaPiece> leadingTrivia,
53+
ArrayRef<ParsedTriviaPiece> trailingTrivia);
4854

4955
/// Record a missing token. \p loc can be invalid or an approximate location
5056
/// of where the token would be if not missing.

include/swift/Parse/ParsedSyntaxBuilders.h.gyb

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,20 @@ class SyntaxParsingContext;
3434
% if node.is_buildable():
3535
% child_count = len(node.children)
3636
class Parsed${node.name}Builder {
37-
ParsedRawSyntaxRecorder &Rec;
38-
bool IsBacktracking;
39-
ParsedRawSyntaxNode Layout[${child_count}] = {
37+
SyntaxParsingContext &SPCtx;
38+
ParsedRawSyntaxNode Layout[${child_count}];
4039
% for child in node.children:
41-
ParsedRawSyntaxNode::null(),
40+
% child_node = NODE_MAP.get(child.syntax_kind)
41+
% if child_node and child_node.is_syntax_collection():
42+
% child_elt = child_node.collection_element_name
43+
% child_elt_type = child_node.collection_element_type
44+
SmallVector<ParsedRawSyntaxNode, 8> ${child_elt}Nodes;
45+
% end
4246
% end
43-
};
4447

4548
public:
4649
explicit Parsed${node.name}Builder(SyntaxParsingContext &SPCtx)
47-
: Rec(SPCtx.getRecorder()), IsBacktracking(SPCtx.isBacktracking()) {}
50+
: SPCtx(SPCtx) {}
4851

4952
% for child in node.children:
5053
Parsed${node.name}Builder &use${child.name}(Parsed${child.type_name} ${child.name});
@@ -61,7 +64,7 @@ public:
6164

6265
private:
6366
Parsed${node.name} record();
64-
void finishLayout();
67+
void finishLayout(bool deferred);
6568
};
6669

6770
% end

include/swift/Parse/ParsedSyntaxRecorder.h.gyb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ private:
4646
static Parsed${node.name} record${node.syntax_kind}(${child_params},
4747
ParsedRawSyntaxRecorder &rec);
4848
public:
49-
static Parsed${node.name} defer${node.syntax_kind}(${child_params});
49+
static Parsed${node.name} defer${node.syntax_kind}(${child_params},
50+
SyntaxParsingContext &SPCtx);
5051
static Parsed${node.name} make${node.syntax_kind}(${child_params},
5152
SyntaxParsingContext &SPCtx);
5253
% elif node.is_syntax_collection():
@@ -57,7 +58,8 @@ private:
5758

5859
public:
5960
static Parsed${node.name} defer${node.syntax_kind}(
60-
ArrayRef<Parsed${node.collection_element_type}> elts);
61+
ArrayRef<Parsed${node.collection_element_type}> elts,
62+
SyntaxParsingContext &SPCtx);
6163
static Parsed${node.name} make${node.syntax_kind}(
6264
ArrayRef<Parsed${node.collection_element_type}> elts,
6365
SyntaxParsingContext &SPCtx);

0 commit comments

Comments
 (0)