Skip to content

Commit d0e27bb

Browse files
authored
Merge pull request #35649 from ahoppen/trivia-parsing
[libSyntax] Avoid lexing of trivia into pieces if possible
2 parents 30a57fc + a8c0136 commit d0e27bb

40 files changed

+1173
-1295
lines changed

include/swift/Parse/Lexer.h

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,17 @@ class Lexer {
122122

123123
/// The current leading trivia for the next token.
124124
///
125-
/// This is only preserved if this Lexer was constructed with
126-
/// `TriviaRetentionMode::WithTrivia`.
127-
ParsedTrivia LeadingTrivia;
125+
/// The StringRef points into the source buffer that is currently being lexed.
126+
StringRef LeadingTrivia;
128127

129128
/// The current trailing trivia for the next token.
130-
///
131-
/// This is only preserved if this Lexer was constructed with
132-
/// `TriviaRetentionMode::WithTrivia`.
133-
ParsedTrivia TrailingTrivia;
134-
129+
/// The StringRef points into the source buffer that is currently being lexed.
130+
StringRef TrailingTrivia;
131+
132+
/// The location at which the comment of the next token starts. \c nullptr if
133+
/// the next token doesn't have a comment.
134+
const char *CommentStart;
135+
135136
Lexer(const Lexer&) = delete;
136137
void operator=(const Lexer&) = delete;
137138

@@ -196,19 +197,19 @@ class Lexer {
196197

197198
/// Lex a token. If \c TriviaRetentionMode is \c WithTrivia, passed pointers
198199
/// to trivias are populated.
199-
void lex(Token &Result, ParsedTrivia &LeadingTriviaResult,
200-
ParsedTrivia &TrailingTriviaResult) {
200+
void lex(Token &Result, StringRef &LeadingTriviaResult,
201+
StringRef &TrailingTriviaResult) {
201202
Result = NextToken;
202203
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
203-
LeadingTriviaResult = {LeadingTrivia};
204-
TrailingTriviaResult = {TrailingTrivia};
204+
LeadingTriviaResult = LeadingTrivia;
205+
TrailingTriviaResult = TrailingTrivia;
205206
}
206207
if (Result.isNot(tok::eof))
207208
lexImpl();
208209
}
209210

210211
void lex(Token &Result) {
211-
ParsedTrivia LeadingTrivia, TrailingTrivia;
212+
StringRef LeadingTrivia, TrailingTrivia;
212213
lex(Result, LeadingTrivia, TrailingTrivia);
213214
}
214215

@@ -240,7 +241,7 @@ class Lexer {
240241
/// After restoring the state, lexer will return this token and continue from
241242
/// there.
242243
State getStateForBeginningOfToken(const Token &Tok,
243-
const ParsedTrivia &LeadingTrivia = {}) const {
244+
const StringRef &LeadingTrivia = {}) const {
244245

245246
// If the token has a comment attached to it, rewind to before the comment,
246247
// not just the start of the token. This ensures that we will re-lex and
@@ -249,8 +250,11 @@ class Lexer {
249250
if (TokStart.isInvalid())
250251
TokStart = Tok.getLoc();
251252
auto S = getStateForBeginningOfTokenLoc(TokStart);
252-
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
253+
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
253254
S.LeadingTrivia = LeadingTrivia;
255+
} else {
256+
S.LeadingTrivia = StringRef();
257+
}
254258
return S;
255259
}
256260

@@ -275,8 +279,7 @@ class Lexer {
275279

276280
// Restore Trivia.
277281
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
278-
if (auto &LTrivia = S.LeadingTrivia)
279-
LeadingTrivia = std::move(*LTrivia);
282+
LeadingTrivia = S.LeadingTrivia;
280283
}
281284

282285
/// Restore the lexer state to a given state that is located before
@@ -550,7 +553,7 @@ class Lexer {
550553
void lexOperatorIdentifier();
551554
void lexHexNumber();
552555
void lexNumber();
553-
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
556+
StringRef lexTrivia(bool IsForTrailingTrivia);
554557
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
555558

556559
unsigned lexCharacter(const char *&CurPtr, char StopQuote,
@@ -572,7 +575,14 @@ class Lexer {
572575

573576
NulCharacterKind getNulCharacterKind(const char *Ptr) const;
574577
};
575-
578+
579+
/// A lexer that can lex trivia into its pieces
580+
class TriviaLexer {
581+
public:
582+
/// Decompose the trivia in \p TriviaStr into its pieces.
583+
static ParsedTrivia lexTrivia(StringRef TriviaStr);
584+
};
585+
576586
/// Given an ordered token \param Array , get the iterator pointing to the first
577587
/// token that is not before \param Loc .
578588
template<typename ArrayTy, typename Iterator = typename ArrayTy::iterator>

include/swift/Parse/LexerState.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class LexerState {
3939
private:
4040
explicit LexerState(SourceLoc Loc) : Loc(Loc) {}
4141
SourceLoc Loc;
42-
llvm::Optional<ParsedTrivia> LeadingTrivia;
42+
StringRef LeadingTrivia;
4343
friend class Lexer;
4444
};
4545

include/swift/Parse/ParsedRawSyntaxNode.h

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,10 @@ class ParsedRawSyntaxNode {
5656
CharSourceRange Range;
5757
};
5858
struct DeferredTokenNode {
59-
const ParsedTriviaPiece *TriviaPieces;
6059
SourceLoc TokLoc;
6160
unsigned TokLength;
62-
uint16_t NumLeadingTrivia;
63-
uint16_t NumTrailingTrivia;
61+
StringRef LeadingTrivia;
62+
StringRef TrailingTrivia;
6463
};
6564

6665
union {
@@ -83,21 +82,11 @@ class ParsedRawSyntaxNode {
8382
}
8483

8584
ParsedRawSyntaxNode(tok tokKind, SourceLoc tokLoc, unsigned tokLength,
86-
const ParsedTriviaPiece *triviaPieces,
87-
unsigned numLeadingTrivia,
88-
unsigned numTrailingTrivia)
89-
: DeferredToken{triviaPieces,
90-
tokLoc, tokLength,
91-
uint16_t(numLeadingTrivia),
92-
uint16_t(numTrailingTrivia)},
93-
SynKind(uint16_t(syntax::SyntaxKind::Token)),
94-
TokKind(uint16_t(tokKind)),
95-
DK(DataKind::DeferredToken) {
85+
StringRef leadingTrivia, StringRef trailingTrivia)
86+
: DeferredToken{tokLoc, tokLength, leadingTrivia, trailingTrivia},
87+
SynKind(uint16_t(syntax::SyntaxKind::Token)),
88+
TokKind(uint16_t(tokKind)), DK(DataKind::DeferredToken) {
9689
assert(getTokenKind() == tokKind && "Token kind is too large value!");
97-
assert(DeferredToken.NumLeadingTrivia == numLeadingTrivia &&
98-
"numLeadingTrivia is too large value!");
99-
assert(DeferredToken.NumTrailingTrivia == numTrailingTrivia &&
100-
"numLeadingTrivia is too large value!");
10190
}
10291
ParsedRawSyntaxNode(const ParsedRawSyntaxNode &other) = delete;
10392
ParsedRawSyntaxNode &operator=(const ParsedRawSyntaxNode &other) = delete;
@@ -280,11 +269,8 @@ class ParsedRawSyntaxNode {
280269

281270
CharSourceRange getDeferredTokenRangeWithTrivia() const {
282271
assert(DK == DataKind::DeferredToken);
283-
auto leadTriviaPieces = getDeferredLeadingTriviaPieces();
284-
auto trailTriviaPieces = getDeferredTrailingTriviaPieces();
285-
286-
auto leadTriviaLen = ParsedTriviaPiece::getTotalLength(leadTriviaPieces);
287-
auto trailTriviaLen = ParsedTriviaPiece::getTotalLength(trailTriviaPieces);
272+
auto leadTriviaLen = DeferredToken.LeadingTrivia.size();
273+
auto trailTriviaLen = DeferredToken.TrailingTrivia.size();
288274

289275
SourceLoc begin = DeferredToken.TokLoc.getAdvancedLoc(-leadTriviaLen);
290276
unsigned len = leadTriviaLen + DeferredToken.TokLength + trailTriviaLen;
@@ -295,16 +281,13 @@ class ParsedRawSyntaxNode {
295281
assert(DK == DataKind::DeferredToken);
296282
return CharSourceRange{DeferredToken.TokLoc, DeferredToken.TokLength};
297283
}
298-
ArrayRef<ParsedTriviaPiece> getDeferredLeadingTriviaPieces() const {
284+
StringRef getDeferredLeadingTrivia() const {
299285
assert(DK == DataKind::DeferredToken);
300-
return ArrayRef<ParsedTriviaPiece>(DeferredToken.TriviaPieces,
301-
DeferredToken.NumLeadingTrivia);
286+
return DeferredToken.LeadingTrivia;
302287
}
303-
ArrayRef<ParsedTriviaPiece> getDeferredTrailingTriviaPieces() const {
288+
StringRef getDeferredTrailingTrivia() const {
304289
assert(DK == DataKind::DeferredToken);
305-
return ArrayRef<ParsedTriviaPiece>(
306-
DeferredToken.TriviaPieces + DeferredToken.NumLeadingTrivia,
307-
DeferredToken.NumTrailingTrivia);
290+
return DeferredToken.TrailingTrivia;
308291
}
309292

310293
//==========================================================================//
@@ -315,14 +298,15 @@ class ParsedRawSyntaxNode {
315298
SyntaxParsingContext &ctx);
316299

317300
/// Form a deferred token node.
318-
static ParsedRawSyntaxNode makeDeferred(Token tok,
319-
const ParsedTrivia &leadingTrivia,
320-
const ParsedTrivia &trailingTrivia,
301+
static ParsedRawSyntaxNode makeDeferred(Token tok, StringRef leadingTrivia,
302+
StringRef trailingTrivia,
321303
SyntaxParsingContext &ctx);
322304

323305
/// Form a deferred missing token node.
324306
static ParsedRawSyntaxNode makeDeferredMissing(tok tokKind, SourceLoc loc) {
325-
auto raw = ParsedRawSyntaxNode(tokKind, loc, 0, nullptr, 0, 0);
307+
auto raw = ParsedRawSyntaxNode(tokKind, loc, /*tokLength=*/0,
308+
/*leadingTrivia=*/StringRef(),
309+
/*trailingTrivia=*/StringRef());
326310
raw.IsMissing = true;
327311
return raw;
328312
}

include/swift/Parse/ParsedRawSyntaxRecorder.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,12 @@ class ParsedRawSyntaxRecorder {
4444
explicit ParsedRawSyntaxRecorder(std::shared_ptr<SyntaxParseActions> spActions)
4545
: SPActions(std::move(spActions)) {}
4646

47-
ParsedRawSyntaxNode recordToken(const Token &tok,
48-
const ParsedTrivia &leadingTrivia,
49-
const ParsedTrivia &trailingTrivia);
47+
ParsedRawSyntaxNode recordToken(const Token &tok, StringRef leadingTrivia,
48+
StringRef trailingTrivia);
5049

5150
ParsedRawSyntaxNode recordToken(tok tokenKind, CharSourceRange tokenRange,
52-
ArrayRef<ParsedTriviaPiece> leadingTrivia,
53-
ArrayRef<ParsedTriviaPiece> trailingTrivia);
51+
StringRef leadingTrivia,
52+
StringRef trailingTrivia);
5453

5554
/// Record a missing token. \p loc can be invalid or an approximate location
5655
/// of where the token would be if not missing.

include/swift/Parse/Parser.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,13 @@ class Parser {
207207
/// This is the current token being considered by the parser.
208208
Token Tok;
209209

210-
/// leading trivias for \c Tok.
210+
/// Leading trivia for \c Tok.
211211
/// Always empty if !SF.shouldBuildSyntaxTree().
212-
ParsedTrivia LeadingTrivia;
212+
StringRef LeadingTrivia;
213213

214-
/// trailing trivias for \c Tok.
214+
/// Trailing trivia for \c Tok.
215215
/// Always empty if !SF.shouldBuildSyntaxTree().
216-
ParsedTrivia TrailingTrivia;
216+
StringRef TrailingTrivia;
217217

218218
/// The receiver to collect all consumed tokens.
219219
ConsumeTokenReceiver *TokReceiver;
@@ -549,7 +549,7 @@ class Parser {
549549
}
550550

551551
SourceLoc leadingTriviaLoc() {
552-
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.getLength());
552+
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.size());
553553
}
554554

555555
SourceLoc consumeIdentifier(Identifier &Result, bool diagnoseDollarPrefix) {

include/swift/Parse/SyntaxParseActions.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@ class SyntaxParseActions {
4141
public:
4242
virtual ~SyntaxParseActions() = default;
4343

44-
virtual OpaqueSyntaxNode recordToken(tok tokenKind,
45-
ArrayRef<ParsedTriviaPiece> leadingTrivia,
46-
ArrayRef<ParsedTriviaPiece> trailingTrivia,
47-
CharSourceRange range) = 0;
44+
virtual OpaqueSyntaxNode recordToken(tok tokenKind, StringRef leadingTrivia,
45+
StringRef trailingTrivia,
46+
CharSourceRange range) = 0;
4847

4948
/// Record a missing token. \c loc can be invalid or an approximate location
5049
/// of where the token would be if not missing.

include/swift/Parse/SyntaxParsingContext.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
264264
void addRawSyntax(ParsedRawSyntaxNode Raw);
265265

266266
/// Add Token with Trivia to the parts.
267-
void addToken(Token &Tok, const ParsedTrivia &LeadingTrivia,
268-
const ParsedTrivia &TrailingTrivia);
267+
void addToken(Token &Tok, StringRef LeadingTrivia, StringRef TrailingTrivia);
269268

270269
/// Add Syntax to the parts.
271270
void addSyntax(ParsedSyntax Node);

0 commit comments

Comments
 (0)