Skip to content

[libSyntax] Avoid lexing of trivia into pieces if possible #35649

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 29 additions & 19 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,17 @@ class Lexer {

/// The current leading trivia for the next token.
///
/// This is only preserved if this Lexer was constructed with
/// `TriviaRetentionMode::WithTrivia`.
ParsedTrivia LeadingTrivia;
/// The StringRef points into the source buffer that is currently being lexed.
StringRef LeadingTrivia;

/// The current trailing trivia for the next token.
///
/// This is only preserved if this Lexer was constructed with
/// `TriviaRetentionMode::WithTrivia`.
ParsedTrivia TrailingTrivia;

/// The StringRef points into the source buffer that is currently being lexed.
StringRef TrailingTrivia;

/// The location at which the comment of the next token starts. \c nullptr if
/// the next token doesn't have a comment.
const char *CommentStart;

Lexer(const Lexer&) = delete;
void operator=(const Lexer&) = delete;

Expand Down Expand Up @@ -196,19 +197,19 @@ class Lexer {

/// Lex a token. If \c TriviaRetentionMode is \c WithTrivia, the passed trivia
/// references are populated.
void lex(Token &Result, ParsedTrivia &LeadingTriviaResult,
ParsedTrivia &TrailingTriviaResult) {
void lex(Token &Result, StringRef &LeadingTriviaResult,
StringRef &TrailingTriviaResult) {
Result = NextToken;
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
LeadingTriviaResult = {LeadingTrivia};
TrailingTriviaResult = {TrailingTrivia};
LeadingTriviaResult = LeadingTrivia;
TrailingTriviaResult = TrailingTrivia;
}
if (Result.isNot(tok::eof))
lexImpl();
}

void lex(Token &Result) {
ParsedTrivia LeadingTrivia, TrailingTrivia;
StringRef LeadingTrivia, TrailingTrivia;
lex(Result, LeadingTrivia, TrailingTrivia);
}

Expand Down Expand Up @@ -240,7 +241,7 @@ class Lexer {
/// After restoring the state, lexer will return this token and continue from
/// there.
State getStateForBeginningOfToken(const Token &Tok,
const ParsedTrivia &LeadingTrivia = {}) const {
const StringRef &LeadingTrivia = {}) const {

// If the token has a comment attached to it, rewind to before the comment,
// not just the start of the token. This ensures that we will re-lex and
Expand All @@ -249,8 +250,11 @@ class Lexer {
if (TokStart.isInvalid())
TokStart = Tok.getLoc();
auto S = getStateForBeginningOfTokenLoc(TokStart);
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
S.LeadingTrivia = LeadingTrivia;
} else {
S.LeadingTrivia = StringRef();
}
return S;
}

Expand All @@ -275,8 +279,7 @@ class Lexer {

// Restore Trivia.
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
if (auto &LTrivia = S.LeadingTrivia)
LeadingTrivia = std::move(*LTrivia);
LeadingTrivia = S.LeadingTrivia;
}

/// Restore the lexer state to a given state that is located before
Expand Down Expand Up @@ -550,7 +553,7 @@ class Lexer {
void lexOperatorIdentifier();
void lexHexNumber();
void lexNumber();
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
StringRef lexTrivia(bool IsForTrailingTrivia);
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);

unsigned lexCharacter(const char *&CurPtr, char StopQuote,
Expand All @@ -572,7 +575,14 @@ class Lexer {

NulCharacterKind getNulCharacterKind(const char *Ptr) const;
};


/// A lexer that can lex trivia into its pieces.
///
/// The only member visible here is a static function, so no \c TriviaLexer
/// instance is needed to use it.
class TriviaLexer {
public:
/// Decompose the trivia in \p TriviaStr into its pieces.
///
/// \param TriviaStr The trivia text to split up.
/// \returns The pieces as a \c ParsedTrivia.
static ParsedTrivia lexTrivia(StringRef TriviaStr);
};

/// Given an ordered token \p Array, get the iterator pointing to the first
/// token that is not before \p Loc.
template<typename ArrayTy, typename Iterator = typename ArrayTy::iterator>
Expand Down
2 changes: 1 addition & 1 deletion include/swift/Parse/LexerState.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class LexerState {
private:
explicit LexerState(SourceLoc Loc) : Loc(Loc) {}
SourceLoc Loc;
llvm::Optional<ParsedTrivia> LeadingTrivia;
StringRef LeadingTrivia;
friend class Lexer;
};

Expand Down
50 changes: 17 additions & 33 deletions include/swift/Parse/ParsedRawSyntaxNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,10 @@ class ParsedRawSyntaxNode {
CharSourceRange Range;
};
struct DeferredTokenNode {
const ParsedTriviaPiece *TriviaPieces;
SourceLoc TokLoc;
unsigned TokLength;
uint16_t NumLeadingTrivia;
uint16_t NumTrailingTrivia;
StringRef LeadingTrivia;
StringRef TrailingTrivia;
};

union {
Expand All @@ -83,21 +82,11 @@ class ParsedRawSyntaxNode {
}

ParsedRawSyntaxNode(tok tokKind, SourceLoc tokLoc, unsigned tokLength,
const ParsedTriviaPiece *triviaPieces,
unsigned numLeadingTrivia,
unsigned numTrailingTrivia)
: DeferredToken{triviaPieces,
tokLoc, tokLength,
uint16_t(numLeadingTrivia),
uint16_t(numTrailingTrivia)},
SynKind(uint16_t(syntax::SyntaxKind::Token)),
TokKind(uint16_t(tokKind)),
DK(DataKind::DeferredToken) {
StringRef leadingTrivia, StringRef trailingTrivia)
: DeferredToken{tokLoc, tokLength, leadingTrivia, trailingTrivia},
SynKind(uint16_t(syntax::SyntaxKind::Token)),
TokKind(uint16_t(tokKind)), DK(DataKind::DeferredToken) {
assert(getTokenKind() == tokKind && "Token kind is too large value!");
assert(DeferredToken.NumLeadingTrivia == numLeadingTrivia &&
"numLeadingTrivia is too large value!");
assert(DeferredToken.NumTrailingTrivia == numTrailingTrivia &&
"numLeadingTrivia is too large value!");
}
ParsedRawSyntaxNode(const ParsedRawSyntaxNode &other) = delete;
ParsedRawSyntaxNode &operator=(const ParsedRawSyntaxNode &other) = delete;
Expand Down Expand Up @@ -280,11 +269,8 @@ class ParsedRawSyntaxNode {

CharSourceRange getDeferredTokenRangeWithTrivia() const {
assert(DK == DataKind::DeferredToken);
auto leadTriviaPieces = getDeferredLeadingTriviaPieces();
auto trailTriviaPieces = getDeferredTrailingTriviaPieces();

auto leadTriviaLen = ParsedTriviaPiece::getTotalLength(leadTriviaPieces);
auto trailTriviaLen = ParsedTriviaPiece::getTotalLength(trailTriviaPieces);
auto leadTriviaLen = DeferredToken.LeadingTrivia.size();
auto trailTriviaLen = DeferredToken.TrailingTrivia.size();

SourceLoc begin = DeferredToken.TokLoc.getAdvancedLoc(-leadTriviaLen);
unsigned len = leadTriviaLen + DeferredToken.TokLength + trailTriviaLen;
Expand All @@ -295,16 +281,13 @@ class ParsedRawSyntaxNode {
assert(DK == DataKind::DeferredToken);
return CharSourceRange{DeferredToken.TokLoc, DeferredToken.TokLength};
}
ArrayRef<ParsedTriviaPiece> getDeferredLeadingTriviaPieces() const {
StringRef getDeferredLeadingTrivia() const {
assert(DK == DataKind::DeferredToken);
return ArrayRef<ParsedTriviaPiece>(DeferredToken.TriviaPieces,
DeferredToken.NumLeadingTrivia);
return DeferredToken.LeadingTrivia;
}
ArrayRef<ParsedTriviaPiece> getDeferredTrailingTriviaPieces() const {
StringRef getDeferredTrailingTrivia() const {
assert(DK == DataKind::DeferredToken);
return ArrayRef<ParsedTriviaPiece>(
DeferredToken.TriviaPieces + DeferredToken.NumLeadingTrivia,
DeferredToken.NumTrailingTrivia);
return DeferredToken.TrailingTrivia;
}

//==========================================================================//
Expand All @@ -315,14 +298,15 @@ class ParsedRawSyntaxNode {
SyntaxParsingContext &ctx);

/// Form a deferred token node.
static ParsedRawSyntaxNode makeDeferred(Token tok,
const ParsedTrivia &leadingTrivia,
const ParsedTrivia &trailingTrivia,
static ParsedRawSyntaxNode makeDeferred(Token tok, StringRef leadingTrivia,
StringRef trailingTrivia,
SyntaxParsingContext &ctx);

/// Form a deferred missing token node.
static ParsedRawSyntaxNode makeDeferredMissing(tok tokKind, SourceLoc loc) {
auto raw = ParsedRawSyntaxNode(tokKind, loc, 0, nullptr, 0, 0);
auto raw = ParsedRawSyntaxNode(tokKind, loc, /*tokLength=*/0,
/*leadingTrivia=*/StringRef(),
/*trailingTrivia=*/StringRef());
raw.IsMissing = true;
return raw;
}
Expand Down
9 changes: 4 additions & 5 deletions include/swift/Parse/ParsedRawSyntaxRecorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,12 @@ class ParsedRawSyntaxRecorder {
explicit ParsedRawSyntaxRecorder(std::shared_ptr<SyntaxParseActions> spActions)
: SPActions(std::move(spActions)) {}

ParsedRawSyntaxNode recordToken(const Token &tok,
const ParsedTrivia &leadingTrivia,
const ParsedTrivia &trailingTrivia);
ParsedRawSyntaxNode recordToken(const Token &tok, StringRef leadingTrivia,
StringRef trailingTrivia);

ParsedRawSyntaxNode recordToken(tok tokenKind, CharSourceRange tokenRange,
ArrayRef<ParsedTriviaPiece> leadingTrivia,
ArrayRef<ParsedTriviaPiece> trailingTrivia);
StringRef leadingTrivia,
StringRef trailingTrivia);

/// Record a missing token. \p loc can be invalid or an approximate location
/// of where the token would be if not missing.
Expand Down
10 changes: 5 additions & 5 deletions include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,13 @@ class Parser {
/// This is the current token being considered by the parser.
Token Tok;

/// leading trivias for \c Tok.
/// Leading trivia for \c Tok.
/// Always empty if !SF.shouldBuildSyntaxTree().
ParsedTrivia LeadingTrivia;
StringRef LeadingTrivia;

/// trailing trivias for \c Tok.
/// Trailing trivia for \c Tok.
/// Always empty if !SF.shouldBuildSyntaxTree().
ParsedTrivia TrailingTrivia;
StringRef TrailingTrivia;

/// The receiver to collect all consumed tokens.
ConsumeTokenReceiver *TokReceiver;
Expand Down Expand Up @@ -549,7 +549,7 @@ class Parser {
}

SourceLoc leadingTriviaLoc() {
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.getLength());
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.size());
}

SourceLoc consumeIdentifier(Identifier &Result, bool diagnoseDollarPrefix) {
Expand Down
7 changes: 3 additions & 4 deletions include/swift/Parse/SyntaxParseActions.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ class SyntaxParseActions {
public:
virtual ~SyntaxParseActions() = default;

virtual OpaqueSyntaxNode recordToken(tok tokenKind,
ArrayRef<ParsedTriviaPiece> leadingTrivia,
ArrayRef<ParsedTriviaPiece> trailingTrivia,
CharSourceRange range) = 0;
virtual OpaqueSyntaxNode recordToken(tok tokenKind, StringRef leadingTrivia,
StringRef trailingTrivia,
CharSourceRange range) = 0;

/// Record a missing token. \c loc can be invalid or an approximate location
/// of where the token would be if not missing.
Expand Down
3 changes: 1 addition & 2 deletions include/swift/Parse/SyntaxParsingContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
void addRawSyntax(ParsedRawSyntaxNode Raw);

/// Add Token with Trivia to the parts.
void addToken(Token &Tok, const ParsedTrivia &LeadingTrivia,
const ParsedTrivia &TrailingTrivia);
void addToken(Token &Tok, StringRef LeadingTrivia, StringRef TrailingTrivia);

/// Add Syntax to the parts.
void addSyntax(ParsedSyntax Node);
Expand Down
Loading