Skip to content

[libSyntax] Store the token's text in the SyntaxArena #35733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 27 additions & 31 deletions include/swift/Syntax/RawSyntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ typedef unsigned SyntaxNodeId;
///
/// This is implementation detail - do not expose it in public API.
class RawSyntax final
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString> {
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>> {
friend TrailingObjects;

/// The ID that shall be used for the next node that is created and does not
Expand All @@ -160,15 +160,17 @@ class RawSyntax final
/// An ID of this node that is stable across incremental parses
SyntaxNodeId NodeId;

mutable std::atomic<int> RefCount;

/// A reference to the \c SyntaxArena whose bump allocator this node was
/// allocated in; holding it keeps the underlying memory buffer of this node
/// alive. The constructors assert that this is never a \c nullptr.
RC<SyntaxArena> Arena;

union {
struct {
// FIXME: Reduce TextLength to 30 bits so that common fits in 4 bytes?
/// Number of bytes this node takes up spelled out in the source code
/// Number of bytes this node takes up spelled out in the source code.
/// Always 0 if the node is missing.
unsigned TextLength : 32;
/// Whether this piece of syntax was actually present in the source.
unsigned Presence : 1;
Expand Down Expand Up @@ -196,18 +198,19 @@ class RawSyntax final
"Only 64 bits reserved for standard syntax bits");
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits
/// The kind of token this "token" node represents.
const char *LeadingTrivia;
const char *TokenText;
const char *TrailingTrivia;
unsigned LeadingTriviaLength : 32;
unsigned TokenLength : 32;
unsigned TrailingTriviaLength : 32;
unsigned TokenKind : 16;
StringRef LeadingTrivia;
StringRef TrailingTrivia;
} Token;
} Bits;

size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
return isToken() ? 0 : Bits.Layout.NumChildren;
}
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
return isToken() ? 1 : 0;
}

/// Constructor for creating layout nodes.
/// If the node has been allocated inside the bump allocator of a
Expand All @@ -223,7 +226,7 @@ class RawSyntax final
/// underlying storage.
/// If \p NodeId is \c None, the next free NodeId is used. If it is passed,
/// the caller needs to ensure that the NodeId has not been used yet.
RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
StringRef LeadingTrivia, StringRef TrailingTrivia,
SourcePresence Presence, const RC<SyntaxArena> &Arena,
llvm::Optional<SyntaxNodeId> NodeId);
Expand All @@ -240,8 +243,6 @@ class RawSyntax final
return TextLength;
}

mutable std::atomic<int> RefCount;

public:
~RawSyntax();

Expand Down Expand Up @@ -286,15 +287,15 @@ class RawSyntax final
}

/// Make a raw "token" syntax node.
static RC<RawSyntax> make(tok TokKind, OwnedString Text, size_t TextLength,
static RC<RawSyntax> make(tok TokKind, StringRef Text, size_t TextLength,
StringRef LeadingTrivia, StringRef TrailingTrivia,
SourcePresence Presence,
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
llvm::Optional<SyntaxNodeId> NodeId = llvm::None);

/// Make a raw "token" syntax node that was allocated in \p Arena.
static RC<RawSyntax>
makeAndCalcLength(tok TokKind, OwnedString Text, StringRef LeadingTrivia,
makeAndCalcLength(tok TokKind, StringRef Text, StringRef LeadingTrivia,
StringRef TrailingTrivia, SourcePresence Presence,
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
llvm::Optional<SyntaxNodeId> NodeId = llvm::None) {
Expand All @@ -316,7 +317,7 @@ class RawSyntax final

/// Make a missing raw "token" syntax node.
static RC<RawSyntax>
missing(tok TokKind, OwnedString Text,
missing(tok TokKind, StringRef Text,
const RC<SyntaxArena> &Arena = SyntaxArena::make()) {
return make(TokKind, Text, /*TextLength=*/0, {}, {},
SourcePresence::Missing, Arena);
Expand Down Expand Up @@ -390,27 +391,24 @@ class RawSyntax final
return static_cast<tok>(Bits.Token.TokenKind);
}

/// Return the text of the token as an \c OwnedString. Keeping a reference to
/// this string will keep it alive even if the syntax node gets freed.
OwnedString getOwnedTokenText() const {
assert(isToken());
return *getTrailingObjects<OwnedString>();
}

/// Return the text of the token as a reference. The referenced buffer may
/// disappear when the syntax node gets freed.
StringRef getTokenText() const { return getOwnedTokenText().str(); }
StringRef getTokenText() const {
assert(isToken());
return StringRef(Bits.Token.TokenText, Bits.Token.TokenLength);
}

/// Return the unparsed leading trivia of the token.
StringRef getLeadingTrivia() const {
assert(isToken());
return Bits.Token.LeadingTrivia;
return StringRef(Bits.Token.LeadingTrivia, Bits.Token.LeadingTriviaLength);
}

/// Return the unparsed trailing trivia of the token.
StringRef getTrailingTrivia() const {
assert(isToken());
return Bits.Token.TrailingTrivia;
return StringRef(Bits.Token.TrailingTrivia,
Bits.Token.TrailingTriviaLength);
}

/// Return pieces that make up the leading trivia of the token.
Expand All @@ -434,17 +432,15 @@ class RawSyntax final
/// Return a new token like this one, but with the given leading
/// trivia instead.
RC<RawSyntax> withLeadingTrivia(StringRef NewLeadingTrivia) const {
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
NewLeadingTrivia, getTrailingTrivia(),
getPresence());
return makeAndCalcLength(getTokenKind(), getTokenText(), NewLeadingTrivia,
getTrailingTrivia(), getPresence());
}

/// Return a new token like this one, but with the given trailing
/// trivia instead.
RC<RawSyntax> withTrailingTrivia(StringRef NewTrailingTrivia) const {
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
getLeadingTrivia(), NewTrailingTrivia,
getPresence());
return makeAndCalcLength(getTokenKind(), getTokenText(), getLeadingTrivia(),
NewTrailingTrivia, getPresence());
}

/// @}
Expand Down Expand Up @@ -503,7 +499,7 @@ class RawSyntax final
/// Dump this piece of syntax recursively.
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;

static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, OwnedString Text,
static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
StringRef LeadingTrivia, StringRef TrailingTrivia);
};

Expand Down
4 changes: 2 additions & 2 deletions include/swift/Syntax/Serialization/SyntaxDeserialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
in.mapRequired("id", nodeIdString);
unsigned nodeId = std::atoi(nodeIdString.data());
value = swift::RawSyntax::makeAndCalcLength(
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
trailingTrivia, presence, swift::SyntaxArena::make(), nodeId);
tokenKind, text, leadingTrivia, trailingTrivia, presence,
swift::SyntaxArena::make(), nodeId);
} else {
swift::SyntaxKind kind;
in.mapRequired("kind", kind);
Expand Down
6 changes: 3 additions & 3 deletions include/swift/Syntax/SyntaxFactory.h.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class SyntaxArena;
struct SyntaxFactory {
/// Make any kind of token.
static TokenSyntax makeToken(tok Kind,
OwnedString Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
StringRef Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
SourcePresence Presence,
const RC<SyntaxArena> &Arena = SyntaxArena::make()
);
Expand Down Expand Up @@ -108,7 +108,7 @@ struct SyntaxFactory {
StringRef TrailingTrivia,
const RC<SyntaxArena> &Arena = SyntaxArena::make());
% else:
static TokenSyntax make${token.name}(OwnedString Text,
static TokenSyntax make${token.name}(StringRef Text,
StringRef LeadingTrivia, StringRef TrailingTrivia,
const RC<SyntaxArena> &Arena = SyntaxArena::make());
% end
Expand Down Expand Up @@ -139,7 +139,7 @@ struct SyntaxFactory {

/// Creates a TypeIdentifierSyntax with the provided name and leading/trailing
/// trivia.
static TypeSyntax makeTypeIdentifier(OwnedString TypeName,
static TypeSyntax makeTypeIdentifier(StringRef TypeName,
StringRef LeadingTrivia = {}, StringRef TrailingTrivia = {},
const RC<SyntaxArena> &Arena = SyntaxArena::make()
);
Expand Down
2 changes: 1 addition & 1 deletion include/swift/Syntax/TokenSyntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class TokenSyntax final : public Syntax {
public:
TokenSyntax(const SyntaxData Data) : Syntax(Data) {}

static TokenSyntax missingToken(const tok Kind, OwnedString Text) {
static TokenSyntax missingToken(const tok Kind, StringRef Text) {
return makeRoot<TokenSyntax>(RawSyntax::missing(Kind, Text));
}

Expand Down
7 changes: 3 additions & 4 deletions lib/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,12 +336,11 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
/*SplitTokens=*/ArrayRef<Token>(),
[&](const Token &Tok, StringRef LeadingTrivia, StringRef TrailingTrivia) {
CharSourceRange TokRange = Tok.getRange();
auto Text = OwnedString::makeRefCounted(Tok.getRawText());
size_t TextLength = LeadingTrivia.size() + TokRange.getByteLength() +
TrailingTrivia.size();
auto ThisToken =
RawSyntax::make(Tok.getKind(), Text, TextLength, LeadingTrivia,
TrailingTrivia, SourcePresence::Present);
auto ThisToken = RawSyntax::make(
Tok.getKind(), Tok.getRawText(), TextLength, LeadingTrivia,
TrailingTrivia, SourcePresence::Present);

auto ThisTokenPos =
RunningPos.advancedBy(ThisToken->getLeadingTriviaLength());
Expand Down
48 changes: 26 additions & 22 deletions lib/Syntax/RawSyntax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
size_t TextLength, SourcePresence Presence,
const RC<SyntaxArena> &Arena,
llvm::Optional<unsigned> NodeId)
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), false}}),
RefCount(0) {
: RefCount(0), Arena(Arena),
Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
assert(Kind != SyntaxKind::Token &&
"'token' syntax node must be constructed with dedicated constructor");
Expand All @@ -139,36 +139,41 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
getTrailingObjects<RC<RawSyntax>>());
}

RawSyntax::RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
StringRef LeadingTrivia, StringRef TrailingTrivia,
SourcePresence Presence, const RC<SyntaxArena> &Arena,
llvm::Optional<unsigned> NodeId)
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), true}}),
RefCount(0) {
: RefCount(0), Arena(Arena),
Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
copyToArenaIfNecessary(LeadingTrivia, Arena);
copyToArenaIfNecessary(Text, Arena);
copyToArenaIfNecessary(TrailingTrivia, Arena);

if (Presence == SourcePresence::Missing) {
assert(TextLength == 0);
} else {
assert(TextLength ==
LeadingTrivia.size() + Text.size() + TrailingTrivia.size());
}

if (NodeId.hasValue()) {
this->NodeId = NodeId.getValue();
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
} else {
this->NodeId = NextFreeNodeId++;
}
Bits.Token.LeadingTrivia = LeadingTrivia.data();
Bits.Token.TokenText = Text.data();
Bits.Token.TrailingTrivia = TrailingTrivia.data();
Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
Bits.Token.TokenLength = Text.size();
Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
Bits.Token.TokenKind = unsigned(TokKind);
// FIXME: Copy the backing storage of the string into the arena
Bits.Token.LeadingTrivia = LeadingTrivia;
Bits.Token.TrailingTrivia = TrailingTrivia;

// Initialize token text.
::new (static_cast<void *>(getTrailingObjects<OwnedString>()))
OwnedString(Text);
}

RawSyntax::~RawSyntax() {
if (isToken()) {
getTrailingObjects<OwnedString>()->~OwnedString();
} else {
if (!isToken()) {
for (auto &child : getLayout())
child.~RC<RawSyntax>();
}
Expand All @@ -179,19 +184,19 @@ RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
const RC<SyntaxArena> &Arena,
llvm::Optional<unsigned> NodeId) {
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(Layout.size(), 0);
auto size = totalSizeToAlloc<RC<RawSyntax>>(Layout.size());
void *data = Arena->Allocate(size, alignof(RawSyntax));
return RC<RawSyntax>(
new (data) RawSyntax(Kind, Layout, TextLength, Presence, Arena, NodeId));
}

RC<RawSyntax> RawSyntax::make(tok TokKind, OwnedString Text, size_t TextLength,
RC<RawSyntax> RawSyntax::make(tok TokKind, StringRef Text, size_t TextLength,
StringRef LeadingTrivia, StringRef TrailingTrivia,
SourcePresence Presence,
const RC<SyntaxArena> &Arena,
llvm::Optional<unsigned> NodeId) {
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(0, 1);
auto size = totalSizeToAlloc<RC<RawSyntax>>(0);
void *data = Arena->Allocate(size, alignof(RawSyntax));
return RC<RawSyntax>(new (data)
RawSyntax(TokKind, Text, TextLength, LeadingTrivia,
Expand Down Expand Up @@ -306,9 +311,8 @@ void RawSyntax::dump(llvm::raw_ostream &OS, unsigned Indent) const {
OS << ')';
}

void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
OwnedString Text, StringRef LeadingTrivia,
StringRef TrailingTrivia) {
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
StringRef LeadingTrivia, StringRef TrailingTrivia) {
ID.AddInteger(unsigned(TokKind));
ID.AddInteger(LeadingTrivia.size());
ID.AddInteger(TrailingTrivia.size());
Expand All @@ -320,7 +324,7 @@ void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
#include "swift/Syntax/TokenKinds.def"
break;
default:
ID.AddString(Text.str());
ID.AddString(Text);
break;
}
ID.AddString(LeadingTrivia);
Expand Down
Loading