Skip to content

Commit 512411b

Browse files
authored
Merge pull request #35733 from ahoppen/raw-syntax-text-storage
[libSyntax] Store the token's text in the SyntaxArena
2 parents 1d13550 + c1d65de commit 512411b

File tree

11 files changed

+81
-94
lines changed

11 files changed

+81
-94
lines changed

include/swift/Syntax/RawSyntax.h

Lines changed: 27 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ typedef unsigned SyntaxNodeId;
150150
///
151151
/// This is implementation detail - do not expose it in public API.
152152
class RawSyntax final
153-
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString> {
153+
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>> {
154154
friend TrailingObjects;
155155

156156
/// The ID that shall be used for the next node that is created and does not
@@ -160,15 +160,17 @@ class RawSyntax final
160160
/// An ID of this node that is stable across incremental parses
161161
SyntaxNodeId NodeId;
162162

163+
mutable std::atomic<int> RefCount;
164+
163165
/// If this node was allocated using a \c SyntaxArena's bump allocator, a
164166
/// reference to the arena to keep the underlying memory buffer of this node
165167
/// alive. If this is a \c nullptr, the node owns its own memory buffer.
166168
RC<SyntaxArena> Arena;
167169

168170
union {
169171
struct {
170-
// FIXME: Reduce TextLength to 30 bits so that common fits in 4 bytes?
171-
/// Number of bytes this node takes up spelled out in the source code
172+
/// Number of bytes this node takes up spelled out in the source code.
173+
/// Always 0 if the node is missing.
172174
unsigned TextLength : 32;
173175
/// Whether this piece of syntax was actually present in the source.
174176
unsigned Presence : 1;
@@ -196,18 +198,19 @@ class RawSyntax final
196198
"Only 64 bits reserved for standard syntax bits");
197199
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits
198200
/// The kind of token this "token" node represents.
201+
const char *LeadingTrivia;
202+
const char *TokenText;
203+
const char *TrailingTrivia;
204+
unsigned LeadingTriviaLength : 32;
205+
unsigned TokenLength : 32;
206+
unsigned TrailingTriviaLength : 32;
199207
unsigned TokenKind : 16;
200-
StringRef LeadingTrivia;
201-
StringRef TrailingTrivia;
202208
} Token;
203209
} Bits;
204210

205211
size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
206212
return isToken() ? 0 : Bits.Layout.NumChildren;
207213
}
208-
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
209-
return isToken() ? 1 : 0;
210-
}
211214

212215
/// Constructor for creating layout nodes.
213216
/// If the node has been allocated inside the bump allocator of a
@@ -223,7 +226,7 @@ class RawSyntax final
223226
/// underlying storage.
224227
/// If \p NodeId is \c None, the next free NodeId is used, if it is passed,
225228
/// the caller needs to assure that the NodeId has not been used yet.
226-
RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
229+
RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
227230
StringRef LeadingTrivia, StringRef TrailingTrivia,
228231
SourcePresence Presence, const RC<SyntaxArena> &Arena,
229232
llvm::Optional<SyntaxNodeId> NodeId);
@@ -240,8 +243,6 @@ class RawSyntax final
240243
return TextLength;
241244
}
242245

243-
mutable std::atomic<int> RefCount;
244-
245246
public:
246247
~RawSyntax();
247248

@@ -286,15 +287,15 @@ class RawSyntax final
286287
}
287288

288289
/// Make a raw "token" syntax node.
289-
static RC<RawSyntax> make(tok TokKind, OwnedString Text, size_t TextLength,
290+
static RC<RawSyntax> make(tok TokKind, StringRef Text, size_t TextLength,
290291
StringRef LeadingTrivia, StringRef TrailingTrivia,
291292
SourcePresence Presence,
292293
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
293294
llvm::Optional<SyntaxNodeId> NodeId = llvm::None);
294295

295296
/// Make a raw "token" syntax node that was allocated in \p Arena.
296297
static RC<RawSyntax>
297-
makeAndCalcLength(tok TokKind, OwnedString Text, StringRef LeadingTrivia,
298+
makeAndCalcLength(tok TokKind, StringRef Text, StringRef LeadingTrivia,
298299
StringRef TrailingTrivia, SourcePresence Presence,
299300
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
300301
llvm::Optional<SyntaxNodeId> NodeId = llvm::None) {
@@ -316,7 +317,7 @@ class RawSyntax final
316317

317318
/// Make a missing raw "token" syntax node.
318319
static RC<RawSyntax>
319-
missing(tok TokKind, OwnedString Text,
320+
missing(tok TokKind, StringRef Text,
320321
const RC<SyntaxArena> &Arena = SyntaxArena::make()) {
321322
return make(TokKind, Text, /*TextLength=*/0, {}, {},
322323
SourcePresence::Missing, Arena);
@@ -390,27 +391,24 @@ class RawSyntax final
390391
return static_cast<tok>(Bits.Token.TokenKind);
391392
}
392393

393-
/// Return the text of the token as an \c OwnedString. Keeping a reference to
394-
/// this string will keep it alive even if the syntax node gets freed.
395-
OwnedString getOwnedTokenText() const {
396-
assert(isToken());
397-
return *getTrailingObjects<OwnedString>();
398-
}
399-
400394
/// Return the text of the token as a reference. The referenced buffer may
401395
/// disappear when the syntax node gets freed.
402-
StringRef getTokenText() const { return getOwnedTokenText().str(); }
396+
StringRef getTokenText() const {
397+
assert(isToken());
398+
return StringRef(Bits.Token.TokenText, Bits.Token.TokenLength);
399+
}
403400

404401
/// Return the unparsed leading trivia of the token.
405402
StringRef getLeadingTrivia() const {
406403
assert(isToken());
407-
return Bits.Token.LeadingTrivia;
404+
return StringRef(Bits.Token.LeadingTrivia, Bits.Token.LeadingTriviaLength);
408405
}
409406

410407
/// Return the unparsed trailing trivia of the token.
411408
StringRef getTrailingTrivia() const {
412409
assert(isToken());
413-
return Bits.Token.TrailingTrivia;
410+
return StringRef(Bits.Token.TrailingTrivia,
411+
Bits.Token.TrailingTriviaLength);
414412
}
415413

416414
/// Return pieces that make up the leading trivia of the token.
@@ -434,17 +432,15 @@ class RawSyntax final
434432
/// Return a new token like this one, but with the given leading
435433
/// trivia instead.
436434
RC<RawSyntax> withLeadingTrivia(StringRef NewLeadingTrivia) const {
437-
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
438-
NewLeadingTrivia, getTrailingTrivia(),
439-
getPresence());
435+
return makeAndCalcLength(getTokenKind(), getTokenText(), NewLeadingTrivia,
436+
getTrailingTrivia(), getPresence());
440437
}
441438

442439
/// Return a new token like this one, but with the given trailing
443440
/// trivia instead.
444441
RC<RawSyntax> withTrailingTrivia(StringRef NewTrailingTrivia) const {
445-
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
446-
getLeadingTrivia(), NewTrailingTrivia,
447-
getPresence());
442+
return makeAndCalcLength(getTokenKind(), getTokenText(), getLeadingTrivia(),
443+
NewTrailingTrivia, getPresence());
448444
}
449445

450446
/// @}
@@ -503,7 +499,7 @@ class RawSyntax final
503499
/// Dump this piece of syntax recursively.
504500
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;
505501

506-
static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, OwnedString Text,
502+
static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
507503
StringRef LeadingTrivia, StringRef TrailingTrivia);
508504
};
509505

include/swift/Syntax/Serialization/SyntaxDeserialization.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -163,8 +163,8 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
163163
in.mapRequired("id", nodeIdString);
164164
unsigned nodeId = std::atoi(nodeIdString.data());
165165
value = swift::RawSyntax::makeAndCalcLength(
166-
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
167-
trailingTrivia, presence, swift::SyntaxArena::make(), nodeId);
166+
tokenKind, text, leadingTrivia, trailingTrivia, presence,
167+
swift::SyntaxArena::make(), nodeId);
168168
} else {
169169
swift::SyntaxKind kind;
170170
in.mapRequired("kind", kind);

include/swift/Syntax/SyntaxFactory.h.gyb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class SyntaxArena;
4646
struct SyntaxFactory {
4747
/// Make any kind of token.
4848
static TokenSyntax makeToken(tok Kind,
49-
OwnedString Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
49+
StringRef Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
5050
SourcePresence Presence,
5151
const RC<SyntaxArena> &Arena = SyntaxArena::make()
5252
);
@@ -108,7 +108,7 @@ struct SyntaxFactory {
108108
StringRef TrailingTrivia,
109109
const RC<SyntaxArena> &Arena = SyntaxArena::make());
110110
% else:
111-
static TokenSyntax make${token.name}(OwnedString Text,
111+
static TokenSyntax make${token.name}(StringRef Text,
112112
StringRef LeadingTrivia, StringRef TrailingTrivia,
113113
const RC<SyntaxArena> &Arena = SyntaxArena::make());
114114
% end
@@ -139,7 +139,7 @@ struct SyntaxFactory {
139139

140140
/// Creates a TypeIdentifierSyntax with the provided name and leading/trailing
141141
/// trivia.
142-
static TypeSyntax makeTypeIdentifier(OwnedString TypeName,
142+
static TypeSyntax makeTypeIdentifier(StringRef TypeName,
143143
StringRef LeadingTrivia = {}, StringRef TrailingTrivia = {},
144144
const RC<SyntaxArena> &Arena = SyntaxArena::make()
145145
);

include/swift/Syntax/TokenSyntax.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ class TokenSyntax final : public Syntax {
3535
public:
3636
TokenSyntax(const SyntaxData Data) : Syntax(Data) {}
3737

38-
static TokenSyntax missingToken(const tok Kind, OwnedString Text) {
38+
static TokenSyntax missingToken(const tok Kind, StringRef Text) {
3939
return makeRoot<TokenSyntax>(RawSyntax::missing(Kind, Text));
4040
}
4141

lib/Parse/Parser.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -336,12 +336,11 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
336336
/*SplitTokens=*/ArrayRef<Token>(),
337337
[&](const Token &Tok, StringRef LeadingTrivia, StringRef TrailingTrivia) {
338338
CharSourceRange TokRange = Tok.getRange();
339-
auto Text = OwnedString::makeRefCounted(Tok.getRawText());
340339
size_t TextLength = LeadingTrivia.size() + TokRange.getByteLength() +
341340
TrailingTrivia.size();
342-
auto ThisToken =
343-
RawSyntax::make(Tok.getKind(), Text, TextLength, LeadingTrivia,
344-
TrailingTrivia, SourcePresence::Present);
341+
auto ThisToken = RawSyntax::make(
342+
Tok.getKind(), Tok.getRawText(), TextLength, LeadingTrivia,
343+
TrailingTrivia, SourcePresence::Present);
345344

346345
auto ThisTokenPos =
347346
RunningPos.advancedBy(ThisToken->getLeadingTriviaLength());

lib/Syntax/RawSyntax.cpp

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -111,8 +111,8 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
111111
size_t TextLength, SourcePresence Presence,
112112
const RC<SyntaxArena> &Arena,
113113
llvm::Optional<unsigned> NodeId)
114-
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), false}}),
115-
RefCount(0) {
114+
: RefCount(0), Arena(Arena),
115+
Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
116116
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
117117
assert(Kind != SyntaxKind::Token &&
118118
"'token' syntax node must be constructed with dedicated constructor");
@@ -139,36 +139,41 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
139139
getTrailingObjects<RC<RawSyntax>>());
140140
}
141141

142-
RawSyntax::RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
142+
RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
143143
StringRef LeadingTrivia, StringRef TrailingTrivia,
144144
SourcePresence Presence, const RC<SyntaxArena> &Arena,
145145
llvm::Optional<unsigned> NodeId)
146-
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), true}}),
147-
RefCount(0) {
146+
: RefCount(0), Arena(Arena),
147+
Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
148148
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
149149
copyToArenaIfNecessary(LeadingTrivia, Arena);
150+
copyToArenaIfNecessary(Text, Arena);
150151
copyToArenaIfNecessary(TrailingTrivia, Arena);
151152

153+
if (Presence == SourcePresence::Missing) {
154+
assert(TextLength == 0);
155+
} else {
156+
assert(TextLength ==
157+
LeadingTrivia.size() + Text.size() + TrailingTrivia.size());
158+
}
159+
152160
if (NodeId.hasValue()) {
153161
this->NodeId = NodeId.getValue();
154162
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
155163
} else {
156164
this->NodeId = NextFreeNodeId++;
157165
}
166+
Bits.Token.LeadingTrivia = LeadingTrivia.data();
167+
Bits.Token.TokenText = Text.data();
168+
Bits.Token.TrailingTrivia = TrailingTrivia.data();
169+
Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
170+
Bits.Token.TokenLength = Text.size();
171+
Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
158172
Bits.Token.TokenKind = unsigned(TokKind);
159-
// FIXME: Copy the backing storage of the string into the arena
160-
Bits.Token.LeadingTrivia = LeadingTrivia;
161-
Bits.Token.TrailingTrivia = TrailingTrivia;
162-
163-
// Initialize token text.
164-
::new (static_cast<void *>(getTrailingObjects<OwnedString>()))
165-
OwnedString(Text);
166173
}
167174

168175
RawSyntax::~RawSyntax() {
169-
if (isToken()) {
170-
getTrailingObjects<OwnedString>()->~OwnedString();
171-
} else {
176+
if (!isToken()) {
172177
for (auto &child : getLayout())
173178
child.~RC<RawSyntax>();
174179
}
@@ -179,19 +184,19 @@ RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
179184
const RC<SyntaxArena> &Arena,
180185
llvm::Optional<unsigned> NodeId) {
181186
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
182-
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(Layout.size(), 0);
187+
auto size = totalSizeToAlloc<RC<RawSyntax>>(Layout.size());
183188
void *data = Arena->Allocate(size, alignof(RawSyntax));
184189
return RC<RawSyntax>(
185190
new (data) RawSyntax(Kind, Layout, TextLength, Presence, Arena, NodeId));
186191
}
187192

188-
RC<RawSyntax> RawSyntax::make(tok TokKind, OwnedString Text, size_t TextLength,
193+
RC<RawSyntax> RawSyntax::make(tok TokKind, StringRef Text, size_t TextLength,
189194
StringRef LeadingTrivia, StringRef TrailingTrivia,
190195
SourcePresence Presence,
191196
const RC<SyntaxArena> &Arena,
192197
llvm::Optional<unsigned> NodeId) {
193198
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
194-
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(0, 1);
199+
auto size = totalSizeToAlloc<RC<RawSyntax>>(0);
195200
void *data = Arena->Allocate(size, alignof(RawSyntax));
196201
return RC<RawSyntax>(new (data)
197202
RawSyntax(TokKind, Text, TextLength, LeadingTrivia,
@@ -306,9 +311,8 @@ void RawSyntax::dump(llvm::raw_ostream &OS, unsigned Indent) const {
306311
OS << ')';
307312
}
308313

309-
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
310-
OwnedString Text, StringRef LeadingTrivia,
311-
StringRef TrailingTrivia) {
314+
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
315+
StringRef LeadingTrivia, StringRef TrailingTrivia) {
312316
ID.AddInteger(unsigned(TokKind));
313317
ID.AddInteger(LeadingTrivia.size());
314318
ID.AddInteger(TrailingTrivia.size());
@@ -320,7 +324,7 @@ void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
320324
#include "swift/Syntax/TokenKinds.def"
321325
break;
322326
default:
323-
ID.AddString(Text.str());
327+
ID.AddString(Text);
324328
break;
325329
}
326330
ID.AddString(LeadingTrivia);

0 commit comments

Comments
 (0)