Skip to content

Commit e43bad2

Browse files
committed
[libSyntax] Store the token's text in the SyntaxArena
Do the same thing that we are already doing for trivia: since RawSyntax nodes always live inside a SyntaxArena, we don't need to tail-allocate an OwnedString to store the token's text. Instead, we can just copy it to the SyntaxArena. If we copy the entire source buffer to the syntax arena at the start of parsing, this means that no more copies are required later on. Plus, we also avoid ref-counting the OwnedString, which should also increase performance.
1 parent a2996b7 commit e43bad2

File tree

11 files changed

+53
-80
lines changed

11 files changed

+53
-80
lines changed

include/swift/Syntax/RawSyntax.h

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ typedef unsigned SyntaxNodeId;
150150
///
151151
/// This is an implementation detail - do not expose it in public API.
152152
class RawSyntax final
153-
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString> {
153+
: private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>> {
154154
friend TrailingObjects;
155155

156156
/// The ID that shall be used for the next node that is created and does not
@@ -198,16 +198,14 @@ class RawSyntax final
198198
/// The kind of token this "token" node represents.
199199
unsigned TokenKind : 16;
200200
StringRef LeadingTrivia;
201+
StringRef TokenText;
201202
StringRef TrailingTrivia;
202203
} Token;
203204
} Bits;
204205

205206
size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
206207
return isToken() ? 0 : Bits.Layout.NumChildren;
207208
}
208-
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
209-
return isToken() ? 1 : 0;
210-
}
211209

212210
/// Constructor for creating layout nodes.
213211
/// If the node has been allocated inside the bump allocator of a
@@ -223,7 +221,7 @@ class RawSyntax final
223221
/// underlying storage.
224222
/// If \p NodeId is \c None, the next free NodeId is used, if it is passed,
225223
/// the caller needs to ensure that the NodeId has not been used yet.
226-
RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
224+
RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
227225
StringRef LeadingTrivia, StringRef TrailingTrivia,
228226
SourcePresence Presence, const RC<SyntaxArena> &Arena,
229227
llvm::Optional<SyntaxNodeId> NodeId);
@@ -286,15 +284,15 @@ class RawSyntax final
286284
}
287285

288286
/// Make a raw "token" syntax node.
289-
static RC<RawSyntax> make(tok TokKind, OwnedString Text, size_t TextLength,
287+
static RC<RawSyntax> make(tok TokKind, StringRef Text, size_t TextLength,
290288
StringRef LeadingTrivia, StringRef TrailingTrivia,
291289
SourcePresence Presence,
292290
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
293291
llvm::Optional<SyntaxNodeId> NodeId = llvm::None);
294292

295293
/// Make a raw "token" syntax node that was allocated in \p Arena.
296294
static RC<RawSyntax>
297-
makeAndCalcLength(tok TokKind, OwnedString Text, StringRef LeadingTrivia,
295+
makeAndCalcLength(tok TokKind, StringRef Text, StringRef LeadingTrivia,
298296
StringRef TrailingTrivia, SourcePresence Presence,
299297
const RC<SyntaxArena> &Arena = SyntaxArena::make(),
300298
llvm::Optional<SyntaxNodeId> NodeId = llvm::None) {
@@ -316,7 +314,7 @@ class RawSyntax final
316314

317315
/// Make a missing raw "token" syntax node.
318316
static RC<RawSyntax>
319-
missing(tok TokKind, OwnedString Text,
317+
missing(tok TokKind, StringRef Text,
320318
const RC<SyntaxArena> &Arena = SyntaxArena::make()) {
321319
return make(TokKind, Text, /*TextLength=*/0, {}, {},
322320
SourcePresence::Missing, Arena);
@@ -390,16 +388,12 @@ class RawSyntax final
390388
return static_cast<tok>(Bits.Token.TokenKind);
391389
}
392390

393-
/// Return the text of the token as an \c OwnedString. Keeping a reference to
394-
/// this string will keep it alive even if the syntax node gets freed.
395-
OwnedString getOwnedTokenText() const {
396-
assert(isToken());
397-
return *getTrailingObjects<OwnedString>();
398-
}
399-
400391
/// Return the text of the token as a reference. The referenced buffer may
401392
/// disappear when the syntax node gets freed.
402-
StringRef getTokenText() const { return getOwnedTokenText().str(); }
393+
StringRef getTokenText() const {
394+
assert(isToken());
395+
return Bits.Token.TokenText;
396+
}
403397

404398
/// Return the unparsed leading trivia of the token.
405399
StringRef getLeadingTrivia() const {
@@ -434,17 +428,15 @@ class RawSyntax final
434428
/// Return a new token like this one, but with the given leading
435429
/// trivia instead.
436430
RC<RawSyntax> withLeadingTrivia(StringRef NewLeadingTrivia) const {
437-
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
438-
NewLeadingTrivia, getTrailingTrivia(),
439-
getPresence());
431+
return makeAndCalcLength(getTokenKind(), getTokenText(), NewLeadingTrivia,
432+
getTrailingTrivia(), getPresence());
440433
}
441434

442435
/// Return a new token like this one, but with the given trailing
443436
/// trivia instead.
444437
RC<RawSyntax> withTrailingTrivia(StringRef NewTrailingTrivia) const {
445-
return makeAndCalcLength(getTokenKind(), getOwnedTokenText(),
446-
getLeadingTrivia(), NewTrailingTrivia,
447-
getPresence());
438+
return makeAndCalcLength(getTokenKind(), getTokenText(), getLeadingTrivia(),
439+
NewTrailingTrivia, getPresence());
448440
}
449441

450442
/// @}
@@ -503,7 +495,7 @@ class RawSyntax final
503495
/// Dump this piece of syntax recursively.
504496
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;
505497

506-
static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, OwnedString Text,
498+
static void Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
507499
StringRef LeadingTrivia, StringRef TrailingTrivia);
508500
};
509501

include/swift/Syntax/Serialization/SyntaxDeserialization.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
163163
in.mapRequired("id", nodeIdString);
164164
unsigned nodeId = std::atoi(nodeIdString.data());
165165
value = swift::RawSyntax::makeAndCalcLength(
166-
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
167-
trailingTrivia, presence, swift::SyntaxArena::make(), nodeId);
166+
tokenKind, text, leadingTrivia, trailingTrivia, presence,
167+
swift::SyntaxArena::make(), nodeId);
168168
} else {
169169
swift::SyntaxKind kind;
170170
in.mapRequired("kind", kind);

include/swift/Syntax/SyntaxFactory.h.gyb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class SyntaxArena;
4646
struct SyntaxFactory {
4747
/// Make any kind of token.
4848
static TokenSyntax makeToken(tok Kind,
49-
OwnedString Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
49+
StringRef Text, StringRef LeadingTrivia, StringRef TrailingTrivia,
5050
SourcePresence Presence,
5151
const RC<SyntaxArena> &Arena = SyntaxArena::make()
5252
);
@@ -108,7 +108,7 @@ struct SyntaxFactory {
108108
StringRef TrailingTrivia,
109109
const RC<SyntaxArena> &Arena = SyntaxArena::make());
110110
% else:
111-
static TokenSyntax make${token.name}(OwnedString Text,
111+
static TokenSyntax make${token.name}(StringRef Text,
112112
StringRef LeadingTrivia, StringRef TrailingTrivia,
113113
const RC<SyntaxArena> &Arena = SyntaxArena::make());
114114
% end
@@ -139,7 +139,7 @@ struct SyntaxFactory {
139139

140140
/// Creates a TypeIdentifierSyntax with the provided name and leading/trailing
141141
/// trivia.
142-
static TypeSyntax makeTypeIdentifier(OwnedString TypeName,
142+
static TypeSyntax makeTypeIdentifier(StringRef TypeName,
143143
StringRef LeadingTrivia = {}, StringRef TrailingTrivia = {},
144144
const RC<SyntaxArena> &Arena = SyntaxArena::make()
145145
);

include/swift/Syntax/TokenSyntax.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class TokenSyntax final : public Syntax {
3535
public:
3636
TokenSyntax(const SyntaxData Data) : Syntax(Data) {}
3737

38-
static TokenSyntax missingToken(const tok Kind, OwnedString Text) {
38+
static TokenSyntax missingToken(const tok Kind, StringRef Text) {
3939
return makeRoot<TokenSyntax>(RawSyntax::missing(Kind, Text));
4040
}
4141

lib/Parse/Parser.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,12 +336,11 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
336336
/*SplitTokens=*/ArrayRef<Token>(),
337337
[&](const Token &Tok, StringRef LeadingTrivia, StringRef TrailingTrivia) {
338338
CharSourceRange TokRange = Tok.getRange();
339-
auto Text = OwnedString::makeRefCounted(Tok.getRawText());
340339
size_t TextLength = LeadingTrivia.size() + TokRange.getByteLength() +
341340
TrailingTrivia.size();
342-
auto ThisToken =
343-
RawSyntax::make(Tok.getKind(), Text, TextLength, LeadingTrivia,
344-
TrailingTrivia, SourcePresence::Present);
341+
auto ThisToken = RawSyntax::make(
342+
Tok.getKind(), Tok.getRawText(), TextLength, LeadingTrivia,
343+
TrailingTrivia, SourcePresence::Present);
345344

346345
auto ThisTokenPos =
347346
RunningPos.advancedBy(ThisToken->getLeadingTriviaLength());

lib/Syntax/RawSyntax.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,15 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
139139
getTrailingObjects<RC<RawSyntax>>());
140140
}
141141

142-
RawSyntax::RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
142+
RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
143143
StringRef LeadingTrivia, StringRef TrailingTrivia,
144144
SourcePresence Presence, const RC<SyntaxArena> &Arena,
145145
llvm::Optional<unsigned> NodeId)
146146
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), true}}),
147147
RefCount(0) {
148148
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
149149
copyToArenaIfNecessary(LeadingTrivia, Arena);
150+
copyToArenaIfNecessary(Text, Arena);
150151
copyToArenaIfNecessary(TrailingTrivia, Arena);
151152

152153
if (NodeId.hasValue()) {
@@ -156,19 +157,13 @@ RawSyntax::RawSyntax(tok TokKind, OwnedString Text, size_t TextLength,
156157
this->NodeId = NextFreeNodeId++;
157158
}
158159
Bits.Token.TokenKind = unsigned(TokKind);
159-
// FIXME: Copy the backing storage of the string into the arena
160160
Bits.Token.LeadingTrivia = LeadingTrivia;
161+
Bits.Token.TokenText = Text;
161162
Bits.Token.TrailingTrivia = TrailingTrivia;
162-
163-
// Initialize token text.
164-
::new (static_cast<void *>(getTrailingObjects<OwnedString>()))
165-
OwnedString(Text);
166163
}
167164

168165
RawSyntax::~RawSyntax() {
169-
if (isToken()) {
170-
getTrailingObjects<OwnedString>()->~OwnedString();
171-
} else {
166+
if (!isToken()) {
172167
for (auto &child : getLayout())
173168
child.~RC<RawSyntax>();
174169
}
@@ -179,19 +174,19 @@ RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
179174
const RC<SyntaxArena> &Arena,
180175
llvm::Optional<unsigned> NodeId) {
181176
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
182-
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(Layout.size(), 0);
177+
auto size = totalSizeToAlloc<RC<RawSyntax>>(Layout.size());
183178
void *data = Arena->Allocate(size, alignof(RawSyntax));
184179
return RC<RawSyntax>(
185180
new (data) RawSyntax(Kind, Layout, TextLength, Presence, Arena, NodeId));
186181
}
187182

188-
RC<RawSyntax> RawSyntax::make(tok TokKind, OwnedString Text, size_t TextLength,
183+
RC<RawSyntax> RawSyntax::make(tok TokKind, StringRef Text, size_t TextLength,
189184
StringRef LeadingTrivia, StringRef TrailingTrivia,
190185
SourcePresence Presence,
191186
const RC<SyntaxArena> &Arena,
192187
llvm::Optional<unsigned> NodeId) {
193188
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
194-
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString>(0, 1);
189+
auto size = totalSizeToAlloc<RC<RawSyntax>>(0);
195190
void *data = Arena->Allocate(size, alignof(RawSyntax));
196191
return RC<RawSyntax>(new (data)
197192
RawSyntax(TokKind, Text, TextLength, LeadingTrivia,
@@ -306,9 +301,8 @@ void RawSyntax::dump(llvm::raw_ostream &OS, unsigned Indent) const {
306301
OS << ')';
307302
}
308303

309-
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
310-
OwnedString Text, StringRef LeadingTrivia,
311-
StringRef TrailingTrivia) {
304+
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
305+
StringRef LeadingTrivia, StringRef TrailingTrivia) {
312306
ID.AddInteger(unsigned(TokKind));
313307
ID.AddInteger(LeadingTrivia.size());
314308
ID.AddInteger(TrailingTrivia.size());
@@ -320,7 +314,7 @@ void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind,
320314
#include "swift/Syntax/TokenKinds.def"
321315
break;
322316
default:
323-
ID.AddString(Text.str());
317+
ID.AddString(Text);
324318
break;
325319
}
326320
ID.AddString(LeadingTrivia);

lib/Syntax/SyntaxFactory.cpp.gyb

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
using namespace swift;
3939
using namespace swift::syntax;
4040

41-
TokenSyntax SyntaxFactory::makeToken(tok Kind, OwnedString Text,
41+
TokenSyntax SyntaxFactory::makeToken(tok Kind, StringRef Text,
4242
StringRef LeadingTrivia,
4343
StringRef TrailingTrivia,
4444
SourcePresence Presence,
@@ -239,29 +239,24 @@ SyntaxFactory::makeBlank${node.syntax_kind}(const RC<SyntaxArena> &Arena) {
239239
SyntaxFactory::make${token.name}Keyword(StringRef LeadingTrivia,
240240
StringRef TrailingTrivia,
241241
const RC<SyntaxArena> &Arena) {
242-
return makeToken(tok::${token.kind},
243-
OwnedString::makeUnowned("${token.text}"),
244-
LeadingTrivia, TrailingTrivia,
245-
SourcePresence::Present, Arena);
242+
return makeToken(tok::${token.kind}, "${token.text}", LeadingTrivia,
243+
TrailingTrivia, SourcePresence::Present, Arena);
246244
}
247245
% elif token.text:
248246
TokenSyntax
249247
SyntaxFactory::make${token.name}Token(StringRef LeadingTrivia,
250248
StringRef TrailingTrivia,
251249
const RC<SyntaxArena> &Arena) {
252-
return makeToken(tok::${token.kind},
253-
OwnedString::makeUnowned("${token.text}"),
254-
LeadingTrivia, TrailingTrivia,
255-
SourcePresence::Present, Arena);
250+
return makeToken(tok::${token.kind}, "${token.text}", LeadingTrivia,
251+
TrailingTrivia, SourcePresence::Present, Arena);
256252
}
257253
% else:
258254
TokenSyntax
259-
SyntaxFactory::make${token.name}(OwnedString Text,
255+
SyntaxFactory::make${token.name}(StringRef Text,
260256
StringRef LeadingTrivia,
261257
StringRef TrailingTrivia,
262258
const RC<SyntaxArena> &Arena) {
263-
return makeToken(tok::${token.kind}, Text,
264-
LeadingTrivia, TrailingTrivia,
259+
return makeToken(tok::${token.kind}, Text, LeadingTrivia, TrailingTrivia,
265260
SourcePresence::Present, Arena);
266261
}
267262
% end
@@ -299,7 +294,7 @@ SyntaxFactory::makeGenericParameter(TokenSyntax Name,
299294
return makeGenericParameter(None, Name, None, None, TrailingComma, Arena);
300295
}
301296

302-
TypeSyntax SyntaxFactory::makeTypeIdentifier(OwnedString TypeName,
297+
TypeSyntax SyntaxFactory::makeTypeIdentifier(StringRef TypeName,
303298
StringRef LeadingTrivia,
304299
StringRef TrailingTrivia,
305300
const RC<SyntaxArena> &Arena) {
@@ -311,35 +306,30 @@ TypeSyntax SyntaxFactory::makeTypeIdentifier(OwnedString TypeName,
311306
TypeSyntax SyntaxFactory::makeAnyTypeIdentifier(StringRef LeadingTrivia,
312307
StringRef TrailingTrivia,
313308
const RC<SyntaxArena> &Arena) {
314-
return makeTypeIdentifier(OwnedString::makeUnowned("Any"), LeadingTrivia,
315-
TrailingTrivia, Arena);
309+
return makeTypeIdentifier("Any", LeadingTrivia, TrailingTrivia, Arena);
316310
}
317311

318312
TypeSyntax SyntaxFactory::makeSelfTypeIdentifier(StringRef LeadingTrivia,
319313
StringRef TrailingTrivia,
320314
const RC<SyntaxArena> &Arena) {
321-
return makeTypeIdentifier(OwnedString::makeUnowned("Self"),
322-
LeadingTrivia, TrailingTrivia, Arena);
315+
return makeTypeIdentifier("Self", LeadingTrivia, TrailingTrivia, Arena);
323316
}
324317

325318
TokenSyntax SyntaxFactory::makeTypeToken(StringRef LeadingTrivia,
326319
StringRef TrailingTrivia,
327320
const RC<SyntaxArena> &Arena) {
328-
return makeIdentifier(OwnedString::makeUnowned("Type"),
329-
LeadingTrivia, TrailingTrivia, Arena);
321+
return makeIdentifier("Type", LeadingTrivia, TrailingTrivia, Arena);
330322
}
331323

332324
TokenSyntax SyntaxFactory::makeProtocolToken(StringRef LeadingTrivia,
333325
StringRef TrailingTrivia,
334326
const RC<SyntaxArena> &Arena) {
335-
return makeIdentifier(OwnedString::makeUnowned("Protocol"),
336-
LeadingTrivia, TrailingTrivia, Arena);
327+
return makeIdentifier("Protocol", LeadingTrivia, TrailingTrivia, Arena);
337328
}
338329

339330
TokenSyntax SyntaxFactory::makeEqualityOperator(StringRef LeadingTrivia,
340331
StringRef TrailingTrivia,
341332
const RC<SyntaxArena> &Arena) {
342-
return makeToken(tok::oper_binary_spaced, OwnedString::makeUnowned("=="),
343-
LeadingTrivia, TrailingTrivia, SourcePresence::Present,
344-
Arena);
333+
return makeToken(tok::oper_binary_spaced, "==", LeadingTrivia, TrailingTrivia,
334+
SourcePresence::Present, Arena);
345335
}

lib/SyntaxParse/RawSyntaxTokenCache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static bool shouldCacheNode(tok TokKind, size_t TextSize,
4040
}
4141

4242
RC<RawSyntax> RawSyntaxTokenCache::getToken(RC<SyntaxArena> &Arena, tok TokKind,
43-
size_t TextLength, OwnedString Text,
43+
size_t TextLength, StringRef Text,
4444
StringRef LeadingTrivia,
4545
StringRef TrailingTrivia) {
4646
// Determine whether this token is worth caching.

lib/SyntaxParse/RawSyntaxTokenCache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class RawSyntaxTokenCache {
6262

6363
public:
6464
RC<syntax::RawSyntax> getToken(RC<syntax::SyntaxArena> &Arena, tok TokKind,
65-
size_t TextLength, OwnedString Text,
65+
size_t TextLength, StringRef Text,
6666
StringRef LeadingTrivia,
6767
StringRef TrailingTrivia);
6868

lib/SyntaxParse/SyntaxTreeCreator.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,8 @@ OpaqueSyntaxNode SyntaxTreeCreator::recordToken(tok tokenKind,
135135
StringRef trailingTriviaText = ArenaSourceBuffer.substr(
136136
trailingTriviaStartOffset, trailingTrivia.size());
137137

138-
auto ownedText = OwnedString::makeRefCounted(tokenText);
139138
auto raw =
140-
TokenCache->getToken(Arena, tokenKind, range.getByteLength(), ownedText,
139+
TokenCache->getToken(Arena, tokenKind, range.getByteLength(), tokenText,
141140
leadingTriviaText, trailingTriviaText);
142141
OpaqueSyntaxNode opaqueN = raw.get();
143142
raw.resetWithoutRelease();
@@ -146,8 +145,7 @@ OpaqueSyntaxNode SyntaxTreeCreator::recordToken(tok tokenKind,
146145

147146
OpaqueSyntaxNode
148147
SyntaxTreeCreator::recordMissingToken(tok kind, SourceLoc loc) {
149-
auto ownedText = OwnedString::makeRefCounted(getTokenText(kind));
150-
auto raw = RawSyntax::missing(kind, ownedText, Arena);
148+
auto raw = RawSyntax::missing(kind, getTokenText(kind), Arena);
151149
OpaqueSyntaxNode opaqueN = raw.get();
152150
raw.resetWithoutRelease();
153151
return opaqueN;

0 commit comments

Comments
 (0)