Skip to content

Commit c1d65de

Browse files
committed
[libSyntax] Optimise layout of RawSyntax to be more space efficient
This decreases the size of RawSyntax nodes from 88 to 64 bytes by:
- Avoiding some padding by moving RefCount further up.
- Limiting the length of tokens and their trivia to 32 bits. We would hit this limit with files >4GB, but such files already hit the same limit in other places, like the TextLength property in the Common bits.
1 parent e43bad2 commit c1d65de

File tree

2 files changed

+31
-17
lines changed

2 files changed

+31
-17
lines changed

include/swift/Syntax/RawSyntax.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -160,15 +160,17 @@ class RawSyntax final
160160
/// An ID of this node that is stable across incremental parses
161161
SyntaxNodeId NodeId;
162162

163+
mutable std::atomic<int> RefCount;
164+
163165
/// If this node was allocated using a \c SyntaxArena's bump allocator, a
164166
/// reference to the arena to keep the underlying memory buffer of this node
165167
/// alive. If this is a \c nullptr, the node owns its own memory buffer.
166168
RC<SyntaxArena> Arena;
167169

168170
union {
169171
struct {
170-
// FIXME: Reduce TextLength to 30 bits so that common fits in 4 bytes?
171-
/// Number of bytes this node takes up spelled out in the source code
172+
/// Number of bytes this node takes up spelled out in the source code.
173+
/// Always 0 if the node is missing.
172174
unsigned TextLength : 32;
173175
/// Whether this piece of syntax was actually present in the source.
174176
unsigned Presence : 1;
@@ -196,10 +198,13 @@ class RawSyntax final
196198
"Only 64 bits reserved for standard syntax bits");
197199
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits
198200
/// The kind of token this "token" node represents.
201+
const char *LeadingTrivia;
202+
const char *TokenText;
203+
const char *TrailingTrivia;
204+
unsigned LeadingTriviaLength : 32;
205+
unsigned TokenLength : 32;
206+
unsigned TrailingTriviaLength : 32;
199207
unsigned TokenKind : 16;
200-
StringRef LeadingTrivia;
201-
StringRef TokenText;
202-
StringRef TrailingTrivia;
203208
} Token;
204209
} Bits;
205210

@@ -238,8 +243,6 @@ class RawSyntax final
238243
return TextLength;
239244
}
240245

241-
mutable std::atomic<int> RefCount;
242-
243246
public:
244247
~RawSyntax();
245248

@@ -392,19 +395,20 @@ class RawSyntax final
392395
/// disappear when the syntax node gets freed.
393396
StringRef getTokenText() const {
394397
assert(isToken());
395-
return Bits.Token.TokenText;
398+
return StringRef(Bits.Token.TokenText, Bits.Token.TokenLength);
396399
}
397400

398401
/// Return the unparsed leading trivia of the token.
399402
StringRef getLeadingTrivia() const {
400403
assert(isToken());
401-
return Bits.Token.LeadingTrivia;
404+
return StringRef(Bits.Token.LeadingTrivia, Bits.Token.LeadingTriviaLength);
402405
}
403406

404407
/// Return the unparsed trailing trivia of the token.
405408
StringRef getTrailingTrivia() const {
406409
assert(isToken());
407-
return Bits.Token.TrailingTrivia;
410+
return StringRef(Bits.Token.TrailingTrivia,
411+
Bits.Token.TrailingTriviaLength);
408412
}
409413

410414
/// Return pieces that make up the leading trivia of the token.

lib/Syntax/RawSyntax.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
111111
size_t TextLength, SourcePresence Presence,
112112
const RC<SyntaxArena> &Arena,
113113
llvm::Optional<unsigned> NodeId)
114-
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), false}}),
115-
RefCount(0) {
114+
: RefCount(0), Arena(Arena),
115+
Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
116116
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
117117
assert(Kind != SyntaxKind::Token &&
118118
"'token' syntax node must be constructed with dedicated constructor");
@@ -143,23 +143,33 @@ RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
143143
StringRef LeadingTrivia, StringRef TrailingTrivia,
144144
SourcePresence Presence, const RC<SyntaxArena> &Arena,
145145
llvm::Optional<unsigned> NodeId)
146-
: Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), true}}),
147-
RefCount(0) {
146+
: RefCount(0), Arena(Arena),
147+
Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
148148
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
149149
copyToArenaIfNecessary(LeadingTrivia, Arena);
150150
copyToArenaIfNecessary(Text, Arena);
151151
copyToArenaIfNecessary(TrailingTrivia, Arena);
152152

153+
if (Presence == SourcePresence::Missing) {
154+
assert(TextLength == 0);
155+
} else {
156+
assert(TextLength ==
157+
LeadingTrivia.size() + Text.size() + TrailingTrivia.size());
158+
}
159+
153160
if (NodeId.hasValue()) {
154161
this->NodeId = NodeId.getValue();
155162
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
156163
} else {
157164
this->NodeId = NextFreeNodeId++;
158165
}
166+
Bits.Token.LeadingTrivia = LeadingTrivia.data();
167+
Bits.Token.TokenText = Text.data();
168+
Bits.Token.TrailingTrivia = TrailingTrivia.data();
169+
Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
170+
Bits.Token.TokenLength = Text.size();
171+
Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
159172
Bits.Token.TokenKind = unsigned(TokKind);
160-
Bits.Token.LeadingTrivia = LeadingTrivia;
161-
Bits.Token.TokenText = Text;
162-
Bits.Token.TrailingTrivia = TrailingTrivia;
163173
}
164174

165175
RawSyntax::~RawSyntax() {

0 commit comments

Comments
 (0)