Skip to content

Commit 6f55c27

Browse files
fixup! Address review feedback for more comments
1 parent 63ccd9c commit 6f55c27

File tree

2 files changed

+49
-12
lines changed

2 files changed

+49
-12
lines changed

llvm/lib/Support/TrieHashIndexGenerator.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,51 +14,82 @@
1414

1515
namespace llvm {
1616

17+
/// The utility class that helps computing the index of the object inside trie
18+
/// from its hash. The generator can be configured with the number of bits
19+
/// used for each level of trie structure with \c NumRootsBits and \c
20+
/// NumSubtrieBits.
21+
/// For example, try computing indexes for a 16-bit hash 0x1234 with 8-bit root
22+
/// and 4-bit sub-trie:
23+
///
24+
/// IndexGenerator IndexGen{8, 4, Hash};
25+
/// size_t index1 = IndexGen.next(); // index 18 in root node.
26+
/// size_t index2 = IndexGen.next(); // index 3 in sub-trie level 1.
27+
/// size_t index3 = IndexGen.next(); // index 4 in sub-tire level 2.
28+
///
29+
/// This is used by different trie implementation to figure out where to
30+
/// insert/find the object in the data structure.
1731
struct IndexGenerator {
1832
size_t NumRootBits;
1933
size_t NumSubtrieBits;
2034
ArrayRef<uint8_t> Bytes;
2135
std::optional<size_t> StartBit = std::nullopt;
2236

37+
// Get the number of bits used to generate current index.
2338
size_t getNumBits() const {
2439
assert(StartBit);
2540
size_t TotalNumBits = Bytes.size() * 8;
2641
assert(*StartBit <= TotalNumBits);
2742
return std::min(*StartBit ? NumSubtrieBits : NumRootBits,
2843
TotalNumBits - *StartBit);
2944
}
45+
46+
// Get the index of the object in the next level of trie.
3047
size_t next() {
3148
size_t Index;
3249
if (!StartBit) {
50+
// Compute index for root when StartBit is not set.
3351
StartBit = 0;
3452
Index = getIndex(Bytes, *StartBit, NumRootBits);
3553
} else {
54+
// Compute index for sub-trie.
3655
*StartBit += *StartBit ? NumSubtrieBits : NumRootBits;
3756
assert((*StartBit - NumRootBits) % NumSubtrieBits == 0);
3857
Index = getIndex(Bytes, *StartBit, NumSubtrieBits);
3958
}
4059
return Index;
4160
}
4261

62+
// Provide a hint to speed up the index generation by providing the
63+
// information of the hash in current level. For example, if the object is
64+
// known to have \c Index on a level that already consumes first n \c Bits of
65+
// the hash, it can start index generation from this level by calling \c hint
66+
// function.
4367
size_t hint(unsigned Index, unsigned Bit) {
4468
assert(Bit < Bytes.size() * 8);
4569
assert(Bit == 0 || (Bit - NumRootBits) % NumSubtrieBits == 0);
4670
StartBit = Bit;
4771
return Index;
4872
}
4973

74+
// Utility funciton for looking up the index in the trie for an object that
75+
// has colliding hash bits in the front as the hash of the object that is
76+
// currently being computed.
5077
size_t getCollidingBits(ArrayRef<uint8_t> CollidingBits) const {
5178
assert(StartBit);
5279
return getIndex(CollidingBits, *StartBit, NumSubtrieBits);
5380
}
5481

82+
// Compute the index for the object from its hash, current start bits, and
83+
// the number of bits used for current level.
5584
static size_t getIndex(ArrayRef<uint8_t> Bytes, size_t StartBit,
5685
size_t NumBits) {
5786
assert(StartBit < Bytes.size() * 8);
58-
87+
// Drop all the bits before StartBit.
5988
Bytes = Bytes.drop_front(StartBit / 8u);
6089
StartBit %= 8u;
6190
size_t Index = 0;
91+
// Compute the index using the bits in range [StartBit, StartBit + NumBits),
92+
// note the range can spread across few `uint8_t` in the array.
6293
for (uint8_t Byte : Bytes) {
6394
size_t ByteStart = 0, ByteEnd = 8;
6495
if (StartBit) {

llvm/lib/Support/TrieRawHashMap.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,15 @@ class TrieSubtrie final : public TrieNode {
102102
};
103103
} // end namespace
104104

105-
static size_t getTrieTailSize(size_t StartBit, size_t NumBits) {
106-
assert(NumBits < 20 && "Tries should have fewer than ~1M slots");
105+
// Compute the trailing object size in the trie node. This is the size of \c
106+
// Slots in TrieNodes that pointing to the children.
107+
static size_t getTrieTailSize(size_t NumBits) {
107108
return sizeof(TrieNode *) * (1u << NumBits);
108109
}
109110

110111
std::unique_ptr<TrieSubtrie> TrieSubtrie::create(size_t StartBit,
111112
size_t NumBits) {
112-
size_t Size = sizeof(TrieSubtrie) + getTrieTailSize(StartBit, NumBits);
113+
size_t Size = sizeof(TrieSubtrie) + getTrieTailSize(NumBits);
113114
void *Memory = ::malloc(Size);
114115
TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits);
115116
return std::unique_ptr<TrieSubtrie>(S);
@@ -128,15 +129,22 @@ TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits)
128129
"Expected no work in destructor for TrieNode");
129130
}
130131

132+
// Sink the nodes down sub-trie when the object being inserted collides with
133+
// the index of existing object in the trie. In this case, a new sub-trie needs
134+
// to be allocated to hold existing object.
131135
TrieSubtrie *TrieSubtrie::sink(
132136
size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI,
133137
function_ref<TrieSubtrie *(std::unique_ptr<TrieSubtrie>)> Saver) {
138+
// Create a new sub-trie that points to the existing object with the new
139+
// index for the next level.
134140
assert(NumSubtrieBits > 0);
135141
std::unique_ptr<TrieSubtrie> S = create(StartBit + NumBits, NumSubtrieBits);
136142

137143
assert(NewI < S->Slots.size());
138144
S->Slots[NewI].store(&Content);
139145

146+
// Using compare_exchange to atomically add back the new sub-trie to the trie
147+
// in the place of the exsiting object.
140148
TrieNode *ExistingNode = &Content;
141149
assert(I < Slots.size());
142150
if (Slots[I].compare_exchange_strong(ExistingNode, S.get()))
@@ -149,12 +157,15 @@ TrieSubtrie *TrieSubtrie::sink(
149157

150158
struct ThreadSafeTrieRawHashMapBase::ImplType {
151159
static std::unique_ptr<ImplType> create(size_t StartBit, size_t NumBits) {
152-
size_t Size = sizeof(ImplType) + getTrieTailSize(StartBit, NumBits);
160+
size_t Size = sizeof(ImplType) + getTrieTailSize(NumBits);
153161
void *Memory = ::malloc(Size);
154162
ImplType *Impl = ::new (Memory) ImplType(StartBit, NumBits);
155163
return std::unique_ptr<ImplType>(Impl);
156164
}
157165

166+
// Save the Subtrie into the ownship list of the trie structure in a
167+
// thread-safe way. The ownership transfer is done by compare_exchange the
168+
// pointer value inside the unique_ptr.
158169
TrieSubtrie *save(std::unique_ptr<TrieSubtrie> S) {
159170
assert(!S->Next && "Expected S to a freshly-constructed leaf");
160171

@@ -306,12 +317,7 @@ ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
306317
ContentOffset(ContentOffset),
307318
NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits),
308319
NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits),
309-
ImplPtr(nullptr) {
310-
assert((!NumRootBits || *NumRootBits < 20) &&
311-
"Root should have fewer than ~1M slots");
312-
assert((!NumSubtrieBits || *NumSubtrieBits < 10) &&
313-
"Subtries should have fewer than ~1K slots");
314-
}
320+
ImplPtr(nullptr) {}
315321

316322
ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
317323
ThreadSafeTrieRawHashMapBase &&RHS)
@@ -413,7 +419,7 @@ std::string ThreadSafeTrieRawHashMapBase::getTriePrefixAsString(
413419
TrieContent *Node = nullptr;
414420
while (Current) {
415421
TrieSubtrie *Next = nullptr;
416-
// find first used slot in the trie.
422+
// Find first used slot in the trie.
417423
for (unsigned I = 0, E = Current->Slots.size(); I < E; ++I) {
418424
auto *S = Current->get(I);
419425
if (!S)

0 commit comments

Comments
 (0)