Skip to content

Commit ece0399

Browse files
committed
[Runtime] Have ConcurrentReadableHashMap use 1-byte or 2-byte indices when possible.
1 parent 1345a2b commit ece0399

File tree

1 file changed

+97
-38
lines changed

1 file changed

+97
-38
lines changed

include/swift/Runtime/Concurrent.h

Lines changed: 97 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -588,10 +588,6 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
588588
"Elements must not have destructors (they won't be called).");
589589

590590
private:
591-
/// The type of the elements of the indices array. TODO: use one or two byte
592-
/// indices for smaller tables to save more memory.
593-
using Index = unsigned;
594-
595591
/// The reciprocal of the load factor at which we expand the table. A value of
596592
/// 4 means that we resize at 1/4 = 75% load factor.
597593
static const size_t ResizeProportion = 4;
@@ -619,20 +615,77 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
619615
/// is stored inline. We work around this contradiction by considering the
620616
/// first index to always be occupied with a value that never matches any key.
621617
struct IndexStorage {
622-
std::atomic<Index> Mask;
618+
// Index size is variable based on capacity, either 8, 16, or 32 bits.
619+
//
620+
// This is somewhat conservative. We could have, for example, a capacity of
621+
// 512 but a maximum index of only 200, which would still allow for 8-bit
622+
// indices. However, taking advantage of this would require reallocating
623+
// the index storage when the element count crossed a threshold, which is
624+
// more complex, and the advantages are minimal. This keeps it simple.
625+
//
626+
// The first byte of the storage is the log 2 of the capacity. The remaining
627+
// storage is then an array of 8, 16, or 32 bit integers, depending on the
628+
// capacity number. This union allows us to access the capacity, and then
629+
// access the rest of the storage by taking the address of one of the
630+
// IndexZero members and indexing into it (always avoiding index 0).
631+
union {
632+
uint8_t CapacityLog2;
633+
std::atomic<uint8_t> IndexZero8;
634+
std::atomic<uint16_t> IndexZero16;
635+
std::atomic<uint32_t> IndexZero32;
636+
};
637+
638+
// Get the size, in bytes, of the index needed for the given capacity.
639+
static unsigned indexSize(uint8_t capacityLog2) {
640+
if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT)
641+
return sizeof(uint8_t);
642+
if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT)
643+
return sizeof(uint16_t);
644+
return sizeof(uint32_t);
645+
}
646+
647+
unsigned indexSize() { return indexSize(CapacityLog2); }
623648

624-
static IndexStorage *allocate(size_t capacity) {
625-
assert((capacity & (capacity - 1)) == 0 &&
626-
"Capacity must be a power of 2");
627-
auto *ptr =
628-
reinterpret_cast<IndexStorage *>(calloc(capacity, sizeof(Mask)));
649+
static IndexStorage *allocate(size_t capacityLog2) {
650+
assert(capacityLog2 > 0);
651+
size_t capacity = 1UL << capacityLog2;
652+
auto *ptr = reinterpret_cast<IndexStorage *>(
653+
calloc(capacity, indexSize(capacityLog2)));
629654
if (!ptr)
630655
swift::crash("Could not allocate memory.");
631-
ptr->Mask.store(capacity - 1, std::memory_order_relaxed);
656+
ptr->CapacityLog2 = capacityLog2;
632657
return ptr;
633658
}
634659

635-
std::atomic<Index> &at(size_t i) { return (&Mask)[i]; }
660+
unsigned loadIndexAt(size_t i, std::memory_order order) {
661+
assert(i > 0 && "index zero is off-limits, used to store capacity");
662+
663+
switch (indexSize()) {
664+
case sizeof(uint8_t):
665+
return (&IndexZero8)[i].load(order);
666+
case sizeof(uint16_t):
667+
return (&IndexZero16)[i].load(order);
668+
case sizeof(uint32_t):
669+
return (&IndexZero32)[i].load(order);
670+
default:
671+
swift_runtime_unreachable("unknown index size");
672+
}
673+
}
674+
675+
void storeIndexAt(unsigned value, size_t i, std::memory_order order) {
676+
assert(i > 0 && "index zero is off-limits, used to store capacity");
677+
678+
switch (indexSize()) {
679+
case sizeof(uint8_t):
680+
return (&IndexZero8)[i].store(value, order);
681+
case sizeof(uint16_t):
682+
return (&IndexZero16)[i].store(value, order);
683+
case sizeof(uint32_t):
684+
return (&IndexZero32)[i].store(value, order);
685+
default:
686+
swift_runtime_unreachable("unknown index size");
687+
}
688+
}
636689
};
637690

638691
/// A simple linked list representing pointers that need to be freed.
@@ -720,27 +773,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
720773
/// returning the new array with all existing indices copied into it. This
721774
/// operation performs a rehash, so that the indices are in the correct
722775
/// location in the new array.
723-
IndexStorage *resize(IndexStorage *indices, Index indicesMask,
776+
IndexStorage *resize(IndexStorage *indices, uint8_t indicesCapacityLog2,
724777
ElemTy *elements) {
725-
// Mask is size - 1. Double the size. Start with 4 (fits into 16-byte malloc
726-
// bucket).
727-
size_t newCount = indices ? 2 * (indicesMask + 1) : 4;
728-
size_t newMask = newCount - 1;
778+
// Double the size. Start with 16 (fits into 16-byte malloc
779+
// bucket), which is 2^4.
780+
size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4;
781+
size_t newMask = (1UL << newCapacityLog2) - 1;
729782

730-
IndexStorage *newIndices = IndexStorage::allocate(newCount);
783+
IndexStorage *newIndices = IndexStorage::allocate(newCapacityLog2);
731784

732-
for (size_t i = 1; i <= indicesMask; i++) {
733-
Index index = indices->at(i).load(std::memory_order_relaxed);
785+
size_t indicesCount = 1UL << indicesCapacityLog2;
786+
for (size_t i = 1; i < indicesCount; i++) {
787+
unsigned index = indices->loadIndexAt(i, std::memory_order_relaxed);
734788
if (index == 0)
735789
continue;
736790

737791
auto *element = &elements[index - 1];
738792
auto hash = hash_value(*element);
739793

740794
size_t newI = hash & newMask;
741-
while (newIndices->at(newI) != 0)
795+
// Index 0 is unusable (occupied by the capacity), so always skip it.
796+
while (newI == 0 ||
797+
newIndices->loadIndexAt(newI, std::memory_order_relaxed) != 0) {
742798
newI = (newI + 1) & newMask;
743-
newIndices->at(newI).store(index, std::memory_order_relaxed);
799+
}
800+
newIndices->storeIndexAt(index, newI, std::memory_order_relaxed);
744801
}
745802

746803
Indices.store(newIndices, std::memory_order_release);
@@ -752,32 +809,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
752809

753810
/// Search for the given key within the given indices and elements arrays. If
754811
/// an entry already exists for that key, return a pointer to the element. If
755-
/// no entry exists, return a pointer to the location in the indices array
756-
/// where the index of the new element would be stored.
812+
/// no entry exists, return the location in the indices array where the index
813+
/// of the new element would be stored.
757814
template <class KeyTy>
758-
static std::pair<ElemTy *, std::atomic<Index> *>
815+
static std::pair<ElemTy *, unsigned>
759816
find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
760817
ElemTy *elements) {
761818
if (!indices)
762-
return {nullptr, nullptr};
819+
return {nullptr, 0};
763820
auto hash = hash_value(key);
764-
auto indicesMask = indices->Mask.load(std::memory_order_relaxed);
821+
auto indicesMask = (1UL << indices->CapacityLog2) - 1;
765822

766823
auto i = hash & indicesMask;
767824
while (true) {
768825
// Index 0 is used for the capacity and is not actually an index.
769826
if (i == 0)
770827
i++;
771828

772-
auto *indexPtr = &indices->at(i);
773-
auto index = indexPtr->load(std::memory_order_acquire);
829+
auto index = indices->loadIndexAt(i, std::memory_order_acquire);
774830
// Element indices are 1-based, 0 means no entry.
775831
if (index == 0)
776-
return {nullptr, indexPtr};
832+
return {nullptr, i};
777833
if (index - 1 < elementCount) {
778834
auto *candidate = &elements[index - 1];
779835
if (candidate->matchesKey(key))
780-
return {candidate, nullptr};
836+
return {candidate, 0};
781837
}
782838

783839
i = (i + 1) & indicesMask;
@@ -895,7 +951,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
895951
if (!indices)
896952
indices = resize(indices, 0, nullptr);
897953

898-
auto indicesMask = indices->Mask.load(std::memory_order_relaxed);
954+
auto indicesCapacityLog2 = indices->CapacityLog2;
899955
auto elementCount = ElementCount.load(std::memory_order_relaxed);
900956
auto *elements = Elements.load(std::memory_order_relaxed);
901957

@@ -906,12 +962,14 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
906962
return;
907963
}
908964

909-
// The actual capacity is indicesMask + 1. The number of slots in use is
910-
// elementCount + 1, since the mask also takes a slot.
911-
auto emptyCount = (indicesMask + 1) - (elementCount + 1);
912-
auto proportion = (indicesMask + 1) / emptyCount;
965+
auto indicesCapacity = 1UL << indicesCapacityLog2;
966+
967+
// The number of slots in use is elementCount + 1, since the capacity also
968+
// takes a slot.
969+
auto emptyCount = indicesCapacity - (elementCount + 1);
970+
auto proportion = indicesCapacity / emptyCount;
913971
if (proportion >= ResizeProportion) {
914-
indices = resize(indices, indicesMask, elements);
972+
indices = resize(indices, indicesCapacityLog2, elements);
915973
found = find(key, indices, elementCount, elements);
916974
assert(!found.first && "Shouldn't suddenly find the key after rehashing");
917975
}
@@ -928,7 +986,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
928986
assert(hash_value(key) == hash_value(*element) &&
929987
"Element must have the same hash code as its key.");
930988
ElementCount.store(elementCount + 1, std::memory_order_release);
931-
found.second->store(elementCount + 1, std::memory_order_release);
989+
indices->storeIndexAt(elementCount + 1, found.second,
990+
std::memory_order_release);
932991
}
933992

934993
deallocateFreeListIfSafe();

0 commit comments

Comments
 (0)