@@ -588,10 +588,6 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
                 "Elements must not have destructors (they won't be called).");

 private:
-  /// The type of the elements of the indices array. TODO: use one or two byte
-  /// indices for smaller tables to save more memory.
-  using Index = unsigned;
-
   /// The reciprocal of the load factor at which we expand the table. A value of
   /// 4 means that we resize at 1/4 = 75% load factor.
   static const size_t ResizeProportion = 4;
@@ -619,20 +615,77 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// is stored inline. We work around this contradiction by considering the
   /// first index to always be occupied with a value that never matches any key.
   struct IndexStorage {
-    std::atomic<Index> Mask;
+    // Index size is variable based on capacity, either 8, 16, or 32 bits.
+    //
+    // This is somewhat conservative. We could have, for example, a capacity of
+    // 512 but a maximum index of only 200, which would still allow for 8-bit
+    // indices. However, taking advantage of this would require reallocating
+    // the index storage when the element count crossed a threshold, which is
+    // more complex, and the advantages are minimal. This keeps it simple.
+    //
+    // The first byte of the storage is the log 2 of the capacity. The remaining
+    // storage is then an array of 8, 16, or 32 bit integers, depending on the
+    // capacity number. This union allows us to access the capacity, and then
+    // access the rest of the storage by taking the address of one of the
+    // IndexZero members and indexing into it (always avoiding index 0).
+    union {
+      uint8_t CapacityLog2;
+      std::atomic<uint8_t> IndexZero8;
+      std::atomic<uint16_t> IndexZero16;
+      std::atomic<uint32_t> IndexZero32;
+    };
+
+    // Get the size, in bytes, of the index needed for the given capacity.
+    static unsigned indexSize(uint8_t capacityLog2) {
+      if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT)
+        return sizeof(uint8_t);
+      if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT)
+        return sizeof(uint16_t);
+      return sizeof(uint32_t);
+    }
+
+    unsigned indexSize() { return indexSize(CapacityLog2); }

-    static IndexStorage *allocate(size_t capacity) {
-      assert((capacity & (capacity - 1)) == 0 &&
-             "Capacity must be a power of 2");
-      auto *ptr =
-          reinterpret_cast<IndexStorage *>(calloc(capacity, sizeof(Mask)));
+    static IndexStorage *allocate(size_t capacityLog2) {
+      assert(capacityLog2 > 0);
+      size_t capacity = 1UL << capacityLog2;
+      auto *ptr = reinterpret_cast<IndexStorage *>(
+          calloc(capacity, indexSize(capacityLog2)));
       if (!ptr)
         swift::crash("Could not allocate memory.");
-      ptr->Mask.store(capacity - 1, std::memory_order_relaxed);
+      ptr->CapacityLog2 = capacityLog2;
       return ptr;
     }

-    std::atomic<Index> &at(size_t i) { return (&Mask)[i]; }
+    unsigned loadIndexAt(size_t i, std::memory_order order) {
+      assert(i > 0 && "index zero is off-limits, used to store capacity");
+
+      switch (indexSize()) {
+      case sizeof(uint8_t):
+        return (&IndexZero8)[i].load(order);
+      case sizeof(uint16_t):
+        return (&IndexZero16)[i].load(order);
+      case sizeof(uint32_t):
+        return (&IndexZero32)[i].load(order);
+      default:
+        swift_runtime_unreachable("unknown index size");
+      }
+    }
+
+    void storeIndexAt(unsigned value, size_t i, std::memory_order order) {
+      assert(i > 0 && "index zero is off-limits, used to store capacity");
+
+      switch (indexSize()) {
+      case sizeof(uint8_t):
+        return (&IndexZero8)[i].store(value, order);
+      case sizeof(uint16_t):
+        return (&IndexZero16)[i].store(value, order);
+      case sizeof(uint32_t):
+        return (&IndexZero32)[i].store(value, order);
+      default:
+        swift_runtime_unreachable("unknown index size");
+      }
+    }
   };

   /// A simple linked list representing pointers that need to be freed.
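The width-selection rule above is worth seeing in isolation. A table of capacity 2^capacityLog2 stores index values no larger than 2^capacityLog2 - 1, so 8-bit slots suffice while capacityLog2 <= 8, 16-bit slots while capacityLog2 <= 16, and 32-bit slots beyond that; even the smallest table, 16 one-byte slots, fills a 16-byte malloc bucket. Here is a minimal standalone sketch: the helper mirrors `IndexStorage::indexSize` from the diff, while `main` and its sample capacities are purely illustrative.

```cpp
// Minimal sketch of the index-width rule; indexSize mirrors the diff,
// the driver values are illustrative.
#include <climits>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

static unsigned indexSize(uint8_t capacityLog2) {
  if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT)
    return sizeof(uint8_t);  // up to 2^8 slots: 1-byte indices
  if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT)
    return sizeof(uint16_t); // up to 2^16 slots: 2-byte indices
  return sizeof(uint32_t);   // larger tables: 4-byte indices
}

int main() {
  for (unsigned log2 : {4u, 8u, 9u, 16u, 17u})
    printf("capacity 2^%u -> %u-byte indices\n", log2,
           indexSize(static_cast<uint8_t>(log2)));
  return 0; // prints 1, 1, 2, 2, 4 bytes respectively
}
```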
@@ -720,27 +773,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// returning the new array with all existing indices copied into it. This
   /// operation performs a rehash, so that the indices are in the correct
   /// location in the new array.
-  IndexStorage *resize(IndexStorage *indices, Index indicesMask,
+  IndexStorage *resize(IndexStorage *indices, uint8_t indicesCapacityLog2,
                        ElemTy *elements) {
-    // Mask is size - 1. Double the size. Start with 4 (fits into 16-byte malloc
-    // bucket).
-    size_t newCount = indices ? 2 * (indicesMask + 1) : 4;
-    size_t newMask = newCount - 1;
+    // Double the size. Start with 16 (fits into 16-byte malloc
+    // bucket), which is 2^4.
+    size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4;
+    size_t newMask = (1UL << newCapacityLog2) - 1;

-    IndexStorage *newIndices = IndexStorage::allocate(newCount);
+    IndexStorage *newIndices = IndexStorage::allocate(newCapacityLog2);

-    for (size_t i = 1; i <= indicesMask; i++) {
-      Index index = indices->at(i).load(std::memory_order_relaxed);
+    size_t indicesCount = 1UL << indicesCapacityLog2;
+    for (size_t i = 1; i < indicesCount; i++) {
+      unsigned index = indices->loadIndexAt(i, std::memory_order_relaxed);
       if (index == 0)
         continue;

       auto *element = &elements[index - 1];
       auto hash = hash_value(*element);

       size_t newI = hash & newMask;
-      while (newIndices->at(newI) != 0)
+      // Index 0 is unusable (occupied by the capacity), so always skip it.
+      while (newI == 0 ||
+             newIndices->loadIndexAt(newI, std::memory_order_relaxed) != 0) {
         newI = (newI + 1) & newMask;
-      newIndices->at(newI).store(index, std::memory_order_relaxed);
+      }
+      newIndices->storeIndexAt(index, newI, std::memory_order_relaxed);
     }

     Indices.store(newIndices, std::memory_order_release);
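Stripped of the atomics, the same rehash may be easier to follow as a single-threaded model. This is an illustrative sketch, not the runtime's code: `old` stands in for the previous indices array (slot 0 reserved for the capacity byte), and `hashes` stands in for `hash_value` applied to the element array.

```cpp
// Single-threaded sketch of the rehash loop above (illustrative only).
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint32_t> rehash(const std::vector<uint32_t> &old,
                             const std::vector<size_t> &hashes) {
  size_t newCapacity = old.size() * 2;         // double the table
  size_t newMask = newCapacity - 1;
  std::vector<uint32_t> fresh(newCapacity, 0); // 0 means "empty slot"

  for (size_t i = 1; i < old.size(); i++) {    // slot 0 is never an index
    uint32_t index = old[i];
    if (index == 0)
      continue;                                // empty in the old table
    size_t newI = hashes[index - 1] & newMask; // element indices are 1-based
    while (newI == 0 || fresh[newI] != 0)      // skip slot 0 and collisions
      newI = (newI + 1) & newMask;
    fresh[newI] = index;
  }
  return fresh;
}
```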
@@ -752,32 +809,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// Search for the given key within the given indices and elements arrays. If
   /// an entry already exists for that key, return a pointer to the element. If
-  /// no entry exists, return a pointer to the location in the indices array
-  /// where the index of the new element would be stored.
+  /// no entry exists, return the location in the indices array where the index
+  /// of the new element would be stored.
   template <class KeyTy>
-  static std::pair<ElemTy *, std::atomic<Index> *>
+  static std::pair<ElemTy *, unsigned>
   find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
        ElemTy *elements) {
     if (!indices)
-      return {nullptr, nullptr};
+      return {nullptr, 0};
     auto hash = hash_value(key);
-    auto indicesMask = indices->Mask.load(std::memory_order_relaxed);
+    auto indicesMask = (1UL << indices->CapacityLog2) - 1;

     auto i = hash & indicesMask;
     while (true) {
       // Index 0 is used for the mask and is not actually an index.
       if (i == 0)
         i++;

-      auto *indexPtr = &indices->at(i);
-      auto index = indexPtr->load(std::memory_order_acquire);
+      auto index = indices->loadIndexAt(i, std::memory_order_acquire);
       // Element indices are 1-based, 0 means no entry.
       if (index == 0)
-        return {nullptr, indexPtr};
+        return {nullptr, i};
       if (index - 1 < elementCount) {
         auto *candidate = &elements[index - 1];
         if (candidate->matchesKey(key))
-          return {candidate, nullptr};
+          return {candidate, 0};
       }

       i = (i + 1) & indicesMask;
@@ -895,7 +951,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
     if (!indices)
       indices = resize(indices, 0, nullptr);

-    auto indicesMask = indices->Mask.load(std::memory_order_relaxed);
+    auto indicesCapacityLog2 = indices->CapacityLog2;
     auto elementCount = ElementCount.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);

@@ -906,12 +962,14 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
       return;
     }

-    // The actual capacity is indicesMask + 1. The number of slots in use is
-    // elementCount + 1, since the mask also takes a slot.
-    auto emptyCount = (indicesMask + 1) - (elementCount + 1);
-    auto proportion = (indicesMask + 1) / emptyCount;
+    auto indicesCapacity = 1UL << indicesCapacityLog2;
+
+    // The number of slots in use is elementCount + 1, since the capacity also
+    // takes a slot.
+    auto emptyCount = indicesCapacity - (elementCount + 1);
+    auto proportion = indicesCapacity / emptyCount;
     if (proportion >= ResizeProportion) {
-      indices = resize(indices, indicesMask, elements);
+      indices = resize(indices, indicesCapacityLog2, elements);
       found = find(key, indices, elementCount, elements);
       assert(!found.first && "Shouldn't suddenly find the key after rehashing");
     }
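As a worked example of this trigger, using values already in the diff: the smallest table has 2^4 = 16 slots and ResizeProportion is 4. At elementCount = 11, twelve slots are in use (eleven indices plus the capacity byte), so emptyCount = 16 - 12 = 4 and proportion = 16 / 4 = 4, which meets ResizeProportion; the table doubles to 32 slots and the key is re-probed before insertion.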
@@ -928,7 +986,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
     assert(hash_value(key) == hash_value(*element) &&
            "Element must have the same hash code as its key.");
     ElementCount.store(elementCount + 1, std::memory_order_release);
-    found.second->store(elementCount + 1, std::memory_order_release);
+    indices->storeIndexAt(elementCount + 1, found.second,
+                          std::memory_order_release);
   }

   deallocateFreeListIfSafe();