@@ -624,25 +624,76 @@ struct ConcurrentReadableHashMap {
 /// is stored inline. We work around this contradiction by considering the
 /// first index to always be occupied with a value that never matches any key.
   struct IndexStorage {
+    using RawType = uintptr_t;
+
+    RawType Value;
+
+    static constexpr uintptr_t log2(uintptr_t x) {
+      return x <= 1 ? 0 : log2(x >> 1) + 1;
+    }
+
+    static constexpr uintptr_t InlineIndexBits = 4;
+    static constexpr uintptr_t InlineIndexMask = 0xF;
+    static constexpr uintptr_t InlineCapacity =
+        sizeof(RawType) * CHAR_BIT / InlineIndexBits;
+    static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);
+
+    // Indices can be stored in different ways, depending on how big they need
+    // to be. The index mode is stored in the bottom two bits of Value. The
+    // meaning of the rest of Value depends on the mode.
+    enum class IndexMode {
+      // Value is treated as an array of four-bit integers, storing the indices.
+      // The first element overlaps with the mode, and is never used.
+      Inline,
+
+      // The rest of Value holds a pointer to storage. The first byte of this
+      // storage holds the log2 of the storage capacity. The storage is treated
+      // as an array of 8, 16, or 32-bit integers. The first element overlaps
+      // with the capacity, and is never used.
+      Array8,
+      Array16,
+      Array32,
+    };
+
+    IndexStorage() : Value(0) {}
+    IndexStorage(RawType value) : Value(value) {}
+    IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) {
+      assert(capacityLog2 > InlineCapacityLog2);
+      IndexMode mode;
+      switch (indexSize) {
+      case sizeof(uint8_t):
+        mode = IndexMode::Array8;
+        break;
+      case sizeof(uint16_t):
+        mode = IndexMode::Array16;
+        break;
+      case sizeof(uint32_t):
+        mode = IndexMode::Array32;
+        break;
+      default:
+        swift_unreachable("unknown index size");
+      }
+      Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode);
+      *reinterpret_cast<uint8_t *>(ptr) = capacityLog2;
+    }
+
+    bool valueIsPointer() { return Value & 3; }
+
+    void *pointer() {
+      if (valueIsPointer())
+        return (void *)(Value & (RawType)~3);
+      return nullptr;
+    }
+
+    IndexMode indexMode() { return IndexMode(Value & 3); }
+
     // Index size is variable based on capacity, either 8, 16, or 32 bits.
     //
     // This is somewhat conservative. We could have, for example, a capacity of
     // 512 but a maximum index of only 200, which would still allow for 8-bit
     // indices. However, taking advantage of this would require reallocating
     // the index storage when the element count crossed a threshold, which is
     // more complex, and the advantages are minimal. This keeps it simple.
-    //
-    // The first byte of the storage is the log 2 of the capacity. The remaining
-    // storage is then an array of 8, 16, or 32 bit integers, depending on the
-    // capacity number. This union allows us to access the capacity, and then
-    // access the rest of the storage by taking the address of one of the
-    // IndexZero members and indexing into it (always avoiding index 0).
-    union {
-      uint8_t CapacityLog2;
-      std::atomic<uint8_t> IndexZero8;
-      std::atomic<uint16_t> IndexZero16;
-      std::atomic<uint32_t> IndexZero32;
-    };
 
     // Get the size, in bytes, of the index needed for the given capacity.
     static unsigned indexSize(uint8_t capacityLog2) {
@@ -653,46 +704,66 @@ struct ConcurrentReadableHashMap {
       return sizeof(uint32_t);
     }
 
-    unsigned indexSize() { return indexSize(CapacityLog2); }
+    uint8_t getCapacityLog2() {
+      if (auto *ptr = pointer())
+        return *reinterpret_cast<uint8_t *>(ptr);
+      return InlineCapacityLog2;
+    }
 
-    static IndexStorage *allocate(size_t capacityLog2) {
+    static IndexStorage allocate(size_t capacityLog2) {
       assert(capacityLog2 > 0);
       size_t capacity = 1UL << capacityLog2;
-      auto *ptr = reinterpret_cast<IndexStorage *>(
-          calloc(capacity, indexSize(capacityLog2)));
+      unsigned size = indexSize(capacityLog2);
+      auto *ptr = calloc(capacity, size);
       if (!ptr)
         swift::crash("Could not allocate memory.");
-      ptr->CapacityLog2 = capacityLog2;
-      return ptr;
+      return IndexStorage(ptr, size, capacityLog2);
     }
 
     unsigned loadIndexAt(size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].load(order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].load(order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].load(order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline:
+        return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
+      case IndexMode::Array8:
+        return ((std::atomic<uint8_t> *)pointer())[i].load(order);
+      case IndexMode::Array16:
+        return ((std::atomic<uint16_t> *)pointer())[i].load(order);
+      case IndexMode::Array32:
+        return ((std::atomic<uint32_t> *)pointer())[i].load(order);
       }
     }
 
-    void storeIndexAt(unsigned value, size_t i, std::memory_order order) {
+    void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value,
+                      size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].store(value, order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].store(value, order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].store(value, order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline: {
+        assert(value == (value & InlineIndexMask) && "value is too big to fit");
+        auto shift = i * InlineIndexBits;
+        assert((Value & (InlineIndexMask << shift)) == 0 &&
+               "can't overwrite an existing index");
+        assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
+               "writing with a stale IndexStorage");
+        auto newStorage = Value | ((RawType)value << shift);
+        inlineStorage->store(newStorage, order);
+        break;
+      }
+      case IndexMode::Array8:
+        ((std::atomic<uint8_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array16:
+        ((std::atomic<uint16_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array32:
+        ((std::atomic<uint32_t> *)pointer())[i].store(value, order);
+        break;
       }
     }
   };
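
To make the new encoding concrete, here is a small standalone sketch of the inline mode, not part of the change itself: the low two bits of the word act as the mode tag, the remaining bits are read as an array of four-bit indices, and slot 0 is never used because it overlaps the tag. All names below are illustrative only.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

static constexpr uintptr_t kIndexBits = 4;
static constexpr uintptr_t kIndexMask = 0xF;

// Pack a small index into slot i of a word-sized inline array.
uintptr_t packInline(uintptr_t word, size_t i, unsigned value) {
  assert(i > 0 && "slot 0 overlaps the mode tag");
  assert(value <= kIndexMask && "value must fit in four bits");
  return word | ((uintptr_t)value << (i * kIndexBits));
}

// Read the index stored in slot i.
unsigned unpackInline(uintptr_t word, size_t i) {
  return (word >> (i * kIndexBits)) & kIndexMask;
}

int main() {
  uintptr_t word = 0; // bottom two bits 00 = inline mode
  word = packInline(word, 1, 7);
  word = packInline(word, 3, 12);
  printf("%u %u %u\n", unpackInline(word, 1), unpackInline(word, 2),
         unpackInline(word, 3)); // prints: 7 0 12
}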
@@ -753,7 +824,11 @@ struct ConcurrentReadableHashMap {
   std::atomic<ElementStorage *> Elements{nullptr};
 
   /// The array of indices.
-  std::atomic<IndexStorage *> Indices{nullptr};
+  ///
+  /// This has to be stored as a IndexStorage::RawType instead of a IndexStorage
+  /// because some of our targets don't support interesting structs as atomic
+  /// types. See also MetadataCache::TrackingInfo which uses the same technique.
+  std::atomic<typename IndexStorage::RawType> Indices{0};
 
   /// The writer lock, which must be taken before any mutation of the table.
   MutexTy WriterLock;
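
The raw-word atomic is the key trick in this hunk: because std::atomic of a nontrivial struct may not be supported (or lock-free) on every target, only the single word is made atomic and the wrapper is rebuilt on each load. A minimal model of the pattern, with stand-in names rather than the runtime's own:

#include <atomic>
#include <cstdint>

struct Wrapper {                      // stand-in for IndexStorage
  uintptr_t Value;
  bool isPointer() const { return Value & 3; }
};

std::atomic<uintptr_t> Shared{0};     // stand-in for Indices

Wrapper loadWrapper() {               // readers rebuild the wrapper from the word
  return Wrapper{Shared.load(std::memory_order_acquire)};
}

void storeWrapper(Wrapper w) {        // writers publish only its raw word
  Shared.store(w.Value, std::memory_order_release);
}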
@@ -798,18 +873,17 @@ struct ConcurrentReadableHashMap {
   /// returning the new array with all existing indices copied into it. This
   /// operation performs a rehash, so that the indices are in the correct
   /// location in the new array.
-  IndexStorage *resize(IndexStorage *indices, uint8_t indicesCapacityLog2,
-                       ElemTy *elements) {
-    // Double the size. Start with 16 (fits into 16-byte malloc
-    // bucket), which is 2^4.
-    size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4;
+  IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2,
+                      ElemTy *elements) {
+    // Double the size.
+    size_t newCapacityLog2 = indicesCapacityLog2 + 1;
     size_t newMask = (1UL << newCapacityLog2) - 1;
 
-    IndexStorage *newIndices = IndexStorage::allocate(newCapacityLog2);
+    IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2);
 
     size_t indicesCount = 1UL << indicesCapacityLog2;
     for (size_t i = 1; i < indicesCount; i++) {
-      unsigned index = indices->loadIndexAt(i, std::memory_order_relaxed);
+      unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed);
       if (index == 0)
         continue;
 
@@ -819,15 +893,16 @@ struct ConcurrentReadableHashMap {
       size_t newI = hash & newMask;
       // Index 0 is unusable (occupied by the capacity), so always skip it.
       while (newI == 0 ||
-             newIndices->loadIndexAt(newI, std::memory_order_relaxed) != 0) {
+             newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) {
         newI = (newI + 1) & newMask;
       }
-      newIndices->storeIndexAt(index, newI, std::memory_order_relaxed);
+      newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed);
     }
 
-    Indices.store(newIndices, std::memory_order_release);
+    Indices.store(newIndices.Value, std::memory_order_release);
 
-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);
 
     return newIndices;
   }
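
For reference, the rehash performed by resize() boils down to the following simplified, non-concurrent sketch: double the capacity, then re-probe each live 1-based index into the new power-of-two table, always skipping the reserved slot 0. The vector-based helper below is illustrative only and not part of the runtime.

#include <cstddef>
#include <vector>

std::vector<unsigned> rehashDoubled(const std::vector<unsigned> &old,
                                    const std::vector<size_t> &elementHashes) {
  size_t newCapacity = old.size() * 2;          // capacities are powers of two
  size_t newMask = newCapacity - 1;
  std::vector<unsigned> grown(newCapacity, 0);  // 0 means "empty slot"
  for (size_t i = 1; i < old.size(); i++) {
    unsigned index = old[i];
    if (index == 0)
      continue;
    size_t hash = elementHashes[index - 1];     // element indices are 1-based
    size_t newI = hash & newMask;
    while (newI == 0 || grown[newI] != 0)       // slot 0 is reserved
      newI = (newI + 1) & newMask;
    grown[newI] = index;
  }
  return grown;
}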
@@ -838,20 +913,18 @@ struct ConcurrentReadableHashMap {
   /// of the new element would be stored.
   template <class KeyTy>
   static std::pair<ElemTy *, unsigned>
-  find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
+  find(const KeyTy &key, IndexStorage indices, size_t elementCount,
        ElemTy *elements) {
-    if (!indices)
-      return {nullptr, 0};
     auto hash = hash_value(key);
-    auto indicesMask = (1UL << indices->CapacityLog2) - 1;
+    auto indicesMask = (1UL << indices.getCapacityLog2()) - 1;
 
     auto i = hash & indicesMask;
     while (true) {
       // Index 0 is used for the mask and is not actually an index.
       if (i == 0)
         i++;
 
-      auto index = indices->loadIndexAt(i, std::memory_order_acquire);
+      auto index = indices.loadIndexAt(i, std::memory_order_acquire);
       // Element indices are 1-based, 0 means no entry.
       if (index == 0)
         return {nullptr, i};
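
The lookup side uses the same probing discipline. A simplified, non-atomic companion to the find() loop above, again purely illustrative: probe from hash & mask, skip slot 0, and treat a stored 0 as "not present" since element indices are 1-based.

#include <cstddef>
#include <functional>
#include <vector>

// Returns the 1-based element index whose key matches, or 0 if the probe
// chain reaches an empty slot first.
unsigned probeFor(const std::vector<unsigned> &indices, size_t hash,
                  const std::function<bool(unsigned)> &matches) {
  size_t mask = indices.size() - 1;
  size_t i = hash & mask;
  while (true) {
    if (i == 0)
      i++;                    // slot 0 holds the capacity, never an index
    unsigned index = indices[i];
    if (index == 0)
      return 0;               // empty slot: the key is not in the table
    if (matches(index))
      return index;
    i = (i + 1) & mask;
  }
}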
@@ -884,12 +957,12 @@ struct ConcurrentReadableHashMap {
   /// Readers take a snapshot of the hash map, then work with the snapshot.
   class Snapshot {
     ConcurrentReadableHashMap *Map;
-    IndexStorage *Indices;
+    IndexStorage Indices;
     ElemTy *Elements;
     size_t ElementCount;
 
   public:
-    Snapshot(ConcurrentReadableHashMap *map, IndexStorage *indices,
+    Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices,
              ElemTy *elements, size_t elementCount)
         : Map(map), Indices(indices), Elements(elements),
           ElementCount(elementCount) {}
@@ -905,7 +978,7 @@ struct ConcurrentReadableHashMap {
     /// Search for an element matching the given key. Returns a pointer to the
     /// found element, or nullptr if no matching element exists.
     template <class KeyTy> const ElemTy *find(const KeyTy &key) {
-      if (!Indices || !ElementCount || !Elements)
+      if (!Indices.Value || !ElementCount || !Elements)
         return nullptr;
       return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                              Elements)
@@ -937,7 +1010,7 @@ struct ConcurrentReadableHashMap {
     // pointer can just mean a concurrent insert that triggered a resize of the
     // elements array. This is harmless aside from a small performance hit, and
     // should not happen often.
-    IndexStorage *indices;
+    IndexStorage indices;
     size_t elementCount;
     ElementStorage *elements;
     ElementStorage *elements2;
@@ -972,11 +1045,8 @@ struct ConcurrentReadableHashMap {
   void getOrInsert(KeyTy key, const Call &call) {
     ScopedLockTy guard(WriterLock);
 
-    auto *indices = Indices.load(std::memory_order_relaxed);
-    if (!indices)
-      indices = resize(indices, 0, nullptr);
-
-    auto indicesCapacityLog2 = indices->CapacityLog2;
+    auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)};
+    auto indicesCapacityLog2 = indices.getCapacityLog2();
     auto elementCount = ElementCount.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);
     auto *elementsPtr = elements ? elements->data() : nullptr;
@@ -1012,8 +1082,8 @@ struct ConcurrentReadableHashMap {
       assert(hash_value(key) == hash_value(*element) &&
              "Element must have the same hash code as its key.");
       ElementCount.store(elementCount + 1, std::memory_order_release);
-      indices->storeIndexAt(elementCount + 1, found.second,
-                            std::memory_order_release);
+      indices.storeIndexAt(&Indices, elementCount + 1, found.second,
+                           std::memory_order_release);
     }
 
     deallocateFreeListIfSafe();
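
The ordering in this hunk matters: the element is fully constructed and the count bumped before the index is published with release ordering, so a reader that observes the index with acquire ordering also observes the element's contents. A reduced two-thread model of that handoff, with illustrative names rather than runtime code:

#include <atomic>
#include <cassert>

struct Element { int payload; };

Element Elements[16];
std::atomic<unsigned> PublishedIndex{0};  // 0 means "no entry yet"

void writer() {
  Elements[0].payload = 42;                            // construct first
  PublishedIndex.store(1, std::memory_order_release);  // then publish (1-based)
}

void reader() {
  unsigned index = PublishedIndex.load(std::memory_order_acquire);
  if (index != 0)
    assert(Elements[index - 1].payload == 42);         // guaranteed visible
}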
@@ -1024,16 +1094,17 @@ struct ConcurrentReadableHashMap {
   void clear() {
     ScopedLockTy guard(WriterLock);
 
-    auto *indices = Indices.load(std::memory_order_relaxed);
+    IndexStorage indices = Indices.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);
 
     // Order doesn't matter here, snapshots will gracefully handle any field
     // being NULL/0 while the others are not.
-    Indices.store(nullptr, std::memory_order_relaxed);
+    Indices.store(0, std::memory_order_relaxed);
     ElementCount.store(0, std::memory_order_relaxed);
     Elements.store(nullptr, std::memory_order_relaxed);
 
-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);
     FreeListNode::add(&FreeList, elements);
 
     deallocateFreeListIfSafe();