
Commit 1ada509

Merge pull request #4589 from rjmccall/more-metadata-cache-improvements

2 parents: c292861 + 4b22a29

6 files changed: +203, -235 lines

include/swift/Runtime/Concurrent.h

117 additions, 59 deletions
@@ -13,7 +13,9 @@
 #define SWIFT_RUNTIME_CONCURRENTUTILS_H
 #include <iterator>
 #include <atomic>
+#include <functional>
 #include <stdint.h>
+#include "llvm/Support/Allocator.h"
 
 #if defined(__FreeBSD__)
 #include <stdio.h>
@@ -125,48 +127,22 @@ template <class ElemTy> struct ConcurrentList {
   std::atomic<ConcurrentListNode<ElemTy> *> First;
 };
 
-template <class T, bool Delete> class AtomicMaybeOwningPointer;
-
-template <class T>
-class AtomicMaybeOwningPointer<T, false> {
-public:
-  std::atomic<T*> Value;
-  constexpr AtomicMaybeOwningPointer(T *value) : Value(value) {}
-};
-
+/// A utility function for ordering two pointers, which is useful
+/// for implementing compareWithKey.
 template <class T>
-class AtomicMaybeOwningPointer<T, true> {
-public:
-  std::atomic<T*> Value;
-  constexpr AtomicMaybeOwningPointer(T *value) : Value(value) {}
-
-  ~AtomicMaybeOwningPointer() {
-    // This can use relaxed memory order because the client has to ensure
-    // that all accesses are safely completed and their effects fully
-    // visible before destruction occurs anyway.
-    ::delete Value.load(std::memory_order_relaxed);
-  }
-};
-
-/// A concurrent map that is implemented using a binary tree. It supports
-/// concurrent insertions but does not support removals or rebalancing of
-/// the tree.
-///
-/// The entry type must provide the following operations:
-///
-///   /// For debugging purposes only. Summarize this key as an integer value.
-///   long getKeyIntValueForDump() const;
-///
-///   /// A ternary comparison. KeyTy is the type of the key provided
-///   /// to find or getOrInsert.
-///   int compareWithKey(KeyTy key) const;
-///
-///   /// Return the amount of extra trailing space required by an entry,
-///   /// where KeyTy is the type of the first argument to getOrInsert and
-///   /// ArgTys is the type of the remaining arguments.
-///   static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
-template <class EntryTy, bool ProvideDestructor = true>
-class ConcurrentMap {
+static inline int comparePointers(const T *left, const T *right) {
+  return (left == right ? 0 : std::less<const T *>()(left, right) ? -1 : 1);
+}
+
+template <class EntryTy, bool ProvideDestructor, class Allocator>
+class ConcurrentMapBase;
+
+/// The partial specialization of ConcurrentMapBase whose destructor is
+/// trivial. The other implementation inherits from this, so this is a
+/// base for all ConcurrentMaps.
+template <class EntryTy, class Allocator>
+class ConcurrentMapBase<EntryTy, false, Allocator> : protected Allocator {
+protected:
   struct Node {
     std::atomic<Node*> Left;
     std::atomic<Node*> Right;
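
Worth noting before the next hunk: comparePointers packages the ternary pointer ordering that entry types need in order to implement compareWithKey. A minimal, purely illustrative sketch (the entry type and its Key field are hypothetical, not part of this patch):

  // Hypothetical pointer-keyed entry, shown only to illustrate comparePointers.
  struct PointerKeyedEntry {
    const void *Key;

    // Ternary comparison required by find/getOrInsert: returns <0, 0, or >0
    // according to the total order std::less imposes on the two pointers.
    int compareWithKey(const void *key) const {
      return comparePointers(key, Key);
    }
  };

The HashableConformanceEntry and BoxCacheEntry changes later in this commit use the helper in exactly this way, with const Metadata * as the key type.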
@@ -179,15 +155,7 @@ class ConcurrentMap {
     Node(const Node &) = delete;
     Node &operator=(const Node &) = delete;
 
-    ~Node() {
-      // These can be relaxed accesses because there is no safe way for
-      // another thread to race an access to this node with our destruction
-      // of it.
-      ::delete Left.load(std::memory_order_relaxed);
-      ::delete Right.load(std::memory_order_relaxed);
-    }
-
-#ifndef NDEBUG
+#ifndef NDEBUG
     void dump() const {
       auto L = Left.load(std::memory_order_acquire);
       auto R = Right.load(std::memory_order_acquire);
@@ -204,19 +172,105 @@ class ConcurrentMap {
       printf("\"%p\":f2 -> \"%p\":f0;\n", this, R);
     }
   }
-#endif
+#endif
   };
 
-  /// The root of the tree.
-  AtomicMaybeOwningPointer<Node, ProvideDestructor> Root;
+  std::atomic<Node*> Root;
+
+  constexpr ConcurrentMapBase() : Root(nullptr) {}
+
+  // Implicitly trivial destructor.
+  ~ConcurrentMapBase() = default;
+
+  void destroyNode(Node *node) {
+    assert(node && "destroying null node");
+    auto allocSize = sizeof(Node) + node->Payload.getExtraAllocationSize();
+
+    // Destroy the node's payload.
+    node->~Node();
+
+    // Deallocate the node.
+    this->Deallocate(node, allocSize);
+  }
+};
+
+/// The partial specialization of ConcurrentMapBase which provides a
+/// non-trivial destructor.
+template <class EntryTy, class Allocator>
+class ConcurrentMapBase<EntryTy, true, Allocator>
+    : protected ConcurrentMapBase<EntryTy, false, Allocator> {
+protected:
+  using super = ConcurrentMapBase<EntryTy, false, Allocator>;
+  using Node = typename super::Node;
+
+  constexpr ConcurrentMapBase() {}
+
+  ~ConcurrentMapBase() {
+    destroyTree(this->Root);
+  }
+
+private:
+  void destroyTree(const std::atomic<Node*> &edge) {
+    // This can be a relaxed load because destruction is not allowed to race
+    // with other operations.
+    auto node = edge.load(std::memory_order_relaxed);
+    if (!node) return;
+
+    // Destroy the node's children.
+    destroyTree(node->Left);
+    destroyTree(node->Right);
+
+    // Destroy the node itself.
+    this->destroyNode(node);
+  }
+};
+
+/// A concurrent map that is implemented using a binary tree. It supports
+/// concurrent insertions but does not support removals or rebalancing of
+/// the tree.
+///
+/// The entry type must provide the following operations:
+///
+///   /// For debugging purposes only. Summarize this key as an integer value.
+///   long getKeyIntValueForDump() const;
+///
+///   /// A ternary comparison. KeyTy is the type of the key provided
+///   /// to find or getOrInsert.
+///   int compareWithKey(KeyTy key) const;
+///
+///   /// Return the amount of extra trailing space required by an entry,
+///   /// where KeyTy is the type of the first argument to getOrInsert and
+///   /// ArgTys is the type of the remaining arguments.
+///   static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
+///
+///   /// Return the amount of extra trailing space that was requested for
+///   /// this entry. This method is only used to compute the size of the
+///   /// object during node deallocation; it does not need to return a
+///   /// correct value so long as the allocator's Deallocate implementation
+///   /// ignores this argument.
+///   size_t getExtraAllocationSize() const;
+///
+/// If ProvideDestructor is false, the destructor will be trivial. This
+/// can be appropriate when the object is declared at global scope.
+template <class EntryTy, bool ProvideDestructor = true,
+          class Allocator = llvm::MallocAllocator>
+class ConcurrentMap
+    : private ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator> {
+  using super = ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator>;
+
+  using Node = typename super::Node;
+
+  /// Inherited from base class:
+  ///   std::atomic<Node*> Root;
+  using super::Root;
 
   /// This member stores the address of the last node that was found by the
   /// search procedure. We cache the last search to accelerate code that
   /// searches the same value in a loop.
   std::atomic<Node*> LastSearch;
 
 public:
-  constexpr ConcurrentMap() : Root(nullptr), LastSearch(nullptr) {}
+  constexpr ConcurrentMap() : LastSearch(nullptr) {}
 
   ConcurrentMap(const ConcurrentMap &) = delete;
   ConcurrentMap &operator=(const ConcurrentMap &) = delete;
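
The doc comment above effectively defines a small compile-time protocol on EntryTy. A minimal sketch of a conforming entry — the name is hypothetical, the key is a plain pointer, and no trailing storage is requested, so both getExtraAllocationSize forms return 0:

  // Sketch of an entry type satisfying ConcurrentMap's documented requirements.
  struct ExampleEntry {
    const void *Key;

    ExampleEntry(const void *key) : Key(key) {}

    // For debugging dumps only.
    long getKeyIntValueForDump() const {
      return reinterpret_cast<long>(Key);
    }

    // Ternary comparison against the key passed to find or getOrInsert.
    int compareWithKey(const void *key) const {
      return comparePointers(key, Key);
    }

    // No extra trailing space is requested when a node is allocated...
    static size_t getExtraAllocationSize(const void *key) { return 0; }

    // ...and none needs to be reported back when a node is deallocated.
    size_t getExtraAllocationSize() const { return 0; }
  };

Entries that do carry trailing storage would return the requested size from both methods instead; as the comment above notes, the instance form may be approximate whenever the allocator's Deallocate ignores the size argument.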
@@ -226,9 +280,13 @@ class ConcurrentMap {
 
 public:
 
+  Allocator &getAllocator() {
+    return *this;
+  }
+
 #ifndef NDEBUG
   void dump() const {
-    auto R = Root.Value.load(std::memory_order_acquire);
+    auto R = Root.load(std::memory_order_acquire);
     printf("digraph g {\n"
            "graph [ rankdir = \"TB\"];\n"
            "node [ fontsize = \"16\" ];\n"
@@ -252,7 +310,7 @@ class ConcurrentMap {
     }
 
     // Search the tree, starting from the root.
-    Node *node = Root.Value.load(std::memory_order_acquire);
+    Node *node = Root.load(std::memory_order_acquire);
     while (node) {
       int comparisonResult = node->Payload.compareWithKey(key);
       if (comparisonResult == 0) {
@@ -285,7 +343,7 @@ class ConcurrentMap {
     Node *newNode = nullptr;
 
     // Start from the root.
-    auto edge = &Root.Value;
+    auto edge = &Root;
 
     while (true) {
       // Load the edge.
@@ -302,7 +360,7 @@ class ConcurrentMap {
       // If it's equal, we can use this node.
       if (comparisonResult == 0) {
         // Destroy the node we allocated before if we're carrying one around.
-        ::delete newNode;
+        if (newNode) this->destroyNode(newNode);
 
         // Cache and report that we found an existing node.
         LastSearch.store(node, std::memory_order_release);
@@ -318,7 +376,7 @@ class ConcurrentMap {
       if (!newNode) {
         size_t allocSize =
           sizeof(Node) + EntryTy::getExtraAllocationSize(key, args...);
-        void *memory = ::operator new(allocSize);
+        void *memory = this->Allocate(allocSize, alignof(Node));
         newNode = ::new (memory) Node(key, std::forward<ArgTys>(args)...);
       }
 
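A consequence of the new design shows up in the Allocate call just above: ConcurrentMapBase inherits from its Allocator parameter and routes every node through that allocator's Allocate and Deallocate. A hedged sketch of a map backed by something other than malloc — ExampleEntry is the hypothetical entry sketched earlier, and llvm::BumpPtrAllocator is chosen because its Deallocate is a no-op, the situation the relaxed contract on the instance getExtraAllocationSize anticipates:

  // Sketch only: nodes come from a bump allocator rather than ::operator new.
  static ConcurrentMap<ExampleEntry, /*ProvideDestructor*/ false,
                       llvm::BumpPtrAllocator> ExampleCache;

  void rememberKey(const void *key) {
    // Allocates a node through llvm::BumpPtrAllocator::Allocate only when the
    // key is not already present; an existing entry is returned unchanged.
    ExampleCache.getOrInsert(key);
  }

The default remains llvm::MallocAllocator, which keeps node allocation on the general heap as before.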

stdlib/public/runtime/AnyHashableSupport.cpp

9 additions, 4 deletions
@@ -59,19 +59,24 @@ struct HashableConformanceEntry {
                             const Metadata *baseTypeThatConformsToHashable) {
     return 0;
   }
+
+  size_t getExtraAllocationSize() const {
+    return 0;
+  }
 };
 } // end unnamed namesapce
 
 // FIXME(performance): consider merging this cache into the regular
 // protocol conformance cache.
-static Lazy<ConcurrentMap<HashableConformanceEntry>> HashableConformances;
+static ConcurrentMap<HashableConformanceEntry, /*Destructor*/ false>
+  HashableConformances;
 
 template<bool KnownToConformToHashable>
 LLVM_ATTRIBUTE_ALWAYS_INLINE
 static const Metadata *findHashableBaseTypeImpl(const Metadata *type) {
   // Check the cache first.
   if (HashableConformanceEntry *entry =
-        HashableConformances->find(HashableConformanceKey{type})) {
+        HashableConformances.find(HashableConformanceKey{type})) {
     return entry->baseTypeThatConformsToHashable;
   }
   if (!KnownToConformToHashable &&
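
The interesting part of the declaration change above is why the Lazy wrapper can be dropped: with ProvideDestructor = false the map's destructor is trivial, and its constructor is constexpr, so a plain global neither registers a static destructor nor needs lazy construction, and every access loses one level of indirection. Schematically (MyEntry stands in for any conforming entry type):

  // Before: wrapped in Lazy to avoid a global constructor/destructor, at the
  // cost of an extra indirection on every access.
  //   static Lazy<ConcurrentMap<MyEntry>> Cache;
  //   ... Cache->find(key) ...
  //
  // After: constexpr-constructible and trivially destructible, used directly.
  //   static ConcurrentMap<MyEntry, /*ProvideDestructor*/ false> Cache;
  //   ... Cache.find(key) ...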
@@ -92,8 +97,8 @@ static const Metadata *findHashableBaseTypeImpl(const Metadata *type) {
       break;
     baseTypeThatConformsToHashable = superclass;
   }
-  HashableConformances->getOrInsert(HashableConformanceKey{type},
-                                    baseTypeThatConformsToHashable);
+  HashableConformances.getOrInsert(HashableConformanceKey{type},
+                                   baseTypeThatConformsToHashable);
   return baseTypeThatConformsToHashable;
 }
 
stdlib/public/runtime/HeapObject.cpp

17 additions, 30 deletions
@@ -145,36 +145,36 @@ static void destroyGenericBox(HeapObject *o) {
                        metadata->getAllocAlignMask());
 }
 
-class BoxCacheEntry : public CacheEntry<BoxCacheEntry> {
+class BoxCacheEntry {
 public:
-  FullMetadata<GenericBoxHeapMetadata> Metadata;
+  FullMetadata<GenericBoxHeapMetadata> Data;
 
-  BoxCacheEntry(size_t numArguments)
-    : Metadata{HeapMetadataHeader{{destroyGenericBox}, {nullptr}},
-               GenericBoxHeapMetadata{MetadataKind::HeapGenericLocalVariable, 0,
-                                      nullptr}} {
-    assert(numArguments == 1);
+  BoxCacheEntry(const Metadata *type)
+    : Data{HeapMetadataHeader{{destroyGenericBox}, {/*vwtable*/ nullptr}},
+           GenericBoxHeapMetadata{MetadataKind::HeapGenericLocalVariable,
+                                  GenericBoxHeapMetadata::getHeaderOffset(type),
+                                  type}} {
   }
 
-  size_t getNumArguments() const {
-    return 1;
+  long getKeyIntValueForDump() {
+    return reinterpret_cast<long>(Data.BoxedType);
   }
 
-  static const char *getName() {
-    return "BoxCache";
+  int compareWithKey(const Metadata *type) const {
+    return comparePointers(type, Data.BoxedType);
   }
 
-  FullMetadata<GenericBoxHeapMetadata> *getData() {
-    return &Metadata;
+  static size_t getExtraAllocationSize(const Metadata *key) {
+    return 0;
   }
-  const FullMetadata<GenericBoxHeapMetadata> *getData() const {
-    return &Metadata;
+  size_t getExtraAllocationSize() const {
+    return 0;
   }
 };
 
 } // end anonymous namespace
 
-static Lazy<MetadataCache<BoxCacheEntry>> Boxes;
+static SimpleGlobalCache<BoxCacheEntry> Boxes;
 
 SWIFT_CC(swift) SWIFT_RUNTIME_EXPORT
 BoxPair::Return
@@ -186,20 +186,7 @@ SWIFT_CC(swift) SWIFT_RT_ENTRY_IMPL_VISIBILITY
 extern "C"
 BoxPair::Return SWIFT_RT_ENTRY_IMPL(swift_allocBox)(const Metadata *type) {
   // Get the heap metadata for the box.
-  auto &B = Boxes.get();
-  const void *typeArg = type;
-  auto entry = B.findOrAdd(&typeArg, 1, [&]() -> BoxCacheEntry* {
-    // Create a new entry for the box.
-    auto entry = BoxCacheEntry::allocate(B.getAllocator(), &typeArg, 1, 0);
-
-    auto metadata = entry->getData();
-    metadata->Offset = GenericBoxHeapMetadata::getHeaderOffset(type);
-    metadata->BoxedType = type;
-
-    return entry;
-  });
-
-  auto metadata = entry->getData();
+  auto metadata = &Boxes.getOrInsert(type).first->Data;
 
   // Allocate and project the box.
   auto allocation = SWIFT_RT_ENTRY_CALL(swift_allocObject)(
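A hedged reading of the one-line replacement (assuming getOrInsert returns a pair of an entry pointer and an inserted-versus-found flag, which the .first access suggests):

  // Sketch: getOrInsert constructs a BoxCacheEntry from `type` only when no
  // entry with an equal key exists; .first points at the entry either way.
  auto result = Boxes.getOrInsert(type);
  auto metadata = &result.first->Data;   // FullMetadata<GenericBoxHeapMetadata> *

The per-entry initialization that the old lambda performed by hand — setting Offset and BoxedType — now happens in BoxCacheEntry's constructor.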
