More metadata cache improvements #4589

Merged

Changes from all commits

176 changes: 117 additions & 59 deletions include/swift/Runtime/Concurrent.h
@@ -13,7 +13,9 @@
#define SWIFT_RUNTIME_CONCURRENTUTILS_H
#include <iterator>
#include <atomic>
#include <functional>
#include <stdint.h>
#include "llvm/Support/Allocator.h"

#if defined(__FreeBSD__)
#include <stdio.h>
@@ -125,48 +127,22 @@ template <class ElemTy> struct ConcurrentList {
std::atomic<ConcurrentListNode<ElemTy> *> First;
};

template <class T, bool Delete> class AtomicMaybeOwningPointer;

template <class T>
class AtomicMaybeOwningPointer<T, false> {
public:
std::atomic<T*> Value;
constexpr AtomicMaybeOwningPointer(T *value) : Value(value) {}
};

/// A utility function for ordering two pointers, which is useful
/// for implementing compareWithKey.
template <class T>
class AtomicMaybeOwningPointer<T, true> {
public:
std::atomic<T*> Value;
constexpr AtomicMaybeOwningPointer(T *value) : Value(value) {}

~AtomicMaybeOwningPointer() {
// This can use relaxed memory order because the client has to ensure
// that all accesses are safely completed and their effects fully
// visible before destruction occurs anyway.
::delete Value.load(std::memory_order_relaxed);
}
};

/// A concurrent map that is implemented using a binary tree. It supports
/// concurrent insertions but does not support removals or rebalancing of
/// the tree.
///
/// The entry type must provide the following operations:
///
/// /// For debugging purposes only. Summarize this key as an integer value.
/// long getKeyIntValueForDump() const;
///
/// /// A ternary comparison. KeyTy is the type of the key provided
/// /// to find or getOrInsert.
/// int compareWithKey(KeyTy key) const;
///
/// /// Return the amount of extra trailing space required by an entry,
/// /// where KeyTy is the type of the first argument to getOrInsert and
/// /// ArgTys is the type of the remaining arguments.
/// static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
template <class EntryTy, bool ProvideDestructor = true>
class ConcurrentMap {
static inline int comparePointers(const T *left, const T *right) {
return (left == right ? 0 : std::less<const T *>()(left, right) ? -1 : 1);
}

template <class EntryTy, bool ProvideDestructor, class Allocator>
class ConcurrentMapBase;

/// The partial specialization of ConcurrentMapBase whose destructor is
/// trivial. The other implementation inherits from this, so this is a
/// base for all ConcurrentMaps.
template <class EntryTy, class Allocator>
class ConcurrentMapBase<EntryTy, false, Allocator> : protected Allocator {
protected:
struct Node {
std::atomic<Node*> Left;
std::atomic<Node*> Right;
@@ -179,15 +155,7 @@ class ConcurrentMap {
Node(const Node &) = delete;
Node &operator=(const Node &) = delete;

~Node() {
// These can be relaxed accesses because there is no safe way for
// another thread to race an access to this node with our destruction
// of it.
::delete Left.load(std::memory_order_relaxed);
::delete Right.load(std::memory_order_relaxed);
}

#ifndef NDEBUG
#ifndef NDEBUG
void dump() const {
auto L = Left.load(std::memory_order_acquire);
auto R = Right.load(std::memory_order_acquire);
@@ -204,19 +172,105 @@ class ConcurrentMap {
printf("\"%p\":f2 -> \"%p\":f0;\n", this, R);
}
}
#endif
#endif
};

/// The root of the tree.
AtomicMaybeOwningPointer<Node, ProvideDestructor> Root;
std::atomic<Node*> Root;

constexpr ConcurrentMapBase() : Root(nullptr) {}

// Implicitly trivial destructor.
~ConcurrentMapBase() = default;

void destroyNode(Node *node) {
assert(node && "destroying null node");
auto allocSize = sizeof(Node) + node->Payload.getExtraAllocationSize();

// Destroy the node's payload.
node->~Node();

// Deallocate the node.
this->Deallocate(node, allocSize);
}
};

/// The partial specialization of ConcurrentMapBase which provides a
/// non-trivial destructor.
template <class EntryTy, class Allocator>
class ConcurrentMapBase<EntryTy, true, Allocator>
: protected ConcurrentMapBase<EntryTy, false, Allocator> {
protected:
using super = ConcurrentMapBase<EntryTy, false, Allocator>;
using Node = typename super::Node;

constexpr ConcurrentMapBase() {}

~ConcurrentMapBase() {
destroyTree(this->Root);
}

private:
void destroyTree(const std::atomic<Node*> &edge) {
// This can be a relaxed load because destruction is not allowed to race
// with other operations.
auto node = edge.load(std::memory_order_relaxed);
if (!node) return;

// Destroy the node's children.
destroyTree(node->Left);
destroyTree(node->Right);

// Destroy the node itself.
this->destroyNode(node);
}
};

/// A concurrent map that is implemented using a binary tree. It supports
/// concurrent insertions but does not support removals or rebalancing of
/// the tree.
///
/// The entry type must provide the following operations:
///
/// /// For debugging purposes only. Summarize this key as an integer value.
/// long getKeyIntValueForDump() const;
///
/// /// A ternary comparison. KeyTy is the type of the key provided
/// /// to find or getOrInsert.
/// int compareWithKey(KeyTy key) const;
///
/// /// Return the amount of extra trailing space required by an entry,
/// /// where KeyTy is the type of the first argument to getOrInsert and
/// /// ArgTys is the type of the remaining arguments.
/// static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
///
/// /// Return the amount of extra trailing space that was requested for
/// /// this entry. This method is only used to compute the size of the
/// /// object during node deallocation; it does not need to return a
/// /// correct value so long as the allocator's Deallocate implementation
/// /// ignores this argument.
/// size_t getExtraAllocationSize() const;
///
/// If ProvideDestructor is false, the destructor will be trivial. This
/// can be appropriate when the object is declared at global scope.
template <class EntryTy, bool ProvideDestructor = true,
class Allocator = llvm::MallocAllocator>
class ConcurrentMap
: private ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator> {
using super = ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator>;

using Node = typename super::Node;

/// Inherited from base class:
/// std::atomic<Node*> Root;
using super::Root;

/// This member stores the address of the last node that was found by the
/// search procedure. We cache the last search to accelerate code that
/// searches the same value in a loop.
std::atomic<Node*> LastSearch;

public:
constexpr ConcurrentMap() : Root(nullptr), LastSearch(nullptr) {}
constexpr ConcurrentMap() : LastSearch(nullptr) {}

ConcurrentMap(const ConcurrentMap &) = delete;
ConcurrentMap &operator=(const ConcurrentMap &) = delete;
@@ -226,9 +280,13 @@ class ConcurrentMap {

public:

Allocator &getAllocator() {
return *this;
}

#ifndef NDEBUG
void dump() const {
auto R = Root.Value.load(std::memory_order_acquire);
auto R = Root.load(std::memory_order_acquire);
printf("digraph g {\n"
"graph [ rankdir = \"TB\"];\n"
"node [ fontsize = \"16\" ];\n"
@@ -252,7 +310,7 @@ class ConcurrentMap {
}

// Search the tree, starting from the root.
Node *node = Root.Value.load(std::memory_order_acquire);
Node *node = Root.load(std::memory_order_acquire);
while (node) {
int comparisonResult = node->Payload.compareWithKey(key);
if (comparisonResult == 0) {
@@ -285,7 +343,7 @@ class ConcurrentMap {
Node *newNode = nullptr;

// Start from the root.
auto edge = &Root.Value;
auto edge = &Root;

while (true) {
// Load the edge.
@@ -302,7 +360,7 @@ class ConcurrentMap {
// If it's equal, we can use this node.
if (comparisonResult == 0) {
// Destroy the node we allocated before if we're carrying one around.
::delete newNode;
if (newNode) this->destroyNode(newNode);

// Cache and report that we found an existing node.
LastSearch.store(node, std::memory_order_release);
@@ -318,7 +376,7 @@ class ConcurrentMap {
if (!newNode) {
size_t allocSize =
sizeof(Node) + EntryTy::getExtraAllocationSize(key, args...);
void *memory = ::operator new(allocSize);
void *memory = this->Allocate(allocSize, alignof(Node));
newNode = ::new (memory) Node(key, std::forward<ArgTys>(args)...);
}

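For illustration, below is a minimal sketch (not part of this patch) of an entry type satisfying the interface documented in the ConcurrentMap comment above. The name PointerKeyedEntry is hypothetical, it assumes the declarations from Concurrent.h are visible, and the two getExtraAllocationSize overloads mirror the sizes that getOrInsert and destroyNode compute when allocating and freeing nodes.

// Hypothetical example, not part of this change: a ConcurrentMap entry
// keyed by a pointer, with no extra trailing storage.
struct PointerKeyedEntry {
  const void *Key;

  PointerKeyedEntry(const void *key) : Key(key) {}

  // For debugging dumps only.
  long getKeyIntValueForDump() const {
    return reinterpret_cast<long>(Key);
  }

  // Ternary comparison against the key passed to find/getOrInsert.
  int compareWithKey(const void *key) const {
    return comparePointers(key, Key);
  }

  // This entry requests no trailing space, so both the static
  // (pre-allocation) and instance (pre-deallocation) queries return 0.
  static size_t getExtraAllocationSize(const void *key) { return 0; }
  size_t getExtraAllocationSize() const { return 0; }
};

// The map can then use the default llvm::MallocAllocator, or any allocator
// exposing Allocate(size, alignment) and Deallocate(ptr, size), e.g.:
//   ConcurrentMap<PointerKeyedEntry> HeapBackedMap;
//   ConcurrentMap<PointerKeyedEntry, /*ProvideDestructor*/ false,
//                 llvm::BumpPtrAllocator> ArenaBackedMap;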
13 changes: 9 additions & 4 deletions stdlib/public/runtime/AnyHashableSupport.cpp
@@ -59,19 +59,24 @@ struct HashableConformanceEntry {
const Metadata *baseTypeThatConformsToHashable) {
return 0;
}

size_t getExtraAllocationSize() const {
return 0;
}
};
} // end unnamed namespace

// FIXME(performance): consider merging this cache into the regular
// protocol conformance cache.
static Lazy<ConcurrentMap<HashableConformanceEntry>> HashableConformances;
static ConcurrentMap<HashableConformanceEntry, /*Destructor*/ false>
HashableConformances;

template<bool KnownToConformToHashable>
LLVM_ATTRIBUTE_ALWAYS_INLINE
static const Metadata *findHashableBaseTypeImpl(const Metadata *type) {
// Check the cache first.
if (HashableConformanceEntry *entry =
HashableConformances->find(HashableConformanceKey{type})) {
HashableConformances.find(HashableConformanceKey{type})) {
return entry->baseTypeThatConformsToHashable;
}
if (!KnownToConformToHashable &&
@@ -92,8 +97,8 @@ static const Metadata *findHashableBaseTypeImpl(const Metadata *type) {
break;
baseTypeThatConformsToHashable = superclass;
}
HashableConformances->getOrInsert(HashableConformanceKey{type},
baseTypeThatConformsToHashable);
HashableConformances.getOrInsert(HashableConformanceKey{type},
baseTypeThatConformsToHashable);
return baseTypeThatConformsToHashable;
}

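A condensed usage sketch of the pattern adopted above (the names CachedHashableBases and lookupOrCacheHashableBase are hypothetical): because ProvideDestructor is false, the map's destructor is trivial, so it can be a plain global rather than a Lazy<>-wrapped cache; the pair shape of getOrInsert's result is inferred from the swift_allocBox call site below.

// Hypothetical sketch condensing the cache logic above; not part of the patch.
static ConcurrentMap<HashableConformanceEntry, /*Destructor*/ false>
    CachedHashableBases;

static const Metadata *lookupOrCacheHashableBase(const Metadata *type,
                                                 const Metadata *base) {
  // find() returns the existing entry, or nullptr on a cache miss.
  if (auto *entry = CachedHashableBases.find(HashableConformanceKey{type}))
    return entry->baseTypeThatConformsToHashable;

  // getOrInsert() either inserts a new entry constructed from
  // (key, extra args...) or returns the entry another thread won with.
  // The .first element being the entry pointer is assumed from the
  // call site in HeapObject.cpp below.
  return CachedHashableBases
      .getOrInsert(HashableConformanceKey{type}, base)
      .first->baseTypeThatConformsToHashable;
}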
47 changes: 17 additions & 30 deletions stdlib/public/runtime/HeapObject.cpp
@@ -145,36 +145,36 @@ static void destroyGenericBox(HeapObject *o) {
metadata->getAllocAlignMask());
}

class BoxCacheEntry : public CacheEntry<BoxCacheEntry> {
class BoxCacheEntry {
public:
FullMetadata<GenericBoxHeapMetadata> Metadata;
FullMetadata<GenericBoxHeapMetadata> Data;

BoxCacheEntry(size_t numArguments)
: Metadata{HeapMetadataHeader{{destroyGenericBox}, {nullptr}},
GenericBoxHeapMetadata{MetadataKind::HeapGenericLocalVariable, 0,
nullptr}} {
assert(numArguments == 1);
BoxCacheEntry(const Metadata *type)
: Data{HeapMetadataHeader{{destroyGenericBox}, {/*vwtable*/ nullptr}},
GenericBoxHeapMetadata{MetadataKind::HeapGenericLocalVariable,
GenericBoxHeapMetadata::getHeaderOffset(type),
type}} {
}

size_t getNumArguments() const {
return 1;
long getKeyIntValueForDump() {
return reinterpret_cast<long>(Data.BoxedType);
}

static const char *getName() {
return "BoxCache";
int compareWithKey(const Metadata *type) const {
return comparePointers(type, Data.BoxedType);
}

FullMetadata<GenericBoxHeapMetadata> *getData() {
return &Metadata;
static size_t getExtraAllocationSize(const Metadata *key) {
return 0;
}
const FullMetadata<GenericBoxHeapMetadata> *getData() const {
return &Metadata;
size_t getExtraAllocationSize() const {
return 0;
}
};

} // end anonymous namespace

static Lazy<MetadataCache<BoxCacheEntry>> Boxes;
static SimpleGlobalCache<BoxCacheEntry> Boxes;

SWIFT_CC(swift) SWIFT_RUNTIME_EXPORT
BoxPair::Return
@@ -186,20 +186,7 @@ SWIFT_CC(swift) SWIFT_RT_ENTRY_IMPL_VISIBILITY
extern "C"
BoxPair::Return SWIFT_RT_ENTRY_IMPL(swift_allocBox)(const Metadata *type) {
// Get the heap metadata for the box.
auto &B = Boxes.get();
const void *typeArg = type;
auto entry = B.findOrAdd(&typeArg, 1, [&]() -> BoxCacheEntry* {
// Create a new entry for the box.
auto entry = BoxCacheEntry::allocate(B.getAllocator(), &typeArg, 1, 0);

auto metadata = entry->getData();
metadata->Offset = GenericBoxHeapMetadata::getHeaderOffset(type);
metadata->BoxedType = type;

return entry;
});

auto metadata = entry->getData();
auto metadata = &Boxes.getOrInsert(type).first->Data;

// Allocate and project the box.
auto allocation = SWIFT_RT_ENTRY_CALL(swift_allocObject)(
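For clarity, the one-line getOrInsert call above can be unpacked as follows; the meaning of the .second flag is an assumption, since getOrInsert's return statement is not visible in this truncated diff.

// Hypothetical unpacking of the call above, for illustration only.
auto result = Boxes.getOrInsert(type);   // (entry, inserted?) pair
BoxCacheEntry *entry = result.first;     // cache hit or freshly built entry
bool wasInserted = result.second;        // assumed: true if this call created it
(void)wasInserted;
auto metadata = &entry->Data;            // FullMetadata<GenericBoxHeapMetadata>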