//===-- A lock-free data structure for a fixed capacity stack ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FIXEDSTACK_H
#define LLVM_LIBC_SRC___SUPPORT_FIXEDSTACK_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/macros/config.h"                    // LIBC_INLINE_ASM
#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_*

#include <stdint.h>

namespace LIBC_NAMESPACE {

// A lock-free, fixed-size stack backed by an underlying cpp::array data
// structure. It supports push and pop operations in a thread-safe manner.
template <typename T, uint32_t CAPACITY> class alignas(16) FixedStack {
  static_assert(CAPACITY < UINT32_MAX, "Invalid buffer size");

  // The heads of the free and used stacks. Each is represented as a 32-bit
  // index combined with a 32-bit ABA tag that is updated in a single atomic
  // operation.
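  // The tag is incremented on every successful update, so a head that was
  // popped and later pushed back compares unequal, avoiding the classic ABA
  // problem of compare-and-swap based stacks.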
  uint64_t free;
  uint64_t used;

  // Each stack is a linked list of indices into the underlying data array.
  cpp::array<uint32_t, CAPACITY> next;
  cpp::array<T, CAPACITY> data;

  // Get the 32-bit index into the underlying array from the head.
  static constexpr uint32_t get_node(uint64_t head) {
    return static_cast<uint32_t>(head & 0xffffffff);
  }

  // Increment the old ABA tag and merge it into the new index.
  static constexpr uint64_t make_new_head(uint64_t orig, uint32_t node) {
    return static_cast<uint64_t>(node) | (((orig >> 32ul) + 1ul) << 32ul);
  }
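  // For example, a head encoding tag 2 and node 3 (0x0000000200000003)
  // combined with node 5 yields tag 3 and node 5 (0x0000000300000005).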

  // Suspend the calling thread briefly to reduce contention while spinning.
  static void sleep_briefly() {
#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
    if (__nvvm_reflect("__CUDA_ARCH") >= 700)
      LIBC_INLINE_ASM("nanosleep.u32 32;" ::: "memory");
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
    __builtin_amdgcn_s_sleep(1);
#elif defined(LIBC_TARGET_ARCH_IS_X86)
    __builtin_ia32_pause();
#else
    // Simply do nothing if sleeping isn't supported on this platform.
#endif
  }

  // Helper macros for the atomic operations. We cannot use the standard
  // cpp::atomic helpers because the constructor would no longer be constexpr
  // and the NVPTX backend cannot currently support all of the atomics.
#define atomic_load(val, mem_order) __atomic_load_n(val, (int)mem_order)
#define atomic_store(val, x, mem_order)                                        \
  __atomic_store_n(val, x, (int)mem_order)
#define atomic_cas(val, expected, desired, success_order, failure_order)       \
  __atomic_compare_exchange_n(val, expected, desired, /*weak=*/true,           \
                              (int)success_order, (int)failure_order)

  // Attempts to pop data from the given stack by making the head point to the
  // next node. We repeatedly attempt to write to the head using
  // compare-and-swap, expecting that it has not been changed by any other
  // thread.
  uint32_t pop_impl(uint64_t *head) {
    uint64_t orig = atomic_load(head, cpp::MemoryOrder::RELAXED);

    for (;;) {
      // The stack is empty if the head holds the sentinel value.
      if (get_node(orig) == CAPACITY)
        return CAPACITY;

      uint32_t node =
          atomic_load(&next[get_node(orig)], cpp::MemoryOrder::RELAXED);
      // On failure the weak CAS refreshes 'orig' with the current head, so the
      // next iteration retries against fresh state.
      if (atomic_cas(head, &orig, make_new_head(orig, node),
                     cpp::MemoryOrder::ACQUIRE, cpp::MemoryOrder::RELAXED))
        break;
      sleep_briefly();
    }
    return get_node(orig);
  }

  // Attempts to push data to the given stack by making the head point to the
  // new node. We repeatedly attempt to write to the head using
  // compare-and-swap, expecting that it has not been changed by any other
  // thread.
  uint32_t push_impl(uint64_t *head, uint32_t node) {
    uint64_t orig = atomic_load(head, cpp::MemoryOrder::RELAXED);
    for (;;) {
      // Store atomically so this write cannot race with the relaxed load of
      // 'next' in pop_impl.
      atomic_store(&next[node], get_node(orig), cpp::MemoryOrder::RELAXED);
      if (atomic_cas(head, &orig, make_new_head(orig, node),
                     cpp::MemoryOrder::RELEASE, cpp::MemoryOrder::RELAXED))
        break;
      sleep_briefly();
    }
    // Return the node that was just linked in; re-reading the head here would
    // race with concurrent updates to the stack.
    return node;
  }

public:
  // Initialize the free stack to be full and the used stack to be empty. We
  // use the capacity of the stack as a sentinel value.
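  // The free list is threaded through 'next', where each entry points at the
  // following index; e.g. for CAPACITY == 4 the initial free list is
  // 0 -> 1 -> 2 -> 3 -> 4 (sentinel).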
  constexpr FixedStack() : free(0), used(CAPACITY), next{}, data{} {
    for (uint32_t i = 0; i < CAPACITY; ++i)
      next[i] = i + 1;
  }

  // Pushes 'val' onto the stack, returning false if the stack is full.
  bool push(const T &val) {
    // Take a node off the free list, fill it, then publish it on 'used'.
    uint32_t node = pop_impl(&free);
    if (node == CAPACITY)
      return false;

    data[node] = val;
    push_impl(&used, node);
    return true;
  }

  // Pops a value off the stack into 'val', returning false if it is empty.
  bool pop(T &val) {
    // Take the top node off the used list, read it, then recycle the node.
    uint32_t node = pop_impl(&used);
    if (node == CAPACITY)
      return false;

    val = data[node];
    push_impl(&free, node);
    return true;
  }

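  // NOTE: These queries are inherently racy snapshots; under concurrency the
  // state may change before the caller can act on the result.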
  bool empty() const {
    return get_node(atomic_load(&used, cpp::MemoryOrder::RELAXED)) == CAPACITY;
  }

  bool full() const {
    return get_node(atomic_load(&free, cpp::MemoryOrder::RELAXED)) == CAPACITY;
  }

#undef atomic_load
#undef atomic_store
#undef atomic_cas
};
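
// Example usage (an illustrative sketch only; the element type and capacity
// below are arbitrary and not part of this header's interface):
//
//   LIBC_NAMESPACE::FixedStack<int, 8> stack;
//   if (!stack.push(42))
//     return; // The stack was full.
//   int val;
//   if (stack.pop(val)) {
//     // 'val' now holds 42 and its node is back on the free list.
//   }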

} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC___SUPPORT_FIXEDSTACK_H