[libc] Use clang's scoped atomics if available from the compiler #74769

Merged 1 commit on Jan 18, 2024
82 changes: 64 additions & 18 deletions libc/src/__support/CPP/atomic.h
@@ -26,6 +26,18 @@ enum class MemoryOrder : int {
   SEQ_CST = __ATOMIC_SEQ_CST
 };
 
+// These are a clang extension, see the clang documentation for more information:
+// https://clang.llvm.org/docs/LanguageExtensions.html#scoped-atomic-builtins.
+enum class MemoryScope : int {
+#if defined(__MEMORY_SCOPE_SYSTEM) && defined(__MEMORY_SCOPE_DEVICE)
+  SYSTEM = __MEMORY_SCOPE_SYSTEM,
+  DEVICE = __MEMORY_SCOPE_DEVICE,
+#else
+  SYSTEM = 0,
+  DEVICE = 0,
+#endif
+};
+
 template <typename T> struct Atomic {
   // For now, we will restrict to only arithmetic types.
   static_assert(is_arithmetic_v<T>, "Only arithmetic types can be atomic.");
@@ -54,48 +66,82 @@ template <typename T> struct Atomic {
   Atomic(const Atomic &) = delete;
   Atomic &operator=(const Atomic &) = delete;
 
-  // Atomic load
+  // Atomic load.
   operator T() { return __atomic_load_n(&val, int(MemoryOrder::SEQ_CST)); }
 
-  T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_load_n(&val, int(mem_ord));
+  T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+         [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_load_n))
+      return __scoped_atomic_load_n(&val, int(mem_ord), (int)(mem_scope));
+    else
+      return __atomic_load_n(&val, int(mem_ord));
   }
 
-  // Atomic store
+  // Atomic store.
   T operator=(T rhs) {
     __atomic_store_n(&val, rhs, int(MemoryOrder::SEQ_CST));
     return rhs;
   }
 
-  void store(T rhs, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    __atomic_store_n(&val, rhs, int(mem_ord));
+  void store(T rhs, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+             [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_store_n))
+      __scoped_atomic_store_n(&val, rhs, int(mem_ord), (int)(mem_scope));
+    else
+      __atomic_store_n(&val, rhs, int(mem_ord));
   }
 
   // Atomic compare exchange
-  bool compare_exchange_strong(T &expected, T desired,
-                               MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
+  bool compare_exchange_strong(
+      T &expected, T desired, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+      [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
     return __atomic_compare_exchange_n(&val, &expected, desired, false,
                                        int(mem_ord), int(mem_ord));
   }
 
-  T exchange(T desired, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_exchange_n(&val, desired, int(mem_ord));
+  T exchange(T desired, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+             [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_exchange_n))
+      return __scoped_atomic_exchange_n(&val, desired, int(mem_ord),
+                                        (int)(mem_scope));
+    else
+      return __atomic_exchange_n(&val, desired, int(mem_ord));
   }
 
-  T fetch_add(T increment, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_fetch_add(&val, increment, int(mem_ord));
+  T fetch_add(T increment, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+              [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_add))
+      return __scoped_atomic_fetch_add(&val, increment, int(mem_ord),
+                                       (int)(mem_scope));
+    else
+      return __atomic_fetch_add(&val, increment, int(mem_ord));
   }
 
-  T fetch_or(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_fetch_or(&val, mask, int(mem_ord));
+  T fetch_or(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+             [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_or))
+      return __scoped_atomic_fetch_or(&val, mask, int(mem_ord),
+                                      (int)(mem_scope));
+    else
+      return __atomic_fetch_or(&val, mask, int(mem_ord));
   }
 
-  T fetch_and(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_fetch_and(&val, mask, int(mem_ord));
+  T fetch_and(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+              [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_and))
+      return __scoped_atomic_fetch_and(&val, mask, int(mem_ord),
+                                       (int)(mem_scope));
+    else
+      return __atomic_fetch_and(&val, mask, int(mem_ord));
   }
 
-  T fetch_sub(T decrement, MemoryOrder mem_ord = MemoryOrder::SEQ_CST) {
-    return __atomic_fetch_sub(&val, decrement, int(mem_ord));
+  T fetch_sub(T decrement, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
+              [[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
+    if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_sub))
+      return __scoped_atomic_fetch_sub(&val, decrement, int(mem_ord),
+                                       (int)(mem_scope));
+    else
+      return __atomic_fetch_sub(&val, decrement, int(mem_ord));
   }
 
   // Set the value without using an atomic operation. This is useful
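The pattern repeated in every method above is compile-time dispatch: LIBC_HAS_BUILTIN (libc's portable wrapper around __has_builtin) expands to an integer constant in the preprocessor, so the `if constexpr` keeps exactly one branch, and compilers without the scoped builtins fall back to the standard `__atomic_*` forms at zero runtime cost. Below is a minimal standalone sketch of the same technique, assuming a clang new enough to provide the scoped builtins; the free function scoped_fetch_add is illustrative only, not part of this patch.

// Sketch of the detection/fallback pattern this patch applies to every
// Atomic<T> method. scoped_fetch_add is a hypothetical helper, not libc's API.
#ifndef LIBC_HAS_BUILTIN
#if defined(__has_builtin)
#define LIBC_HAS_BUILTIN(BUILTIN) __has_builtin(BUILTIN)
#else
#define LIBC_HAS_BUILTIN(BUILTIN) 0 // No __has_builtin: always take the fallback.
#endif
#endif

template <typename T>
T scoped_fetch_add(T *ptr, T increment, int scope) {
  // LIBC_HAS_BUILTIN expands to 0 or 1 before compilation, so only one
  // branch of the if constexpr survives. The discarded branch is never
  // instantiated, so it may name a builtin the compiler does not have.
  if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_add))
    return __scoped_atomic_fetch_add(ptr, increment, __ATOMIC_SEQ_CST, scope);
  else
    return __atomic_fetch_add(ptr, increment, __ATOMIC_SEQ_CST);
}

The same effect could be had with `#if` blocks inside each method body; using `if constexpr` keeps both call forms visible in one signature, at the cost of relying on template instantiation rules to avoid looking up the missing builtin in the discarded branch.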
17 changes: 11 additions & 6 deletions libc/src/__support/RPC/rpc.h
@@ -109,14 +109,16 @@ template <bool Invert, typename Packet> struct Process {

   /// Retrieve the inbox state from memory shared between processes.
   LIBC_INLINE uint32_t load_inbox(uint64_t lane_mask, uint32_t index) const {
-    return gpu::broadcast_value(lane_mask,
-                                inbox[index].load(cpp::MemoryOrder::RELAXED));
+    return gpu::broadcast_value(
+        lane_mask,
+        inbox[index].load(cpp::MemoryOrder::RELAXED, cpp::MemoryScope::SYSTEM));
   }
 
   /// Retrieve the outbox state from memory shared between processes.
   LIBC_INLINE uint32_t load_outbox(uint64_t lane_mask, uint32_t index) const {
     return gpu::broadcast_value(lane_mask,
-                                outbox[index].load(cpp::MemoryOrder::RELAXED));
+                                outbox[index].load(cpp::MemoryOrder::RELAXED,
+                                                   cpp::MemoryScope::SYSTEM));
   }
 
   /// Signal to the other process that this one is finished with the buffer.
@@ -126,7 +128,8 @@ template <bool Invert, typename Packet> struct Process {
   LIBC_INLINE uint32_t invert_outbox(uint32_t index, uint32_t current_outbox) {
     uint32_t inverted_outbox = !current_outbox;
     atomic_thread_fence(cpp::MemoryOrder::RELEASE);
-    outbox[index].store(inverted_outbox, cpp::MemoryOrder::RELAXED);
+    outbox[index].store(inverted_outbox, cpp::MemoryOrder::RELAXED,
+                        cpp::MemoryScope::SYSTEM);
     return inverted_outbox;
   }
 
@@ -241,7 +244,8 @@ template <bool Invert, typename Packet> struct Process {
     uint32_t slot = index / NUM_BITS_IN_WORD;
     uint32_t bit = index % NUM_BITS_IN_WORD;
     return bits[slot].fetch_or(static_cast<uint32_t>(cond) << bit,
-                               cpp::MemoryOrder::RELAXED) &
+                               cpp::MemoryOrder::RELAXED,
+                               cpp::MemoryScope::DEVICE) &
            (1u << bit);
   }
 
@@ -251,7 +255,8 @@ template <bool Invert, typename Packet> struct Process {
     uint32_t slot = index / NUM_BITS_IN_WORD;
     uint32_t bit = index % NUM_BITS_IN_WORD;
     return bits[slot].fetch_and(~0u ^ (static_cast<uint32_t>(cond) << bit),
-                                cpp::MemoryOrder::RELAXED) &
+                                cpp::MemoryOrder::RELAXED,
+                                cpp::MemoryScope::DEVICE) &
            (1u << bit);
   }
 };
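The scope choices in rpc.h track each variable's sharing domain: the inbox and outbox flags synchronize the CPU and GPU processes, so their loads and stores pass cpp::MemoryScope::SYSTEM, while the packet-slot bits are only ever contended by threads on the same GPU, so the cheaper cpp::MemoryScope::DEVICE suffices. A hedged usage sketch of the extended interface follows; the variable names are illustrative, and the include path and LIBC_NAMESPACE spelling are assumptions about the tree layout at the time:

#include <stdint.h>

#include "src/__support/CPP/atomic.h" // Assumed include path.

using namespace LIBC_NAMESPACE; // Assumed namespace macro.

// 'host_flag' stands in for state shared with the host process,
// 'device_count' for state private to one GPU device.
static cpp::Atomic<uint32_t> host_flag;
static cpp::Atomic<uint32_t> device_count;

void example() {
  // Handshake the CPU must observe: system scope is required.
  host_flag.store(1, cpp::MemoryOrder::RELAXED, cpp::MemoryScope::SYSTEM);
  // Device-local counter: device scope lets the backend emit a cheaper
  // instruction on targets that distinguish the two.
  device_count.fetch_add(1, cpp::MemoryOrder::RELAXED,
                         cpp::MemoryScope::DEVICE);
  // On compilers without the scoped builtins, both calls lower to the
  // ordinary __atomic_* forms and the scope argument is ignored.
}

Note that the new parameter defaults to MemoryScope::DEVICE, so existing callers that pass only a memory order keep compiling and implicitly become device-scoped where the builtins exist; that is why the system-shared inbox and outbox sites in rpc.h are updated to pass SYSTEM explicitly.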