-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] Pull last dependencies into rpc_util.h #116693
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) Changes Summary: Full diff: https://github.com/llvm/llvm-project/pull/116693.diff 6 Files Affected:
diff --git a/libc/src/__support/OSUtil/gpu/exit.cpp b/libc/src/__support/OSUtil/gpu/exit.cpp
index 8aaa41b4e3eefc..0cb266a42d180a 100644
--- a/libc/src/__support/OSUtil/gpu/exit.cpp
+++ b/libc/src/__support/OSUtil/gpu/exit.cpp
@@ -8,6 +8,7 @@
#include "src/__support/OSUtil/exit.h"
+#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
index a257003a907de8..be7686b2a2fe59 100644
--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -19,8 +19,7 @@
#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_H
#include "rpc_util.h"
-#include "src/__support/CPP/optional.h"
-#include "src/__support/GPU/utils.h"
+#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
#include <stdint.h>
@@ -110,14 +109,14 @@ template <bool Invert> struct Process {
/// Retrieve the inbox state from memory shared between processes.
LIBC_INLINE uint32_t load_inbox(uint64_t lane_mask, uint32_t index) const {
- return gpu::broadcast_value(
+ return rpc::broadcast_value(
lane_mask, __scoped_atomic_load_n(&inbox[index], __ATOMIC_RELAXED,
__MEMORY_SCOPE_SYSTEM));
}
/// Retrieve the outbox state from memory shared between processes.
LIBC_INLINE uint32_t load_outbox(uint64_t lane_mask, uint32_t index) const {
- return gpu::broadcast_value(
+ return rpc::broadcast_value(
lane_mask, __scoped_atomic_load_n(&outbox[index], __ATOMIC_RELAXED,
__MEMORY_SCOPE_SYSTEM));
}
@@ -162,9 +161,10 @@ template <bool Invert> struct Process {
/// Attempt to claim the lock at index. Return true on lock taken.
/// lane_mask is a bitmap of the threads in the warp that would hold the
- /// single lock on success, e.g. the result of gpu::get_lane_mask()
+ /// single lock on success, e.g. the result of rpc::get_lane_mask()
/// The lock is held when the n-th bit of the lock bitfield is set.
- LIBC_INLINE bool try_lock(uint64_t lane_mask, uint32_t index) {
+ [[clang::convergent]] LIBC_INLINE bool try_lock(uint64_t lane_mask,
+ uint32_t index) {
// On amdgpu, test and set to the nth lock bit and a sync_lane would suffice
// On volta, need to handle differences between the threads running and
// the threads that were detected in the previous call to get_lane_mask()
@@ -173,12 +173,12 @@ template <bool Invert> struct Process {
// There may be threads active which are not in lane mask which must not
// succeed in taking the lock, as otherwise it will leak. This is handled
// by making threads which are not in lane_mask or with 0, a no-op.
- uint32_t id = gpu::get_lane_id();
+ uint32_t id = rpc::get_lane_id();
bool id_in_lane_mask = lane_mask & (1ul << id);
// All threads in the warp call fetch_or. Possibly at the same time.
bool before = set_nth(lock, index, id_in_lane_mask);
- uint64_t packed = gpu::ballot(lane_mask, before);
+ uint64_t packed = rpc::ballot(lane_mask, before);
// If every bit set in lane_mask is also set in packed, every single thread
// in the warp failed to get the lock. Ballot returns unset for threads not
@@ -204,7 +204,8 @@ template <bool Invert> struct Process {
/// Unlock the lock at index. We need a lane sync to keep this function
/// convergent, otherwise the compiler will sink the store and deadlock.
- LIBC_INLINE void unlock(uint64_t lane_mask, uint32_t index) {
+ [[clang::convergent]] LIBC_INLINE void unlock(uint64_t lane_mask,
+ uint32_t index) {
// Do not move any writes past the unlock.
__atomic_thread_fence(__ATOMIC_RELEASE);
@@ -212,8 +213,8 @@ template <bool Invert> struct Process {
// restrict to a single thread to avoid one thread dropping the lock, then
// an unrelated warp claiming the lock, then a second thread in this warp
// dropping the lock again.
- clear_nth(lock, index, gpu::is_first_lane(lane_mask));
- gpu::sync_lane(lane_mask);
+ clear_nth(lock, index, rpc::is_first_lane(lane_mask));
+ rpc::sync_lane(lane_mask);
}
/// Number of bytes to allocate for an inbox or outbox.
@@ -276,9 +277,9 @@ template <typename F>
LIBC_INLINE static void invoke_rpc(F &&fn, uint32_t lane_size,
uint64_t lane_mask, Buffer *slot) {
if constexpr (is_process_gpu()) {
- fn(&slot[gpu::get_lane_id()], gpu::get_lane_id());
+ fn(&slot[rpc::get_lane_id()], rpc::get_lane_id());
} else {
- for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
+ for (uint32_t i = 0; i < lane_size; i += rpc::get_num_lanes())
if (lane_mask & (1ul << i))
fn(&slot[i], i);
}
@@ -302,7 +303,7 @@ template <bool T> struct Port {
friend struct Client;
friend struct Server;
- friend class cpp::optional<Port<T>>;
+ friend class rpc::optional<Port<T>>;
public:
template <typename U> LIBC_INLINE void recv(U use);
@@ -323,7 +324,7 @@ template <bool T> struct Port {
LIBC_INLINE void close() {
// Wait for all lanes to finish using the port.
- gpu::sync_lane(lane_mask);
+ rpc::sync_lane(lane_mask);
// The server is passive, if it owns the buffer when it closes we need to
// give ownership back to the client.
@@ -358,9 +359,6 @@ struct Client {
private:
Process<false> process;
};
-static_assert(cpp::is_trivially_copyable<Client>::value &&
- sizeof(Process<true>) == sizeof(Process<false>),
- "The client is not trivially copyable from the server");
/// The RPC server used to respond to the client.
struct Server {
@@ -373,7 +371,7 @@ struct Server {
: process(port_count, buffer) {}
using Port = rpc::Port<true>;
- LIBC_INLINE cpp::optional<Port> try_open(uint32_t lane_size,
+ LIBC_INLINE rpc::optional<Port> try_open(uint32_t lane_size,
uint32_t start = 0);
LIBC_INLINE Port open(uint32_t lane_size);
@@ -466,7 +464,7 @@ LIBC_INLINE void Port<T>::send_n(const void *const *src, uint64_t *size) {
});
uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t);
uint64_t mask = process.header[index].mask;
- while (gpu::ballot(mask, idx < num_sends)) {
+ while (rpc::ballot(mask, idx < num_sends)) {
send([=](Buffer *buffer, uint32_t id) {
uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data)
? sizeof(Buffer::data)
@@ -499,7 +497,7 @@ LIBC_INLINE void Port<T>::recv_n(void **dst, uint64_t *size, A &&alloc) {
});
uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t);
uint64_t mask = process.header[index].mask;
- while (gpu::ballot(mask, idx < num_recvs)) {
+ while (rpc::ballot(mask, idx < num_recvs)) {
recv([=](Buffer *buffer, uint32_t id) {
uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data)
? sizeof(Buffer::data)
@@ -517,16 +515,17 @@ LIBC_INLINE void Port<T>::recv_n(void **dst, uint64_t *size, A &&alloc) {
/// port. Each port instance uses an associated \p opcode to tell the server
/// what to do. The Client interface provides the appropriate lane size to the
/// port using the platform's returned value.
-template <uint16_t opcode> LIBC_INLINE Client::Port Client::open() {
+template <uint16_t opcode>
+[[clang::convergent]] LIBC_INLINE Client::Port Client::open() {
// Repeatedly perform a naive linear scan for a port that can be opened to
// send data.
- for (uint32_t index = gpu::get_cluster_id();; ++index) {
+ for (uint32_t index = 0;; ++index) {
// Start from the beginning if we run out of ports to check.
if (index >= process.port_count)
index = 0;
// Attempt to acquire the lock on this index.
- uint64_t lane_mask = gpu::get_lane_mask();
+ uint64_t lane_mask = rpc::get_lane_mask();
if (!process.try_lock(lane_mask, index))
continue;
@@ -540,22 +539,22 @@ template <uint16_t opcode> LIBC_INLINE Client::Port Client::open() {
continue;
}
- if (gpu::is_first_lane(lane_mask)) {
+ if (rpc::is_first_lane(lane_mask)) {
process.header[index].opcode = opcode;
process.header[index].mask = lane_mask;
}
- gpu::sync_lane(lane_mask);
- return Port(process, lane_mask, gpu::get_lane_size(), index, out);
+ rpc::sync_lane(lane_mask);
+ return Port(process, lane_mask, rpc::get_num_lanes(), index, out);
}
}
/// Attempts to open a port to use as the server. The server can only open a
/// port if it has a pending receive operation
-LIBC_INLINE cpp::optional<typename Server::Port>
+[[clang::convergent]] LIBC_INLINE rpc::optional<typename Server::Port>
Server::try_open(uint32_t lane_size, uint32_t start) {
// Perform a naive linear scan for a port that has a pending request.
for (uint32_t index = start; index < process.port_count; ++index) {
- uint64_t lane_mask = gpu::get_lane_mask();
+ uint64_t lane_mask = rpc::get_lane_mask();
uint32_t in = process.load_inbox(lane_mask, index);
uint32_t out = process.load_outbox(lane_mask, index);
@@ -578,13 +577,13 @@ Server::try_open(uint32_t lane_size, uint32_t start) {
return Port(process, lane_mask, lane_size, index, out);
}
- return cpp::nullopt;
+ return rpc::nullopt;
}
LIBC_INLINE Server::Port Server::open(uint32_t lane_size) {
for (;;) {
- if (cpp::optional<Server::Port> p = try_open(lane_size))
- return cpp::move(p.value());
+ if (rpc::optional<Server::Port> p = try_open(lane_size))
+ return rpc::move(p.value());
sleep_briefly();
}
}
diff --git a/libc/src/__support/RPC/rpc_client.h b/libc/src/__support/RPC/rpc_client.h
index 695b6b7515bf7b..7bd6d0b5e00b47 100644
--- a/libc/src/__support/RPC/rpc_client.h
+++ b/libc/src/__support/RPC/rpc_client.h
@@ -12,11 +12,16 @@
#include "rpc.h"
#include "include/llvm-libc-types/rpc_opcodes_t.h"
+#include "src/__support/CPP/type_traits.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
namespace rpc {
+static_assert(cpp::is_trivially_copyable<Client>::value &&
+ sizeof(Process<true>) == sizeof(Process<false>),
+ "The client is not trivially copyable from the server");
+
/// The libc client instance used to communicate with the server.
extern Client client;
diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
index 93b8289617484e..aa23549d4b8c2e 100644
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -9,23 +9,230 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H
#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H
-#include "src/__support/CPP/type_traits.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
-#include "src/__support/threads/sleep.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__NVPTX__) || defined(__AMDGPU__)
+#include <gpuintrin.h>
+#define RPC_TARGET_IS_GPU
+#endif
namespace LIBC_NAMESPACE_DECL {
namespace rpc {
+template <typename T> struct type_identity {
+ using type = T;
+};
+
+template <class T, T v> struct type_constant {
+ static inline constexpr T value = v;
+};
+
+template <class T> struct remove_reference : type_identity<T> {};
+template <class T> struct remove_reference<T &> : type_identity<T> {};
+template <class T> struct remove_reference<T &&> : type_identity<T> {};
+
+template <class T> struct is_const : type_constant<bool, false> {};
+template <class T> struct is_const<const T> : type_constant<bool, true> {};
+
+/// Freestanding implementation of std::move.
+template <class T>
+LIBC_INLINE constexpr typename remove_reference<T>::type &&move(T &&t) {
+ return static_cast<typename remove_reference<T>::type &&>(t);
+}
+
+/// Freestanding implementation of std::forward.
+template <typename T>
+LIBC_INLINE constexpr T &&forward(typename remove_reference<T>::type &value) {
+ return static_cast<T &&>(value);
+}
+template <typename T>
+LIBC_INLINE constexpr T &&forward(typename remove_reference<T>::type &&value) {
+ return static_cast<T &&>(value);
+}
+
+struct in_place_t {
+ LIBC_INLINE explicit in_place_t() = default;
+};
+
+struct nullopt_t {
+ LIBC_INLINE constexpr explicit nullopt_t() = default;
+};
+
+constexpr inline in_place_t in_place{};
+constexpr inline nullopt_t nullopt{};
+
+/// Freestanding and minimal implementation of std::optional.
+template <typename T> class optional {
+ template <typename U> struct OptionalStorage {
+ union {
+ char empty;
+ U stored_value;
+ };
+
+ bool in_use = false;
+
+ LIBC_INLINE ~OptionalStorage() { reset(); }
+
+ LIBC_INLINE constexpr OptionalStorage() : empty() {}
+
+ template <typename... Args>
+ LIBC_INLINE constexpr explicit OptionalStorage(in_place_t, Args &&...args)
+ : stored_value(forward<Args>(args)...) {}
+
+ LIBC_INLINE constexpr void reset() {
+ if (in_use)
+ stored_value.~U();
+ in_use = false;
+ }
+ };
+
+ OptionalStorage<T> storage;
+
+public:
+ LIBC_INLINE constexpr optional() = default;
+ LIBC_INLINE constexpr optional(nullopt_t) {}
+
+ LIBC_INLINE constexpr optional(const T &t) : storage(in_place, t) {
+ storage.in_use = true;
+ }
+ LIBC_INLINE constexpr optional(const optional &) = default;
+
+ LIBC_INLINE constexpr optional(T &&t) : storage(in_place, move(t)) {
+ storage.in_use = true;
+ }
+ LIBC_INLINE constexpr optional(optional &&O) = default;
+
+ LIBC_INLINE constexpr optional &operator=(T &&t) {
+ storage = move(t);
+ return *this;
+ }
+ LIBC_INLINE constexpr optional &operator=(optional &&) = default;
+
+ LIBC_INLINE constexpr optional &operator=(const T &t) {
+ storage = t;
+ return *this;
+ }
+ LIBC_INLINE constexpr optional &operator=(const optional &) = default;
+
+ LIBC_INLINE constexpr void reset() { storage.reset(); }
+
+ LIBC_INLINE constexpr const T &value() const & {
+ return storage.stored_value;
+ }
+
+ LIBC_INLINE constexpr T &value() & { return storage.stored_value; }
+
+ LIBC_INLINE constexpr explicit operator bool() const {
+ return storage.in_use;
+ }
+ LIBC_INLINE constexpr bool has_value() const { return storage.in_use; }
+ LIBC_INLINE constexpr const T *operator->() const {
+ return &storage.stored_value;
+ }
+ LIBC_INLINE constexpr T *operator->() { return &storage.stored_value; }
+ LIBC_INLINE constexpr const T &operator*() const & {
+ return storage.stored_value;
+ }
+ LIBC_INLINE constexpr T &operator*() & { return storage.stored_value; }
+
+ LIBC_INLINE constexpr T &&value() && { return move(storage.stored_value); }
+ LIBC_INLINE constexpr T &&operator*() && {
+ return move(storage.stored_value);
+ }
+};
+
+/// Suspend the thread briefly to assist the thread scheduler during busy loops.
+LIBC_INLINE void sleep_briefly() {
+#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
+ if (__nvvm_reflect("__CUDA_ARCH") >= 700)
+ LIBC_INLINE_ASM("nanosleep.u32 64;" :: : "memory");
+#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+ __builtin_amdgcn_s_sleep(2);
+#elif defined(LIBC_TARGET_ARCH_IS_X86)
+ __builtin_ia32_pause();
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && __has_builtin(__builtin_arm_isb)
+ __builtin_arm_isb(0xf);
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ asm volatile("isb\n" ::: "memory");
+#else
+ // Simply do nothing if sleeping isn't supported on this platform.
+#endif
+}
+
/// Conditional to indicate if this process is running on the GPU.
LIBC_INLINE constexpr bool is_process_gpu() {
-#if defined(__NVPTX__) || defined(__AMDGPU__)
+#ifdef RPC_TARGET_IS_GPU
return true;
#else
return false;
#endif
}
+/// Wait for all lanes in the group to complete.
+LIBC_INLINE void sync_lane(uint64_t lane_mask) {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_sync_lane(lane_mask);
+#endif
+}
+
+/// Copies the value from the first active thread to the rest.
+LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_read_first_lane_u32(lane_mask, x);
+#else
+ return x;
+#endif
+}
+
+/// Returns the number of lanes that participate in the RPC interface.
+LIBC_INLINE uint32_t get_num_lanes() {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_num_lanes();
+#else
+ return 1;
+#endif
+}
+
+/// Returns a bitmask of the threads in the warp or wavefront executing together.
+LIBC_INLINE uint64_t get_lane_mask() {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_lane_mask();
+#else
+ return 1;
+#endif
+}
+
+/// Returns the id of the thread inside of an AMD wavefront executing together.
+LIBC_INLINE uint32_t get_lane_id() {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_lane_id();
+#else
+ return 0;
+#endif
+}
+
+/// Conditional that is only true for a single thread in a lane.
+LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_is_first_in_lane(lane_mask);
+#else
+ return true;
+#endif
+}
+
+/// Returns a bitmask of threads in the current lane for which \p x is true.
+LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
+#ifdef RPC_TARGET_IS_GPU
+ return __gpu_ballot(lane_mask, x);
+#else
+ return x;
+#endif
+}
+
/// Return \p val aligned "upwards" according to \p align.
template <typename V, typename A>
LIBC_INLINE constexpr V align_up(V val, A align) {
@@ -44,7 +251,7 @@ template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
/// Advance the \p p by \p bytes.
template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
- if constexpr (cpp::is_const_v<T>)
+ if constexpr (is_const<T>::value)
return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
bytes);
else
diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h
index 5010ee16d96074..409775f3f33cc8 100644
--- a/libc/src/stdio/gpu/vfprintf_utils.h
+++ b/libc/src/stdio/gpu/vfprintf_utils.h
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "hdr/types/FILE.h"
+#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/arg_list.h"
#include "src/__support/macros/config.h"
diff --git a/libc/src/stdlib/gpu/abort.cpp b/libc/src/stdlib/gpu/abort.cpp
index cfc7e9b8e228ba..3a06fb38c3f64f 100644
--- a/libc/src/stdlib/gpu/abort.cpp
+++ b/libc/src/stdlib/gpu/abort.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
|
Changes in last commit, will squash once #116454 lands. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Summary: Last bit in-place to remove the dependencies on LLVM libc headers. This just pulls the `sleep_briefly`, `std::optional` and `type_traits` definitions into the `rpc_util.h` header. This duplicates some code for now but will soon be moved into the `include/rpc` directory. At that point I will remove all the `LIBC_INLINE` and just make it `RPC_INLINE`. Internal use will then have a wrapper to make it all LIBC namespaced, implementations will then implement their own handling.
Summary:
Last bit in-place to remove the dependencies on LLVM libc headers. This
just pulls the `sleep_briefly`, `std::optional` and `type_traits`
definitions into the `rpc_util.h` header. This duplicates some code for
now but will soon be moved into the `include/rpc` directory. At that
point I will remove all the `LIBC_INLINE` and just make it `RPC_INLINE`.
Internal use will then have a wrapper to make it all LIBC namespaced,
implementations will then implement their own handling.