-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] Increase RPC opcode to 32-bit and use a class byte #116905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Summary: Currently, the RPC interface uses a basic opcode to communicate with the server. This currently is 16 bits. There's no reason for this to be 16 bits, because on the GPU a 32-bit write is the same as a 16-bit write performance wise. Additionally, I am now making all the `libc` based opcodes qualified with the 'c' type, mimiciing how Linux handles `ioctls` all coming from the same driver. This will make it easier to extend the interface when it's exported directly.
@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) ChangesSummary: Additionally, I am now making all the Full diff: https://github.com/llvm/llvm-project/pull/116905.diff 7 Files Affected:
diff --git a/libc/docs/gpu/rpc.rst b/libc/docs/gpu/rpc.rst
index ee5865d7f64079..e1244154341e9f 100644
--- a/libc/docs/gpu/rpc.rst
+++ b/libc/docs/gpu/rpc.rst
@@ -302,6 +302,6 @@ associated with relocatable device code linking.
Extensions
----------
-We describe which operation the RPC server should take with a 16-bit opcode. We
-consider the first 32768 numbers to be reserved while the others are free to
-use.
+The opcode is a 32-bit integer that must be unique to the requested operation.
+All opcodes used by ``libc`` internally have the character ``c`` in the most
+significant byte.
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 1a6c0cd9bc4a14..f3b35518935a54 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -9,38 +9,41 @@
#ifndef LLVM_LIBC_TYPES_RPC_OPCODES_T_H
#define LLVM_LIBC_TYPES_RPC_OPCODES_T_H
+#define LLVM_LIBC_RPC_BASE 'c'
+#define LLVM_LIBC_OPCODE(n) (LLVM_LIBC_RPC_BASE << 24 | n)
+
typedef enum {
- RPC_NOOP = 0,
- RPC_EXIT,
- RPC_WRITE_TO_STDOUT,
- RPC_WRITE_TO_STDERR,
- RPC_WRITE_TO_STREAM,
- RPC_WRITE_TO_STDOUT_NEWLINE,
- RPC_READ_FROM_STREAM,
- RPC_READ_FGETS,
- RPC_OPEN_FILE,
- RPC_CLOSE_FILE,
- RPC_MALLOC,
- RPC_FREE,
- RPC_HOST_CALL,
- RPC_ABORT,
- RPC_FEOF,
- RPC_FERROR,
- RPC_CLEARERR,
- RPC_FSEEK,
- RPC_FTELL,
- RPC_FFLUSH,
- RPC_UNGETC,
- RPC_PRINTF_TO_STDOUT,
- RPC_PRINTF_TO_STDERR,
- RPC_PRINTF_TO_STREAM,
- RPC_PRINTF_TO_STDOUT_PACKED,
- RPC_PRINTF_TO_STDERR_PACKED,
- RPC_PRINTF_TO_STREAM_PACKED,
- RPC_REMOVE,
- RPC_RENAME,
- RPC_SYSTEM,
- RPC_LAST = 0xFFFF,
+ RPC_NOOP = LLVM_LIBC_OPCODE(0),
+ RPC_EXIT = LLVM_LIBC_OPCODE(1),
+ RPC_WRITE_TO_STDOUT = LLVM_LIBC_OPCODE(2),
+ RPC_WRITE_TO_STDERR = LLVM_LIBC_OPCODE(3),
+ RPC_WRITE_TO_STREAM = LLVM_LIBC_OPCODE(4),
+ RPC_WRITE_TO_STDOUT_NEWLINE = LLVM_LIBC_OPCODE(5),
+ RPC_READ_FROM_STREAM = LLVM_LIBC_OPCODE(6),
+ RPC_READ_FGETS = LLVM_LIBC_OPCODE(7),
+ RPC_OPEN_FILE = LLVM_LIBC_OPCODE(8),
+ RPC_CLOSE_FILE = LLVM_LIBC_OPCODE(9),
+ RPC_MALLOC = LLVM_LIBC_OPCODE(10),
+ RPC_FREE = LLVM_LIBC_OPCODE(11),
+ RPC_HOST_CALL = LLVM_LIBC_OPCODE(12),
+ RPC_ABORT = LLVM_LIBC_OPCODE(13),
+ RPC_FEOF = LLVM_LIBC_OPCODE(14),
+ RPC_FERROR = LLVM_LIBC_OPCODE(15),
+ RPC_CLEARERR = LLVM_LIBC_OPCODE(16),
+ RPC_FSEEK = LLVM_LIBC_OPCODE(17),
+ RPC_FTELL = LLVM_LIBC_OPCODE(18),
+ RPC_FFLUSH = LLVM_LIBC_OPCODE(19),
+ RPC_UNGETC = LLVM_LIBC_OPCODE(20),
+ RPC_PRINTF_TO_STDOUT = LLVM_LIBC_OPCODE(21),
+ RPC_PRINTF_TO_STDERR = LLVM_LIBC_OPCODE(22),
+ RPC_PRINTF_TO_STREAM = LLVM_LIBC_OPCODE(23),
+ RPC_PRINTF_TO_STDOUT_PACKED = LLVM_LIBC_OPCODE(24),
+ RPC_PRINTF_TO_STDERR_PACKED = LLVM_LIBC_OPCODE(25),
+ RPC_PRINTF_TO_STREAM_PACKED = LLVM_LIBC_OPCODE(26),
+ RPC_REMOVE = LLVM_LIBC_OPCODE(27),
+ RPC_RENAME = LLVM_LIBC_OPCODE(28),
+ RPC_SYSTEM = LLVM_LIBC_OPCODE(29),
+ RPC_LAST = 0xFFFFFFFF,
} rpc_opcode_t;
#endif // LLVM_LIBC_TYPES_RPC_OPCODES_T_H
diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
index 19561d05bff9ef..ed5ef29334e754 100644
--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -51,7 +51,7 @@ static_assert(sizeof(Buffer) == 64, "Buffer size mismatch");
/// perform and which threads are active in the slots.
struct Header {
uint64_t mask;
- uint16_t opcode;
+ uint32_t opcode;
};
/// The maximum number of parallel ports that the RPC interface can support.
@@ -319,11 +319,11 @@ template <bool T> struct Port {
template <typename A>
LIBC_INLINE void recv_n(void **dst, uint64_t *size, A &&alloc);
- LIBC_INLINE uint16_t get_opcode() const {
+ LIBC_INLINE uint32_t get_opcode() const {
return process.header[index].opcode;
}
- LIBC_INLINE uint16_t get_index() const { return index; }
+ LIBC_INLINE uint32_t get_index() const { return index; }
LIBC_INLINE void close() {
// Wait for all lanes to finish using the port.
@@ -357,7 +357,7 @@ struct Client {
: process(port_count, buffer) {}
using Port = rpc::Port<false>;
- template <uint16_t opcode> LIBC_INLINE Port open();
+ template <uint32_t opcode> LIBC_INLINE Port open();
private:
Process<false> process;
@@ -518,7 +518,7 @@ LIBC_INLINE void Port<T>::recv_n(void **dst, uint64_t *size, A &&alloc) {
/// port. Each port instance uses an associated \p opcode to tell the server
/// what to do. The Client interface provides the appropriate lane size to the
/// port using the platform's returned value.
-template <uint16_t opcode>
+template <uint32_t opcode>
[[clang::convergent]] LIBC_INLINE Client::Port Client::open() {
// Repeatedly perform a naive linear scan for a port that can be opened to
// send data.
diff --git a/libc/src/stdio/gpu/file.h b/libc/src/stdio/gpu/file.h
index 16d64e8f377501..6ca792b4545806 100644
--- a/libc/src/stdio/gpu/file.h
+++ b/libc/src/stdio/gpu/file.h
@@ -49,7 +49,7 @@ LIBC_INLINE ::FILE *to_stream(uintptr_t f) {
return stream;
}
-template <uint16_t opcode>
+template <uint32_t opcode>
LIBC_INLINE uint64_t write_impl(::FILE *file, const void *data, size_t size) {
uint64_t ret = 0;
rpc::Client::Port port = rpc::client.open<opcode>();
diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h
index 409775f3f33cc8..a0a8c39781ad69 100644
--- a/libc/src/stdio/gpu/vfprintf_utils.h
+++ b/libc/src/stdio/gpu/vfprintf_utils.h
@@ -16,7 +16,7 @@
namespace LIBC_NAMESPACE_DECL {
-template <uint16_t opcode>
+template <uint32_t opcode>
LIBC_INLINE int vfprintf_impl(::FILE *__restrict file,
const char *__restrict format, size_t format_size,
va_list vlist) {
diff --git a/libc/utils/gpu/server/llvmlibc_rpc_server.h b/libc/utils/gpu/server/llvmlibc_rpc_server.h
index b0cf2f916b3856..98df882afa21cf 100644
--- a/libc/utils/gpu/server/llvmlibc_rpc_server.h
+++ b/libc/utils/gpu/server/llvmlibc_rpc_server.h
@@ -79,7 +79,7 @@ rpc_status_t rpc_handle_server(rpc_device_t rpc_device);
/// Register a callback to handle an opcode from the RPC client. The associated
/// data must remain accessible as long as the user intends to handle the server
/// with this callback.
-rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint16_t opcode,
+rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode,
rpc_opcode_callback_ty callback, void *data);
/// Obtain a pointer to a local client buffer that can be copied directly to the
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 11b6d0e27ab948..972601aaf1d5e0 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -215,8 +215,8 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) {
template <uint32_t lane_size>
rpc_status_t handle_server_impl(
rpc::Server &server,
- const std::unordered_map<uint16_t, rpc_opcode_callback_ty> &callbacks,
- const std::unordered_map<uint16_t, void *> &callback_data,
+ const std::unordered_map<uint32_t, rpc_opcode_callback_ty> &callbacks,
+ const std::unordered_map<uint32_t, void *> &callback_data,
uint32_t &index) {
auto port = server.try_open(lane_size, index);
if (!port)
@@ -477,8 +477,8 @@ struct Device {
void *buffer;
rpc::Server server;
rpc::Client client;
- std::unordered_map<uint16_t, rpc_opcode_callback_ty> callbacks;
- std::unordered_map<uint16_t, void *> callback_data;
+ std::unordered_map<uint32_t, rpc_opcode_callback_ty> callbacks;
+ std::unordered_map<uint32_t, void *> callback_data;
};
rpc_status_t rpc_server_init(rpc_device_t *rpc_device, uint64_t num_ports,
@@ -528,7 +528,7 @@ rpc_status_t rpc_handle_server(rpc_device_t rpc_device) {
}
}
-rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint16_t opcode,
+rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode,
rpc_opcode_callback_ty callback,
void *data) {
if (!rpc_device.handle)
|
Summary:
Currently, the RPC interface uses a basic opcode to communicate with the
server. This currently is 16 bits. There's no reason for this to be 16
bits, because on the GPU a 32-bit write is the same as a 16-bit write
performance wise.
Additionally, I am now making all the
libc
based opcodes qualifiedwith the 'c' type, mimiciing how Linux handles
ioctls
all coming fromthe same driver. This will make it easier to extend the interface when
it's exported directly.