Skip to content

Commit 7fd9f0f

Browse files
committed
[libc] Remove MAX_LANE_SIZE definition from the RPC server
This `MAX_LANE_SIZE` was a hack from the days when we used a single instance of the server and had some GPU state handle it. Now that we have everything templated, this really shouldn't be used. This patch removes its use and replaces it with template arguments. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D158633
1 parent 51ebecf commit 7fd9f0f

File tree

6 files changed

+25
-29
lines changed

6 files changed

+25
-29
lines changed

libc/src/__support/RPC/rpc_util.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@
1717
namespace __llvm_libc {
1818
namespace rpc {
1919

20-
/// Maximum amount of data a single lane can use.
21-
constexpr uint64_t MAX_LANE_SIZE = 64;
22-
2320
/// Suspend the thread briefly to assist the thread scheduler during busy loops.
2421
LIBC_INLINE void sleep_briefly() {
2522
#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700

libc/utils/gpu/loader/Loader.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ inline void handle_error(rpc_status_t) {
107107
handle_error("Failure in the RPC server\n");
108108
}
109109

110+
template <uint32_t lane_size>
110111
inline void register_rpc_callbacks(uint32_t device_id) {
112+
static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size");
111113
// Register the ping test for the `libc` tests.
112114
rpc_register_callback(
113115
device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
@@ -207,14 +209,14 @@ inline void register_rpc_callbacks(uint32_t device_id) {
207209
rpc_register_callback(
208210
device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
209211
[](rpc_port_t port, void *data) {
210-
uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0};
211-
void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr};
212+
uint64_t sizes[lane_size] = {0};
213+
void *dst[lane_size] = {nullptr};
212214
rpc_recv_n(
213215
port, dst, sizes,
214216
[](uint64_t size, void *) -> void * { return new char[size]; },
215217
nullptr);
216218
rpc_send_n(port, dst, sizes);
217-
for (uint64_t i = 0; i < RPC_MAXIMUM_LANE_SIZE; ++i) {
219+
for (uint64_t i = 0; i < lane_size; ++i) {
218220
if (dst[i])
219221
delete[] reinterpret_cast<uint8_t *>(dst[i]);
220222
}

libc/utils/gpu/loader/amdgpu/Loader.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,6 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
150150

151151
// Register RPC callbacks for the malloc and free functions on HSA.
152152
uint32_t device_id = 0;
153-
register_rpc_callbacks(device_id);
154-
155153
auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);
156154
rpc_register_callback(
157155
device_id, RPC_MALLOC,
@@ -424,6 +422,14 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
424422
wavefront_size, rpc_alloc, &tuple))
425423
handle_error(err);
426424

425+
// Register callbacks for the RPC unit tests.
426+
if (wavefront_size == 32)
427+
register_rpc_callbacks<32>(device_id);
428+
else if (wavefront_size == 64)
429+
register_rpc_callbacks<64>(device_id);
430+
else
431+
handle_error("Invalid wavefront size");
432+
427433
// Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.
428434
// If the clock_freq symbol is missing, no work to do.
429435
hsa_executable_symbol_t freq_sym;

libc/utils/gpu/loader/nvptx/Loader.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
177177

178178
// Register RPC callbacks for the malloc and free functions on CUDA.
179179
uint32_t device_id = 0;
180-
register_rpc_callbacks(device_id);
180+
register_rpc_callbacks<32>(device_id);
181181

182182
rpc_register_callback(
183183
device_id, RPC_MALLOC,

libc/utils/gpu/server/rpc_server.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
2626
static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
2727
"Incorrect maximum port count");
2828

29-
static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE,
30-
"Incorrect maximum port count");
31-
3229
// The client needs to support different lane sizes for the SIMT model. Because
3330
// of this we need to select between the possible sizes that the client can use.
3431
struct Server {
@@ -80,9 +77,9 @@ struct Server {
8077
case RPC_WRITE_TO_STREAM:
8178
case RPC_WRITE_TO_STDERR:
8279
case RPC_WRITE_TO_STDOUT: {
83-
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
84-
void *strs[rpc::MAX_LANE_SIZE] = {nullptr};
85-
FILE *files[rpc::MAX_LANE_SIZE] = {nullptr};
80+
uint64_t sizes[lane_size] = {0};
81+
void *strs[lane_size] = {nullptr};
82+
FILE *files[lane_size] = {nullptr};
8683
if (port->get_opcode() == RPC_WRITE_TO_STREAM)
8784
port->recv([&](rpc::Buffer *buffer, uint32_t id) {
8885
files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
@@ -96,18 +93,15 @@ struct Server {
9693
: files[id]);
9794
uint64_t ret = fwrite(strs[id], 1, sizes[id], file);
9895
std::memcpy(buffer->data, &ret, sizeof(uint64_t));
96+
delete[] reinterpret_cast<uint8_t *>(strs[id]);
9997
});
100-
for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) {
101-
if (strs[i])
102-
delete[] reinterpret_cast<uint8_t *>(strs[i]);
103-
}
10498
break;
10599
}
106100
case RPC_READ_FROM_STREAM:
107101
case RPC_READ_FROM_STDIN: {
108-
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
109-
void *data[rpc::MAX_LANE_SIZE] = {nullptr};
110-
uint64_t rets[rpc::MAX_LANE_SIZE] = {0};
102+
uint64_t sizes[lane_size] = {0};
103+
void *data[lane_size] = {nullptr};
104+
uint64_t rets[lane_size] = {0};
111105
port->recv([&](rpc::Buffer *buffer, uint32_t id) {
112106
sizes[id] = buffer->data[0];
113107
data[id] = new char[sizes[id]];
@@ -124,8 +118,8 @@ struct Server {
124118
break;
125119
}
126120
case RPC_OPEN_FILE: {
127-
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
128-
void *paths[rpc::MAX_LANE_SIZE] = {nullptr};
121+
uint64_t sizes[lane_size] = {0};
122+
void *paths[lane_size] = {nullptr};
129123
port->recv_n(paths, sizes, [&](uint64_t size) { return new char[size]; });
130124
port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) {
131125
FILE *file = fopen(reinterpret_cast<char *>(paths[id]),
@@ -152,8 +146,8 @@ struct Server {
152146
break;
153147
}
154148
case RPC_HOST_CALL: {
155-
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
156-
void *args[rpc::MAX_LANE_SIZE] = {nullptr};
149+
uint64_t sizes[lane_size] = {0};
150+
void *args[lane_size] = {nullptr};
157151
port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
158152
port->recv([&](rpc::Buffer *buffer, uint32_t id) {
159153
reinterpret_cast<void (*)(void *)>(buffer->data[0])(args[id]);

libc/utils/gpu/server/rpc_server.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ extern "C" {
2020
/// The maximum number of ports that can be opened for any server.
2121
const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;
2222

23-
/// The maximum number of parallel lanes that we can support.
24-
const uint64_t RPC_MAXIMUM_LANE_SIZE = 64;
25-
2623
/// The symbol name associated with the client for use with the LLVM C library
2724
/// implementation.
2825
const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";

0 commit comments

Comments
 (0)