Skip to content

Commit 5470ea4

Browse files
authored
[libc] Change the starting port index to use the SMID (#79200)
Summary: The RPC interface uses several ports to provide parallel access. Right now we begin the search at the beginning, which heavily contests the early ports. Using the SMID allows us to stagger the starting index based on the identifier of the cluster that is executing the current warp. Multiple warps can share an SM, but this still reduces the contention for the low indices. This also increases the maximum port count to around 4096, because 512 isn't enough to cover the full hardware parallelism needed to guarantee this doesn't deadlock.
1 parent f2a78e6 commit 5470ea4

File tree

5 files changed

+17
-3
lines changed

5 files changed

+17
-3
lines changed

libc/src/__support/GPU/amdgpu/utils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,14 @@ LIBC_INLINE uint64_t fixed_frequency_clock() {
179179
/// Terminates execution of the associated wavefront.
180180
[[noreturn]] LIBC_INLINE void end_program() { __builtin_amdgcn_endpgm(); }
181181

182+
/// Returns a unique identifier for the process cluster the current wavefront is
183+
/// executing on. Here we use the identifier for the compute unit (CU) and
184+
/// shader engine.
185+
/// FIXME: Currently unimplemented on AMDGPU until we have a simpler interface
186+
/// than the one at
187+
/// https://github.com/ROCm/clr/blob/develop/hipamd/include/hip/amd_detail/amd_device_functions.h#L899
188+
LIBC_INLINE uint32_t get_cluster_id() { return 0; }
189+
182190
} // namespace gpu
183191
} // namespace LIBC_NAMESPACE
184192

libc/src/__support/GPU/generic/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ LIBC_INLINE uint64_t fixed_frequency_clock() { return 0; }
7575

7676
[[noreturn]] LIBC_INLINE void end_program() { __builtin_unreachable(); }
7777

78+
LIBC_INLINE uint32_t get_cluster_id() { return 0; }
79+
7880
} // namespace gpu
7981
} // namespace LIBC_NAMESPACE
8082

libc/src/__support/GPU/nvptx/utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ LIBC_INLINE uint64_t fixed_frequency_clock() {
150150
/// Terminates execution of the calling thread.
151151
[[noreturn]] LIBC_INLINE void end_program() { __nvvm_exit(); }
152152

153+
/// Returns a unique identifier for the process cluster the current warp is
154+
/// executing on. Here we use the identifier for the streaming multiprocessor.
155+
LIBC_INLINE uint32_t get_cluster_id() { return __nvvm_read_ptx_sreg_smid(); }
156+
153157
} // namespace gpu
154158
} // namespace LIBC_NAMESPACE
155159

libc/src/__support/RPC/rpc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ template <uint32_t lane_size = gpu::LANE_SIZE> struct alignas(64) Packet {
5757
};
5858

5959
/// The maximum number of parallel ports that the RPC interface can support.
60-
constexpr uint64_t MAX_PORT_COUNT = 512;
60+
constexpr uint64_t MAX_PORT_COUNT = 4096;
6161

6262
/// A common process used to synchronize communication between a client and a
6363
/// server. The process contains a read-only inbox and a write-only outbox used
@@ -519,7 +519,7 @@ LIBC_INLINE void Port<T, S>::recv_n(void **dst, uint64_t *size, A &&alloc) {
519519
template <uint16_t opcode> LIBC_INLINE Client::Port Client::open() {
520520
// Repeatedly perform a naive linear scan for a port that can be opened to
521521
// send data.
522-
for (uint32_t index = 0;; ++index) {
522+
for (uint32_t index = gpu::get_cluster_id();; ++index) {
523523
// Start from the beginning if we run out of ports to check.
524524
if (index >= process.port_count)
525525
index = 0;

libc/utils/gpu/server/rpc_server.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extern "C" {
1818
#endif
1919

2020
/// The maximum number of ports that can be opened for any server.
21-
const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;
21+
const uint64_t RPC_MAXIMUM_PORT_COUNT = 4096;
2222

2323
/// The symbol name associated with the client for use with the LLVM C library
2424
/// implementation.

0 commit comments

Comments
 (0)