Skip to content

Commit fb3f4b0

Browse files
authored
[libc] Add memory fence utility to the GPU utilities (#91756)
Summary: GPUs like to execute instructions in the background until something explicitly consumes them. We are working on adding some microbenchmarking code, which requires flushing the pending memory operations beforehand. This patch simply adds these utility functions that will be used in the near future.
1 parent 5d18d57 commit fb3f4b0

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

libc/src/__support/GPU/amdgpu/utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ LIBC_INLINE uint32_t get_lane_size() {
140140
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
141141
}
142142

143+
/// Waits for all pending memory operations to complete in program order.
144+
// Issues a single acquire-release (`__ATOMIC_ACQ_REL`) fence through the AMDGCN
// fence builtin. NOTE(review): the empty scope string presumably selects the
// default, system-wide synchronization scope — confirm against the AMDGPU
// memory model documentation.
[[clang::convergent]] LIBC_INLINE void memory_fence() {
145+
__builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
146+
}
147+
143148
/// Waits for all threads in the wavefront to converge; this is a no-op on AMDGPU.
144149
[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {
145150
__builtin_amdgcn_wave_barrier();

libc/src/__support/GPU/nvptx/utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,13 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; }
118118
uint32_t mask = static_cast<uint32_t>(lane_mask);
119119
return __nvvm_vote_ballot_sync(mask, x);
120120
}
121+
121122
/// Waits for all the threads in the block to converge and issues a fence.
122123
// Thin wrapper that forwards directly to `__syncthreads()`; takes no arguments
// and returns nothing.
[[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); }
123124

125+
/// Waits for all pending memory operations to complete in program order.
126+
// Thin wrapper around the `__nvvm_membar_sys()` builtin. NOTE(review): the
// builtin name suggests a system-scope memory barrier (PTX `membar.sys`) —
// confirm the intended scope against the PTX ISA documentation.
[[clang::convergent]] LIBC_INLINE void memory_fence() { __nvvm_membar_sys(); }
127+
124128
/// Waits for all threads in the warp to reconverge for independent scheduling.
125129
[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) {
126130
__nvvm_bar_warp_sync(static_cast<uint32_t>(mask));

0 commit comments

Comments
 (0)