Skip to content

Commit d256797

Browse files
[nfc][libomptarget] Drop parameter to named_sync
[nfc][libomptarget] Drop parameter to named_sync

named_sync has one call site (in sync.cu), where it always passed L1_BARRIER. Folding this into the call site and dropping the macro is a simplification.

amdgpu doesn't have PTX's bar.sync instruction. A correct implementation of __kmpc_impl_named_sync in terms of shared memory is much easier if it can assume that the barrier argument is this constant. Said implementation is left for a second patch.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D88474
1 parent bcd0559 commit d256797

File tree

3 files changed

+8
-15
lines changed

3 files changed

+8
-15
lines changed

openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@
4242

4343
#define WARPSIZE 64
4444

45-
// The named barrier for active parallel threads of a team in an L1 parallel
46-
// region to synchronize with each other.
47-
#define L1_BARRIER (1)
48-
4945
// Maximum number of preallocated arguments to an outlined parallel/simd
5046
// function. Anything more requires dynamic memory allocation.
5147
#define MAX_SHARED_ARGS 20
@@ -113,10 +109,9 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) {
113109
// AMDGCN doesn't need to sync threads in a warp
114110
}
115111

116-
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
117-
// we have protected the master warp from releasing from its barrier
118-
// due to a full workgroup barrier in the middle of a work function.
119-
// So it is ok to issue a full workgroup barrier here.
112+
INLINE void __kmpc_impl_named_sync(uint32_t num_threads) {
113+
(void)num_threads;
114+
// TODO: Implement on top of __SHARED__
120115
__builtin_amdgcn_s_barrier();
121116
}
122117

openmp/libomptarget/deviceRTLs/common/src/sync.cu

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
6060
PRINT(LD_SYNC,
6161
"call kmpc_barrier with %d omp threads, sync parameter %d\n",
6262
(int)numberOfActiveOMPThreads, (int)threads);
63-
// Barrier #1 is for synchronization among active threads.
64-
__kmpc_impl_named_sync(L1_BARRIER, threads);
63+
__kmpc_impl_named_sync(threads);
6564
}
6665
} else {
6766
// Still need to flush the memory per the standard.

openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,6 @@
3737

3838
#define WARPSIZE 32
3939

40-
// The named barrier for active parallel threads of a team in an L1 parallel
41-
// region to synchronize with each other.
42-
#define L1_BARRIER (1)
43-
4440
// Maximum number of preallocated arguments to an outlined parallel/simd function.
4541
// Anything more requires dynamic memory allocation.
4642
#define MAX_SHARED_ARGS 20
@@ -187,7 +183,10 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
187183
#endif // CUDA_VERSION
188184
}
189185

190-
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
186+
INLINE void __kmpc_impl_named_sync(uint32_t num_threads) {
187+
// The named barrier for active parallel threads of a team in an L1 parallel
188+
// region to synchronize with each other.
189+
int barrier = 1;
191190
asm volatile("bar.sync %0, %1;"
192191
:
193192
: "r"(barrier), "r"(num_threads)

0 commit comments

Comments
 (0)