Skip to content

Commit 9b06ac9

Browse files
[nfc][omptarget] Use builtin var abstraction. Second pass at D69476
Summary: [nfc][omptarget] Use builtin var abstraction. Second pass at D69476 Use the wrappers in support.h for cuda builtin variables at all call sites. Localises use of cuda and removes WARPSIZE==32 assumption in debug.h. Reviewers: ABataev, jdoerfert, grokos Reviewed By: jdoerfert Subscribers: openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D69693
1 parent 197bad5 commit 9b06ac9

File tree

4 files changed

+18
-22
lines changed

4 files changed

+18
-22
lines changed

openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,6 @@
1313
#include "target_impl.h"
1414
#include <stdio.h>
1515

16-
// Warp ID in the CUDA block
17-
INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
18-
// Lane ID in the CUDA warp.
19-
INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
20-
2116
// Return true if this is the first active thread in the warp.
2217
INLINE static bool IsWarpMasterActiveThread() {
2318
unsigned long long Mask = __kmpc_impl_activemask();
@@ -67,7 +62,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
6762
DSPRINT0(DSFLAG_INIT,
6863
"Entering __kmpc_initialize_data_sharing_environment\n");
6964

70-
unsigned WID = getWarpId();
65+
unsigned WID = GetWarpId();
7166
DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
7267

7368
omptarget_nvptx_TeamDescr *teamDescr =
@@ -111,7 +106,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
111106
DSPRINT(DSFLAG, "Default Data Size %016llx\n",
112107
(unsigned long long)SharingDefaultDataSize);
113108

114-
unsigned WID = getWarpId();
109+
unsigned WID = GetWarpId();
115110
__kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
116111

117112
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
@@ -231,7 +226,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
231226

232227
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
233228

234-
unsigned WID = getWarpId();
229+
unsigned WID = GetWarpId();
235230

236231
if (IsEntryPoint) {
237232
if (IsWarpMasterActiveThread()) {
@@ -359,7 +354,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
359354
// This function initializes the stack pointer with the pointer to the
360355
// statically allocated shared memory slots. The size of a shared memory
361356
// slot is pre-determined to be 256 bytes.
362-
if (threadIdx.x == 0)
357+
if (GetThreadIdInBlock() == 0)
363358
data_sharing_init_stack_common();
364359

365360
__threadfence_block();
@@ -377,7 +372,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
377372
PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
378373

379374
// Frame pointer must be visible to all workers in the same warp.
380-
const unsigned WID = getWarpId();
375+
const unsigned WID = GetWarpId();
381376
void *FrameP = 0;
382377
__kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
383378

@@ -467,7 +462,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
467462
// Compute the start address of the frame of each thread in the warp.
468463
uintptr_t FrameStartAddress =
469464
(uintptr_t) data_sharing_push_stack_common(PushSize);
470-
FrameStartAddress += (uintptr_t) (getLaneId() * DataSize);
465+
FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize);
471466
return (void *)FrameStartAddress;
472467
}
473468

@@ -482,7 +477,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
482477
__threadfence_block();
483478

484479
if (GetThreadIdInBlock() % WARPSIZE == 0) {
485-
unsigned WID = getWarpId();
480+
unsigned WID = GetWarpId();
486481

487482
// Current slot
488483
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];

openmp/libomptarget/deviceRTLs/nvptx/src/debug.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,12 @@
128128

129129
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
130130
#include <stdio.h>
131-
#include "target_impl.h"
131+
#include "support.h"
132132

133133
template <typename... Arguments>
134134
NOINLINE static void log(const char *fmt, Arguments... parameters) {
135-
printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
136-
(int)(threadIdx.x & 0x1F), parameters...);
135+
printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
136+
(int)GetWarpId(), (int)GetLaneId, parameters...);
137137
}
138138

139139
#endif
@@ -144,9 +144,8 @@ template <typename... Arguments>
144144
NOINLINE static void check(bool cond, const char *fmt,
145145
Arguments... parameters) {
146146
if (!cond)
147-
printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
148-
(int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
149-
parameters...);
147+
printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
148+
(int)GetWarpId(), (int)GetLaneId, parameters...);
150149
assert(cond);
151150
}
152151

openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) {
364364
for (;;) {
365365
now = clock();
366366
clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
367-
if (cycles >= __OMP_SPIN * blockIdx.x) {
367+
if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
368368
break;
369369
}
370370
}

openmp/libomptarget/deviceRTLs/nvptx/src/support.cu

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
106106

107107
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
108108

109-
INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
109+
INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
110110

111-
INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
111+
INLINE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
112112

113113
////////////////////////////////////////////////////////////////////////////////
114114
//
@@ -124,7 +124,9 @@ INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
124124
// If NumThreads is 1024, master id is 992.
125125
//
126126
// Called in Generic Execution Mode only.
127-
INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
127+
INLINE int GetMasterThreadID() {
128+
return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1);
129+
}
128130

129131
// The last warp is reserved for the master; other warps are workers.
130132
// Called in Generic Execution Mode only.

0 commit comments

Comments
 (0)