Skip to content

[OpenMP] Remove use of '__AMDGCN_WAVEFRONT_SIZE' #113156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions offload/DeviceRTL/src/Mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ namespace ompx {
namespace impl {

// Forward declarations defined to be defined for AMDGCN and NVPTX.
const llvm::omp::GV &getGridValue();
LaneMaskTy activemask();
LaneMaskTy lanemaskLT();
LaneMaskTy lanemaskGT();
Expand All @@ -37,15 +36,14 @@ uint32_t getBlockIdInKernel(int32_t Dim);
uint32_t getNumberOfBlocksInKernel(int32_t Dim);
uint32_t getWarpIdInBlock();
uint32_t getNumberOfWarpsInBlock();
uint32_t getWarpSize();

/// AMDGCN Implementation
///
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})

const llvm::omp::GV &getGridValue() {
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
}
uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }

uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
switch (Dim) {
Expand Down Expand Up @@ -152,7 +150,7 @@ uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
UNREACHABLE("Dim outside range!");
}

const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); }

LaneMaskTy activemask() { return __nvvm_activemask(); }

Expand Down Expand Up @@ -219,8 +217,6 @@ uint32_t getNumberOfWarpsInBlock() {
#pragma omp end declare variant
///}

uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }

} // namespace impl
} // namespace ompx

Expand Down
25 changes: 18 additions & 7 deletions offload/test/offloading/ompx_bare_ballot_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,33 @@
#include <stdio.h>
#include <stdlib.h>

#pragma omp begin declare variant match(device = {arch(amdgcn)})
unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); }
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {arch(nvptx64)})
unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); }
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {kind(cpu)})
unsigned get_warp_size() { return 1; }
#pragma omp end declare variant

int main(int argc, char *argv[]) {
const int num_blocks = 1;
const int block_size = 256;
const int N = num_blocks * block_size;
int *res = (int *)malloc(N * sizeof(int));

#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
map(from: res[0:N])
#pragma omp target teams ompx_bare num_teams(num_blocks) \
thread_limit(block_size) map(from : res[0 : N])
{
int tid = ompx_thread_id_x();
uint64_t mask = ompx_ballot_sync(~0LU, tid & 0x1);
#if defined __AMDGCN_WAVEFRONT_SIZE && __AMDGCN_WAVEFRONT_SIZE == 64
res[tid] = mask == 0xaaaaaaaaaaaaaaaa;
#else
res[tid] = mask == 0xaaaaaaaa;
#endif
if (get_warp_size() == 64)
res[tid] = mask == 0xaaaaaaaaaaaaaaaa;
else
res[tid] = mask == 0xaaaaaaaa;
}

for (int i = 0; i < N; ++i)
Expand Down
18 changes: 13 additions & 5 deletions offload/test/offloading/ompx_bare_shfl_down_sync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@
#include <ompx.h>
#include <type_traits>

#pragma omp begin declare variant match(device = {arch(amdgcn)})
unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); }
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {arch(nvptx64)})
unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); }
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {kind(cpu)})
unsigned get_warp_size() { return 1; }
#pragma omp end declare variant

template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
bool equal(T LHS, T RHS) {
return LHS == RHS;
Expand All @@ -32,11 +44,7 @@ template <typename T> void test() {
{
int tid = ompx_thread_id_x();
T val = ompx::shfl_down_sync(~0U, static_cast<T>(tid), 1);
#ifdef __AMDGCN_WAVEFRONT_SIZE
int warp_size = __AMDGCN_WAVEFRONT_SIZE;
#else
int warp_size = 32;
#endif
int warp_size = get_warp_size();
if ((tid & (warp_size - 1)) != warp_size - 1)
res[tid] = equal(val, static_cast<T>(tid + 1));
else
Expand Down
Loading