Skip to content

Commit d0f9ddd

Browse files
committed
[OpenMP] Utilize the "non-uniform-workgroup" to simplify DeviceRTL
OpenMP offloading always uses uniform workgroups, see https://reviews.llvm.org/D135374. The runtime doesn't need to handle non-uniform workgroups at all either. Differential Revision: https://reviews.llvm.org/D135444
1 parent dc452a7 commit d0f9ddd

File tree

1 file changed

+4
-23
lines changed

1 file changed

+4
-23
lines changed

openmp/libomptarget/DeviceRTL/src/Mapping.cpp

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ namespace impl {
2626

2727
// Forward declarations defined to be defined for AMDGCN and NVPTX.
2828
const llvm::omp::GV &getGridValue();
29-
uint32_t getGridDim(uint32_t n, uint16_t d);
30-
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
31-
uint16_t group_size);
3229
uint32_t getNumHardwareThreadsInBlock();
3330
LaneMaskTy activemask();
3431
LaneMaskTy lanemaskLT();
@@ -50,21 +47,8 @@ const llvm::omp::GV &getGridValue() {
5047
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
5148
}
5249

53-
uint32_t getGridDim(uint32_t n, uint16_t d) {
54-
uint32_t q = n / d;
55-
return q + (n > q * d);
56-
}
57-
58-
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
59-
uint16_t group_size) {
60-
uint32_t r = grid_size - group_id * group_size;
61-
return (r < group_size) ? r : group_size;
62-
}
63-
6450
uint32_t getNumHardwareThreadsInBlock() {
65-
return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
66-
__builtin_amdgcn_grid_size_x(),
67-
__builtin_amdgcn_workgroup_size_x());
51+
return __builtin_amdgcn_workgroup_size_x();
6852
}
6953

7054
LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
@@ -95,10 +79,7 @@ uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
9579

9680
uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
9781

98-
uint32_t getNumberOfBlocks() {
99-
return getGridDim(__builtin_amdgcn_grid_size_x(),
100-
__builtin_amdgcn_workgroup_size_x());
101-
}
82+
uint32_t getNumberOfBlocks() { return __builtin_amdgcn_grid_size_x(); }
10283

10384
uint32_t getWarpId() {
10485
return impl::getThreadIdInBlock() / mapping::getWarpSize();
@@ -228,8 +209,8 @@ uint32_t mapping::getThreadIdInBlock() {
228209
uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
229210

230211
uint32_t mapping::getBlockSize(bool IsSPMD) {
231-
uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
232-
(!IsSPMD * impl::getWarpSize());
212+
uint32_t BlockSize =
213+
mapping::getNumberOfProcessorElements() - (!IsSPMD * impl::getWarpSize());
233214
return BlockSize;
234215
}
235216
uint32_t mapping::getBlockSize() {

0 commit comments

Comments
 (0)