@@ -26,9 +26,6 @@ namespace impl {
26
26
27
27
// Forward declarations, to be defined for AMDGCN and NVPTX.
28
28
const llvm::omp::GV &getGridValue ();
29
- uint32_t getGridDim (uint32_t n, uint16_t d);
30
- uint32_t getWorkgroupDim (uint32_t group_id, uint32_t grid_size,
31
- uint16_t group_size);
32
29
uint32_t getNumHardwareThreadsInBlock ();
33
30
LaneMaskTy activemask ();
34
31
LaneMaskTy lanemaskLT ();
@@ -50,21 +47,8 @@ const llvm::omp::GV &getGridValue() {
50
47
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
51
48
}
52
49
53
- uint32_t getGridDim (uint32_t n, uint16_t d) {
54
- uint32_t q = n / d;
55
- return q + (n > q * d);
56
- }
57
-
58
- uint32_t getWorkgroupDim (uint32_t group_id, uint32_t grid_size,
59
- uint16_t group_size) {
60
- uint32_t r = grid_size - group_id * group_size;
61
- return (r < group_size) ? r : group_size;
62
- }
63
-
64
50
uint32_t getNumHardwareThreadsInBlock () {
65
- return getWorkgroupDim (__builtin_amdgcn_workgroup_id_x (),
66
- __builtin_amdgcn_grid_size_x (),
67
- __builtin_amdgcn_workgroup_size_x ());
51
+ return __builtin_amdgcn_workgroup_size_x ();
68
52
}
69
53
70
54
/// Returns the value read via __builtin_amdgcn_read_exec, converted to
/// LaneMaskTy (presumably the mask of currently active lanes -- confirm
/// against the builtin's documentation).
LaneMaskTy activemask() {
  const LaneMaskTy ExecMask = __builtin_amdgcn_read_exec();
  return ExecMask;
}
@@ -95,10 +79,7 @@ uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
95
79
96
80
/// Returns the x-dimension workgroup id reported by
/// __builtin_amdgcn_workgroup_id_x.
uint32_t getBlockId() {
  const uint32_t WorkgroupIdX = __builtin_amdgcn_workgroup_id_x();
  return WorkgroupIdX;
}
97
81
98
- uint32_t getNumberOfBlocks () {
99
- return getGridDim (__builtin_amdgcn_grid_size_x (),
100
- __builtin_amdgcn_workgroup_size_x ());
101
- }
82
+ /// Returns the number of workgroups (blocks) in the x dimension.
+ ///
+ /// __builtin_amdgcn_grid_size_x reports the grid size in work-items (it is
+ /// the value getKernelSize returns), so it must be divided by the workgroup
+ /// size to obtain a block count. The division rounds up so that a partial
+ /// trailing workgroup is still counted.
+ uint32_t getNumberOfBlocks () {
+   const uint32_t GridSize = __builtin_amdgcn_grid_size_x ();
+   const uint32_t GroupSize = __builtin_amdgcn_workgroup_size_x ();
+   return GridSize / GroupSize + (GridSize % GroupSize != 0);
+ }
102
83
103
84
uint32_t getWarpId () {
104
85
return impl::getThreadIdInBlock () / mapping::getWarpSize ();
@@ -228,8 +209,8 @@ uint32_t mapping::getThreadIdInBlock() {
228
209
/// Public mapping entry point: forwards to the target-specific
/// impl::getWarpSize and returns its result unchanged.
uint32_t mapping::getWarpSize() {
  const uint32_t WarpSize = impl::getWarpSize();
  return WarpSize;
}
229
210
230
211
uint32_t mapping::getBlockSize (bool IsSPMD) {
231
- uint32_t BlockSize = mapping::getNumberOfProcessorElements () -
232
- (!IsSPMD * impl::getWarpSize ());
212
+ uint32_t BlockSize =
213
+ mapping::getNumberOfProcessorElements () - (!IsSPMD * impl::getWarpSize ());
233
214
return BlockSize;
234
215
}
235
216
uint32_t mapping::getBlockSize () {
0 commit comments