Skip to content

Commit 7ef5baf

Browse files
committed
[OpenMP][Offload] Fix envar for setting teams per CU
The envar now sets the number of groups itself (still capped by the maximum number of groups) instead of changing the maximum number of groups.
1 parent e1f2791 commit 7ef5baf

File tree

1 file changed

+8
-13
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+8
-13
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,12 +1101,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
11011101
if (GenericDevice.isFastReductionEnabled()) {
11021102
// When fast reduction is enabled, the number of teams is capped by
11031103
// the MaxCUMultiplier constant.
1104-
// When envar is enabled, use it for computing MaxNumGroup.
1105-
if (EnvarCUMultiplier > 0)
1106-
MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
1107-
else
1108-
MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
1109-
1104+
MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
11101105
} else {
11111106
// When fast reduction is not enabled, the number of teams is capped
11121107
// by the metadata that clang CodeGen created. The number of teams
@@ -1117,13 +1112,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
11171112
// ConstWGSize is the block size that CodeGen used.
11181113
uint32_t CUMultiplier =
11191114
llvm::omp::xteam_red::getXteamRedCUMultiplier(ConstWGSize);
1120-
1121-
if (EnvarCUMultiplier > 0) {
1122-
MaxNumGroups =
1123-
DeviceNumCUs * std::min(CUMultiplier, EnvarCUMultiplier);
1124-
} else {
1125-
MaxNumGroups = DeviceNumCUs * CUMultiplier;
1126-
}
1115+
MaxNumGroups = DeviceNumCUs * CUMultiplier;
11271116
}
11281117

11291118
// If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no
@@ -1178,6 +1167,12 @@ struct AMDGPUKernelTy : public GenericKernelTy {
11781167
}
11791168
NumGroups = DesiredNumGroups;
11801169
}
1170+
1171+
// Prefer OMPX_AdjustNumTeamsForXteamRedSmallBlockSize over
1172+
// OMPX_XTeamRedTeamsPerCU.
1173+
if (AdjustFactor == 0 && EnvarCUMultiplier > 0)
1174+
NumGroups = DeviceNumCUs * EnvarCUMultiplier;
1175+
11811176
NumGroups = std::min(NumGroups, MaxNumGroups);
11821177
NumGroups = std::min(NumGroups, NumGroupsFromTripCount);
11831178

0 commit comments

Comments
 (0)