[OpenMP][Offload] Resubmit - fix envar for setting teams per CU (llvm#1399)

ronlieb · web-flow · commit a56f8023210b · 2025-03-29T07:23:01.000-04:00
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1100,12 +1100,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
       if (GenericDevice.isFastReductionEnabled()) {
         // When fast reduction is enabled, the number of teams is capped by
         // the MaxCUMultiplier constant.
-        // When envar is enabled, use it for computing MaxNumGroup.
-        if (EnvarCUMultiplier > 0)
-          MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
-        else
-          MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
-
+        MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
       } else {
         // When fast reduction is not enabled, the number of teams is capped
         // by the metadata that clang CodeGen created. The number of teams
@@ -1116,13 +1111,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
         // ConstWGSize is the block size that CodeGen used.
         uint32_t CUMultiplier =
             llvm::omp::xteam_red::getXteamRedCUMultiplier(ConstWGSize);
-
-        if (EnvarCUMultiplier > 0) {
-          MaxNumGroups =
-              DeviceNumCUs * std::min(CUMultiplier, EnvarCUMultiplier);
-        } else {
-          MaxNumGroups = DeviceNumCUs * CUMultiplier;
-        }
+        MaxNumGroups = DeviceNumCUs * CUMultiplier;
       }
 
       // If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no
@@ -1177,6 +1166,12 @@ struct AMDGPUKernelTy : public GenericKernelTy {
           }
           NumGroups = DesiredNumGroups;
         }
+
+        // Prefer OMPX_AdjustNumTeamsForXteamRedSmallBlockSize over
+        // OMPX_XTeamRedTeamsPerCU, which applies only if AdjustFactor is 0.
+        if (AdjustFactor == 0 && EnvarCUMultiplier > 0)
+          NumGroups = DeviceNumCUs * EnvarCUMultiplier;
+
         NumGroups = std::min(NumGroups, MaxNumGroups);
         NumGroups = std::min(NumGroups, NumGroupsFromTripCount);