Revert "[OpenMP][Offload] Fix envar for setting teams per cu"

ronlieb · ronlieb · commit 8bcd336113a5 · 2025-03-28T14:25:44.000-05:00
This reverts commit 7ef5baf.
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1100,7 +1100,12 @@ struct AMDGPUKernelTy : public GenericKernelTy {
       if (GenericDevice.isFastReductionEnabled()) {
         // When fast reduction is enabled, the number of teams is capped by
         // the MaxCUMultiplier constant.
-        MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
+        // When envar is enabled, use it for computing MaxNumGroup.
+        if (EnvarCUMultiplier > 0)
+          MaxNumGroups = DeviceNumCUs * EnvarCUMultiplier;
+        else
+          MaxNumGroups = DeviceNumCUs * llvm::omp::xteam_red::MaxCUMultiplier;
+
       } else {
         // When fast reduction is not enabled, the number of teams is capped
         // by the metadata that clang CodeGen created. The number of teams
@@ -1111,7 +1116,13 @@ struct AMDGPUKernelTy : public GenericKernelTy {
         // ConstWGSize is the block size that CodeGen used.
         uint32_t CUMultiplier =
             llvm::omp::xteam_red::getXteamRedCUMultiplier(ConstWGSize);
-        MaxNumGroups = DeviceNumCUs * CUMultiplier;
+
+        if (EnvarCUMultiplier > 0) {
+          MaxNumGroups =
+              DeviceNumCUs * std::min(CUMultiplier, EnvarCUMultiplier);
+        } else {
+          MaxNumGroups = DeviceNumCUs * CUMultiplier;
+        }
       }
 
       // If envar OMPX_XTEAMREDUCTION_OCCUPANCY_BASED_OPT is set and no
@@ -1166,12 +1177,6 @@ struct AMDGPUKernelTy : public GenericKernelTy {
           }
           NumGroups = DesiredNumGroups;
         }
-
-        // Prefer OMPX_AdjustNumTeamsForXteamRedSmallBlockSize over
-        // OMPX_XTeamRedTeamsPerCU.
-        if (AdjustFactor == 0 && EnvarCUMultiplier > 0)
-          NumGroups = DeviceNumCUs * EnvarCUMultiplier;
-
         NumGroups = std::min(NumGroups, MaxNumGroups);
         NumGroups = std::min(NumGroups, NumGroupsFromTripCount);