[OpenMP] Remove 'minncta' attributes from NVPTX kernels (#88398)

jhuber6 · web-flow · commit 0287a5cc4e2a · 2024-04-15T15:28:05.000-05:00
Summary: Currently we treat this attribute as a minimum number for the amount of blocks scheduled on the kernel. However, the doucmentation states that this applies to CTA's mapped onto a *single* SM. Currently we just set it to the total number of blocks, which will almost always result in a warning that the value is out of range and will be ignored. We don't have a good way to automatically know how many CTAs can be put on a single SM nor if we should do this, so we should probably leave this up to users manually adding it. https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#performance-tuning-directives-minnctapersm
diff --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp
@@ -36,6 +36,5 @@ void func() {
 // NVIDIA: "omp_target_thread_limit"="45"
 // NVIDIA: "omp_target_thread_limit"="17"
 // NVIDIA: !{ptr @__omp_offloading[[HASH1:.*]]_l16, !"maxntidx", i32 20}
-// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"minctasm", i32 90}
-// NVIDIA: !{ptr @__omp_offloading[[HASH2]]_l18, !"maxntidx", i32 45}
+// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"maxntidx", i32 45}
 // NVIDIA: !{ptr @__omp_offloading[[HASH3:.*]]_l20, !"maxntidx", i32 17}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4786,11 +4786,9 @@ OpenMPIRBuilder::readTeamBoundsForKernel(const Triple &, Function &Kernel) {
 
 void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
                                           int32_t LB, int32_t UB) {
-  if (T.isNVPTX()) {
+  if (T.isNVPTX())
     if (UB > 0)
       updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
-    updateNVPTXMetadata(Kernel, "minctasm", LB, false);
-  }
   if (T.isAMDGPU())
     Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");