address comments, fixup test

AlexMaclean · AlexMaclean · commit 5b37b0417bf6 · 2025-02-26T04:57:07.000Z
diff --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp
@@ -11,13 +11,13 @@
 
 // Check that the target attributes are set on the generated kernel
 void func() {
-  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l18(ptr {{[^,]+}}) #0
-  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l20(ptr {{[^,]+}})
-  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l22(ptr {{[^,]+}}) #4
+  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l22(ptr {{[^,]+}}) #0
+  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l24(ptr {{[^,]+}})
+  // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l26(ptr {{[^,]+}}) #4
 
-  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l18(ptr {{[^,]+}}) #[[ATTR0:[0-9]+]]
-  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l20(ptr {{[^,]+}}) #[[ATTR1:[0-9]+]]
-  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l22(ptr {{[^,]+}}) #[[ATTR2:[0-9]+]]
+  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l22(ptr {{[^,]+}}) #[[ATTR0:[0-9]+]]
+  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l24(ptr {{[^,]+}}) #[[ATTR1:[0-9]+]]
+  // NVIDIA: ptx_kernel void @__omp_offloading[[HASH:.*]]_l26(ptr {{[^,]+}}) #[[ATTR2:[0-9]+]]
 
   #pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]])
   {}
@@ -39,11 +39,11 @@ void func() {
 
 // It is unclear if we should use the AMD annotations for other targets, we do for now.
 // NVIDIA: attributes #[[ATTR0]]
-// NVIDIA-SAME: "omp_target_thread_limit"="20"
 // NVIDIA-SAME: "nvvm.maxntid"="20"
+// NVIDIA-SAME: "omp_target_thread_limit"="20"
 // NVIDIA: attributes #[[ATTR1]]
-// NVIDIA-SAME: "omp_target_thread_limit"="45"
 // NVIDIA-SAME: "nvvm.maxntid"="45"
+// NVIDIA-SAME: "omp_target_thread_limit"="45"
 // NVIDIA: attributes #[[ATTR2]]
-// NVIDIA-SAME: "omp_target_thread_limit"="17"
 // NVIDIA-SAME: "nvvm.maxntid"="17"
+// NVIDIA-SAME: "omp_target_thread_limit"="17"
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5033,6 +5033,8 @@ static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
   unsigned Length = 0;
 
   if (F->hasFnAttribute(Attr)) {
+    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
+    // parse these elements, placing them into Vect3.
     StringRef S = F->getFnAttribute(Attr).getValueAsString();
     for (; Length < 3 && !S.empty(); Length++) {
       auto [Part, Rest] = S.split(',');
@@ -5041,19 +5043,22 @@ static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
     }
   }
 
-  const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
-  const std::string VStr = llvm::utostr(VInt);
-
   const unsigned Dim = DimC - 'x';
   assert(Dim >= 0 && Dim < 3 && "Unexpected dim char");
 
+  const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
+  const std::string VStr = llvm::utostr(VInt);
   Vect3[Dim] = VStr;
   Length = std::max(Length, Dim + 1);
 
   const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
   F->addFnAttr(Attr, NewAttr);
 }
 
+static inline bool isXYZ(StringRef S) {
+  return S.size() == 1 && S[0] >= 'x' && S[0] <= 'z'; // one of "x", "y", "z"
+}
+
 bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                         const Metadata *V) {
   if (K == "kernel") {
@@ -5092,15 +5097,15 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
     cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
     return true;
   }
-  if (K.consume_front("maxntid") && (K == "x" || K == "y" || K == "z")) {
+  if (K.consume_front("maxntid") && isXYZ(K)) {
     upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
     return true;
   }
-  if (K.consume_front("reqntid") && (K == "x" || K == "y" || K == "z")) {
+  if (K.consume_front("reqntid") && isXYZ(K)) {
     upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
     return true;
   }
-  if (K.consume_front("cluster_dim_") && (K == "x" || K == "y" || K == "z")) {
+  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
     upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
     return true;
   }
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Mutex.h"
+#include <cstdint>
 #include <cstring>
 #include <map>
 #include <mutex>
@@ -204,6 +205,8 @@ static SmallVector<unsigned, 3> getFnAttrParsedVector(const Function &F,
   auto &Ctx = F.getContext();
 
   if (F.hasFnAttribute(Attr)) {
+    // We expect the attribute value to be of the form "x[,y[,z]]", where x, y,
+    // and z are unsigned values.
     StringRef S = F.getFnAttribute(Attr).getValueAsString();
     for (unsigned I = 0; I < 3 && !S.empty(); I++) {
       auto [First, Rest] = S.split(",");
@@ -218,14 +221,11 @@ static SmallVector<unsigned, 3> getFnAttrParsedVector(const Function &F,
   return V;
 }
 
-static std::optional<unsigned> getVectorProduct(ArrayRef<unsigned> V) {
+static std::optional<uint64_t> getVectorProduct(ArrayRef<unsigned> V) {
   if (V.empty())
     return std::nullopt;
 
-  unsigned Product = 1;
-  for (const unsigned E : V)
-    Product *= E;
-  return Product;
+  return std::accumulate(V.begin(), V.end(), uint64_t{1}, std::multiplies<>{});
 }
 
 bool isParamGridConstant(const Value &V) {
@@ -298,7 +298,7 @@ SmallVector<unsigned, 3> getClusterDim(const Function &F) {
   return getFnAttrParsedVector(F, "nvvm.cluster_dim");
 }
 
-std::optional<unsigned> getOverallMaxNTID(const Function &F) {
+std::optional<uint64_t> getOverallMaxNTID(const Function &F) {
   // Note: The semantics here are a bit strange. The PTX ISA states the
   // following (11.4.2. Performance-Tuning Directives: .maxntid):
   //
@@ -309,7 +309,7 @@ std::optional<unsigned> getOverallMaxNTID(const Function &F) {
   return getVectorProduct(MaxNTID);
 }
 
-std::optional<unsigned> getOverallReqNTID(const Function &F) {
+std::optional<uint64_t> getOverallReqNTID(const Function &F) {
   // Note: The semantics here are a bit strange. See getMaxNTID.
   const auto ReqNTID = getReqNTID(F);
   return getVectorProduct(ReqNTID);
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -52,8 +52,8 @@ SmallVector<unsigned, 3> getMaxNTID(const Function &);
 SmallVector<unsigned, 3> getReqNTID(const Function &);
 SmallVector<unsigned, 3> getClusterDim(const Function &);
 
-std::optional<unsigned> getOverallMaxNTID(const Function &);
-std::optional<unsigned> getOverallReqNTID(const Function &);
+std::optional<uint64_t> getOverallMaxNTID(const Function &);
+std::optional<uint64_t> getOverallReqNTID(const Function &);
 
 std::optional<unsigned> getMaxClusterRank(const Function &);
 std::optional<unsigned> getMinCTASm(const Function &);