PR feedback

jchlanda · jchlanda · commit 9431687c6bfe · 2023-10-23T01:19:25.000-07:00
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -758,23 +758,23 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
                     llvm::MDNode::get(Context, AttrMDArgs));
   }
 
-  if (const auto *A = FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
-    const auto *CE = cast<ConstantExpr>(A->getValue());
+  auto attrAsMDArg = [&](Expr *E) {
+    const auto *CE = cast<ConstantExpr>(E);
     std::optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
-    llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
-        Builder.getInt32(ArgVal->getSExtValue()))};
+    assert(ArgVal.has_value() && "Failed to obtain attribute value.");
+    return llvm::ConstantAsMetadata::get(
+        Builder.getInt32(ArgVal->getSExtValue()));
+  };
+
+  if (const auto *A = FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
     Fn->setMetadata("min_work_groups_per_cu",
-                    llvm::MDNode::get(Context, AttrMDArgs));
+                    llvm::MDNode::get(Context, {attrAsMDArg(A->getValue())}));
   }
 
   if (const auto *A =
           FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
-    const auto *CE = cast<ConstantExpr>(A->getValue());
-    std::optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
-    llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
-        Builder.getInt32(ArgVal->getSExtValue()))};
     Fn->setMetadata("max_work_groups_per_mp",
-                    llvm::MDNode::get(Context, AttrMDArgs));
+                    llvm::MDNode::get(Context, {attrAsMDArg(A->getValue())}));
   }
 
   if (const SYCLIntelMaxWorkGroupSizeAttr *A =
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -251,23 +251,24 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
                         (*MWGS->getXDimVal()).getExtValue();
       if (MaxThreads > 0)
         addNVVMMetadata(F, "maxntidx", MaxThreads);
-    }
-    if (const auto *MWGPCU =
-            FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
-      auto *MinWorkGroups = MWGPCU->getValue();
-      if (const auto *CE = dyn_cast<ConstantExpr>(MinWorkGroups)) {
-        auto MinVal = CE->getResultAsAPSInt();
-        // The value is guaranteed to be > 0, pass it to the metadata.
-        addNVVMMetadata(F, "minnctapersm", MinVal.getExtValue());
-      }
-    }
-    if (const auto *MWGPMP =
-            FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
-      auto *MaxWorkGroups = MWGPMP->getValue();
-      if (const auto *CE = dyn_cast<ConstantExpr>(MaxWorkGroups)) {
-        auto MaxVal = CE->getResultAsAPSInt();
+
+      auto attrValue = [&](Expr *E) {
+        const auto *CE = cast<ConstantExpr>(E);
+        std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
+        assert(Val.has_value() && "Failed to get attribute value.");
+        return Val->getZExtValue();
+      };
+
+      if (const auto *MWGPCU =
+              FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
         // The value is guaranteed to be > 0, pass it to the metadata.
-        addNVVMMetadata(F, "maxclusterrank", MaxVal.getExtValue());
+        addNVVMMetadata(F, "minnctapersm", attrValue(MWGPCU->getValue()));
+
+        if (const auto *MWGPMP =
+                FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
+          // The value is guaranteed to be > 0, pass it to the metadata.
+          addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
+        }
       }
     }
   }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/Mangle.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/AttributeCommonInfo.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Cuda.h"
 #include "clang/Basic/DarwinSDKInfo.h"
@@ -200,13 +201,18 @@ static unsigned getNumAttributeArgs(const ParsedAttr &AL) {
   return AL.getNumArgs() + AL.hasParsedType();
 }
 
-/// A helper function to provide Attribute Location for the Attr types
-/// AND the ParsedAttr.
-template <typename AttrInfo>
-static std::enable_if_t<std::is_base_of_v<Attr, AttrInfo>, SourceLocation>
-getAttrLoc(const AttrInfo &AL) {
+/// Helper functions to provide Attribute Location for the Attr types,
+/// AttributeCommonInfo AND the ParsedAttr.
+template <typename T>
+static std::enable_if_t<std::is_base_of_v<Attr, T>, SourceLocation>
+getAttrLoc(const T &AL) {
   return AL.getLocation();
 }
+template <typename T,
+          std::enable_if_t<std::is_same_v<AttributeCommonInfo, T>, bool> = true>
+static SourceLocation getAttrLoc(const T &AL) {
+  return AL.getScopeLoc();
+}
 static SourceLocation getAttrLoc(const ParsedAttr &AL) { return AL.getLoc(); }
 
 /// If Expr is a valid integer constant, get the value of the integer
@@ -4447,26 +4453,6 @@ void Sema::AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D,
   D->addAttr(::new (Context) SYCLIntelMaxGlobalWorkDimAttr(Context, CI, E));
 }
 
-// Check that the attribute is an integer constant that can fit in 32-bits.
-// Issue correct error message and return false on failure.
-bool static check32BitInt(const Expr *E, const AttributeCommonInfo &CI,
-                          Sema &S) {
-  std::optional<llvm::APSInt> I = llvm::APSInt(64);
-  if (!(I = E->getIntegerConstantExpr(S.Context))) {
-    S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
-        << CI << 0 << AANT_ArgumentIntegerConstant << E->getSourceRange();
-    return false;
-  }
-  // Make sure we can fit it in 32 bits.
-  if (!I->isIntN(32)) {
-    S.Diag(E->getExprLoc(), diag::err_ice_too_large)
-        << toString(*I, 10, false) << 32 << /* Unsigned */ 1;
-    return false;
-  }
-
-  return true;
-}
-
 void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
     Decl *D, const AttributeCommonInfo &CI, Expr *E) {
   if (Context.getLangOpts().SYCLIsDevice &&
@@ -4476,24 +4462,22 @@ void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
     return;
   }
   if (!E->isValueDependent()) {
-    if (!check32BitInt(E, CI, *this))
+    uint32_t Val;
+    if (!checkUInt32Argument(*this, CI, E, Val, UINT_MAX /* Idx */,
+                             true /* StrictlyUnsigned */))
       return;
+
     // Validate that we have an integer constant expression and then store the
     // converted constant expression into the semantic attribute so that we
     // don't have to evaluate it again later.
     llvm::APSInt ArgVal;
     ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal);
     if (Res.isInvalid())
       return;
+    if (Val != ArgVal)
+      llvm_unreachable("Values must not differ.");
     E = Res.get();
 
-    // This attribute must be greater than 0.
-    if (ArgVal <= 0) {
-      Diag(E->getBeginLoc(), diag::err_attribute_argument_is_zero)
-          << CI << E->getSourceRange();
-      return;
-    }
-
     // Check to see if there's a duplicate attribute with different values
     // already applied to the declaration.
     if (const auto *DeclAttr =
@@ -4543,8 +4527,11 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
     }
   }
   if (!E->isValueDependent()) {
-    if (!check32BitInt(E, CI, *this))
+    uint32_t Val;
+    if (!checkUInt32Argument(*this, CI, E, Val, UINT_MAX /* Idx */,
+                             true /* StrictlyUnsigned */))
       return;
+
     // Validate that we have an integer constant expression and then store the
     // converted constant expression into the semantic attribute so that we
     // don't have to evaluate it again later.
@@ -4553,13 +4540,8 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
     if (Res.isInvalid())
       return;
     E = Res.get();
-
-    // This attribute must be greater than 0.
-    if (ArgVal <= 0) {
-      Diag(E->getBeginLoc(), diag::err_attribute_argument_is_zero)
-          << CI << E->getSourceRange();
-      return;
-    }
+    if (Val != ArgVal)
+      llvm_unreachable("Values must not differ.");
 
     // Check to see if there's a duplicate attribute with different values
     // already applied to the declaration.
diff --git a/clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp b/clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp
@@ -1,6 +1,10 @@
-// REQUIRES: cuda
+// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -triple nvptx-unknown-unknown -target-cpu sm_90 -disable-llvm-passes -S -emit-llvm -o - %s | FileCheck %s
 
-// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -triple nvptx-unknown-unknown -target-cpu sm_90 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// Test correct handling of maximum work group size, minimum work groups per
+// compute unit and maximum work groups per multi-processor attributes, that
+// correspond to CUDA's launch bounds. Expect max_work_group_size,
+// min_work_groups_per_cu and max_work_groups_per_mp that are mapped to
+// maxntidx, minnctapersm, maxclusterrank PTX directives respectively.
 
 #include "sycl.hpp"
 
diff --git a/clang/test/CodeGenSYCL/lb_sm_90.cpp b/clang/test/CodeGenSYCL/lb_sm_90.cpp
@@ -1,48 +1,51 @@
-// REQUIRES: cuda
-
-// RUN: %clangxx -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_90 -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
-// RUN: %clangxx -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_90 -fsycl -fsyntax-only -Xclang -verify %s
-// expected-no-diagnostics
-
-#include <sycl/sycl.hpp>
-
-template <int N1, int N2, int N3> class Functor {
-public:
-  [[intel::max_work_group_size(1, 1, N1), intel::min_work_groups_per_cu(N2),
-    intel::max_work_groups_per_mp(N3)]] void
-  operator()() const {}
-};
-
-int main() {
-  sycl::queue Q{};
-
-  sycl::range<1> Gws(32);
-  sycl::range<1> Lws(32);
-
-  Q.submit([&](sycl::handler &cgh) {
-     cgh.parallel_for(sycl::nd_range<1>(Gws, Lws),
-                      [=](sycl::id<1>) [[intel::max_work_group_size(1, 1, 256),
-                                         intel::min_work_groups_per_cu(2),
-                                         intel::max_work_groups_per_mp(4)]] {
-                        volatile int A = 42;
-                      });
-   }).wait_and_throw();
-  // CHECK-IR: !min_work_groups_per_cu [[MWGPCU:![0-9]+]]
-  // CHECK-IR: !max_work_groups_per_mp [[MWGPMP:![0-9]+]]
-  // CHECK-IR: !max_work_group_size [[MWGS:![0-9]+]]
-
-  Q.single_task<class F>(Functor<512, 8, 16>{}).wait();
-  // CHECK-IR: !min_work_groups_per_cu [[MWGPCU_F:![0-9]+]]
-  // CHECK-IR: !max_work_groups_per_mp [[MWGPMP_F:![0-9]+]]
-  // CHECK-IR: !max_work_group_size [[MWGS_F:![0-9]+]]
-
-  // CHECK-IR: [[MWGPCU]] = !{i32 2}
-  // CHECK-IR: [[MWGPMP]] = !{i32 4}
-  // CHECK-IR: [[MWGS]] = !{i32 256, i32 1, i32 1}
-
-  // CHECK-IR: [[MWGPCU_F]] = !{i32 8}
-  // CHECK-IR: [[MWGPMP_F]] = !{i32 16}
-  // CHECK-IR: [[MWGS_F]] = !{i32 512, i32 1, i32 1}
-
-  return 0;
-}
+// RUN: %clang_cc1 -internal-isystem %S/Inputs -triple nvptx-unknown-unknown -target-cpu sm_90 -fsycl-is-device -S -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
+// RUN: %clang_cc1 -internal-isystem %S/Inputs %s -triple nvptx64-nvidia-cuda -target-cpu sm_90 -fsycl-is-device -fsyntax-only -verify
+// expected-no-diagnostics
+
+// Maximum work groups per multi-processor, mapped to the maxclusterrank PTX
+// directive, is an SM_90 feature; make sure that correct metadata is generated
+// and that no warnings/errors are issued.
+
+#include "sycl.hpp"
+
+template <int N1, int N2, int N3> class Functor {
+public:
+  [[intel::max_work_group_size(1, 1, N1), intel::min_work_groups_per_cu(N2),
+    intel::max_work_groups_per_mp(N3)]] void
+  operator()() const {}
+};
+
+int main() {
+  sycl::queue Q{};
+
+  sycl::range<1> Gws(32);
+
+  Q.submit([&](sycl::handler &cgh) {
+     cgh.parallel_for<class K1>(Gws,
+                      [=](sycl::id<1>) [[intel::max_work_group_size(1, 1, 256),
+                                         intel::min_work_groups_per_cu(2),
+                                         intel::max_work_groups_per_mp(4)]] {
+                        volatile int A = 42;
+                      });
+   });
+  // CHECK-IR: !min_work_groups_per_cu [[MWGPCU:![0-9]+]]
+  // CHECK-IR: !max_work_groups_per_mp [[MWGPMP:![0-9]+]]
+  // CHECK-IR: !max_work_group_size [[MWGS:![0-9]+]]
+
+  Q.submit([&](sycl::handler &cgh) {
+    cgh.single_task<class F>(Functor<512, 8, 16>{});
+  });
+  // CHECK-IR: !min_work_groups_per_cu [[MWGPCU_F:![0-9]+]]
+  // CHECK-IR: !max_work_groups_per_mp [[MWGPMP_F:![0-9]+]]
+  // CHECK-IR: !max_work_group_size [[MWGS_F:![0-9]+]]
+
+  // CHECK-IR: [[MWGPCU]] = !{i32 2}
+  // CHECK-IR: [[MWGPMP]] = !{i32 4}
+  // CHECK-IR: [[MWGS]] = !{i32 256, i32 1, i32 1}
+
+  // CHECK-IR: [[MWGPCU_F]] = !{i32 8}
+  // CHECK-IR: [[MWGPMP_F]] = !{i32 16}
+  // CHECK-IR: [[MWGS_F]] = !{i32 512, i32 1, i32 1}
+
+  return 0;
+}
diff --git a/clang/test/SemaSYCL/lb_sm_70.cpp b/clang/test/SemaSYCL/lb_sm_70.cpp
@@ -1,8 +1,11 @@
-// REQUIRES: cuda
+// RUN: %clang_cc1 -internal-isystem %S/Inputs -triple nvptx-unknown-unknown -target-cpu sm_70 -fsycl-is-device -S -emit-llvm %s -o -ferror-limit=100 -fsyntax-only -verify %s
 
-// RUN: %clangxx -ferror-limit=100 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -fsycl-device-only -fsyntax-only -Xclang -verify %s
+// Maximum work groups per multi-processor, mapped to the maxclusterrank PTX
+// directive, is an SM_90 feature; make sure that the correct warning is issued
+// on architectures lower than that. Furthermore, warn/error on incorrect values
+// specified for max_work_groups_per_mp and min_work_groups_per_cu.
 
-#include <sycl/sycl.hpp>
+#include "sycl.hpp"
 
 template <int N1, int N2, int N3> class Functor {
 public:
@@ -24,15 +27,15 @@ int main() {
                intel::max_work_groups_per_mp(4)]] { volatile int A = 42; });
 
      constexpr float A = 2.0;
-     // expected-error@+5 {{'min_work_groups_per_cu' attribute requires parameter 0 to be an integer constant}}
+     // expected-error@+5 {{'min_work_groups_per_cu' attribute requires an integer constant}}
      // expected-warning@+5 {{'maxclusterrank' requires sm_90 or higher, CUDA arch provided: sm_70, ignoring 'max_work_groups_per_mp' attribute}}
      cgh.single_task<class T2>(
          [=]()
              [[intel::max_work_group_size(1, 1, 256),
                intel::min_work_groups_per_cu(A),
                intel::max_work_groups_per_mp(4)]] { volatile int A = 42; });
 
-     // expected-error@+3 {{'min_work_groups_per_cu' attribute requires parameter 0 to be an integer constant}}
+     // expected-error@+3 {{'min_work_groups_per_cu' attribute requires an integer constant}}
      cgh.single_task<class T3>(
          [=]() [[intel::max_work_group_size(1, 1, 256),
                  intel::min_work_groups_per_cu(2147483647 + 1)]]
@@ -46,13 +49,15 @@ int main() {
        volatile int A = 42;
      });
 
-     // expected-error@+1 {{'min_work_groups_per_cu' attribute must be greater than 0}}
+     // expected-error@+1 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
      cgh.single_task<class T5>([=]() [[intel::min_work_groups_per_cu(-8)]] {
        volatile int A = 42;
      });
-   }).wait_and_throw();
+   });
 
-  Q.single_task<class F>(Functor<512, 8, 16>{}).wait();
+  Q.submit([&](sycl::handler &cgh) {
+    cgh.single_task<class F>(Functor<512, 8, 16>{});
+  });
 
   return 0;
 }
diff --git a/clang/test/SemaSYCL/lb_sm_90_ast.cpp b/clang/test/SemaSYCL/lb_sm_90_ast.cpp
@@ -1,5 +1,3 @@
-// REQUIERS: cuda
-
 // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -ast-dump -triple nvptx-unknown-unknown -target-cpu sm_90 %s | FileCheck %s
 
 // Tests for AST of Intel max_work_group_size, min_work_groups_per_cu and

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,3 @@`
`1`		`-// REQUIERS: cuda`
`2`		`-`
`3`	`1`	`// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -ast-dump -triple nvptx-unknown-unknown -target-cpu sm_90 %s \| FileCheck %s`
`4`	`2`
`5`	`3`	`// Tests for AST of Intel max_work_group_size, min_work_groups_per_cu and`