[SYCL][CUDA] Return invalid subgroup size warning (#6183)

JackAKirk · web-flow · commit 6dab69f13882 · 2022-06-03T19:46:45.000-07:00
This is a solution to #6103 for the CUDA case only. The HIP AMD case still needs to be considered, as discussed here: #6103 (comment). CUDA currently supports only one subgroup (warp) size, 32, on all devices. This PR introduces a solution to #6103 that is appropriate for backends which support only a single subgroup size: if the optional kernel attribute reqd_sub_group_size() is used with the supported subgroup size, the code compiles and behaves as the programmer intends. If reqd_sub_group_size() is used with any other, incompatible subgroup size, a warning is emitted at compile time, such as: reqd-sub-group-size-cuda.cpp:12:73: warning: attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32 [-Wcuda-compat] h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); ^ Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3288,6 +3288,10 @@ def err_attribute_argument_is_zero : Error<
 def warn_attribute_argument_n_negative : Warning<
   "%0 attribute parameter %1 is negative and will be ignored">,
   InGroup<CudaCompat>;
+def warn_reqd_sub_group_attribute_cuda_n_32
+    : Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
+              "requires sub_group size 32">,
+      InGroup<CudaCompat>;
 def err_property_function_in_objc_container : Error<
   "use of Objective-C property in function nested in Objective-C "
   "container not supported, move function outside its container">;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3886,6 +3886,10 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
           << CI << /*positive*/ 0;
       return;
     }
+    if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
+      Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
+          << ArgVal.getSExtValue();
+    }
 
     // Check to see if there's a duplicate attribute with different values
     // already applied to the declaration.
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple nvptx -internal-isystem %S/Inputs -std=c++2b -verify %s
+//
+// This tests that a warning is returned when a sub group size other than 32 is
+// requested in the CUDA backend via the reqd_sub_group_size() kernel attribute.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+  // Kernel requesting sub-group size 8: the expected-warning annotation on the kernel line pins the diagnostic text.
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
+  });
+  // Kernel requesting sub-group size 32: this line carries no expected-warning annotation.
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
+  });
+
+  return 0;
+}