[SYCL] [AMDGPU] Ignore incorrect sub-group size

jchlanda · jchlanda · commit ac489a598fcb · 2023-10-30T17:10:24.000Z
CDNA supports only 64 wave front size, for those GPUs set subgroup size
to 64. Some GPUS support both 32 and 64, for those (and the rest) only
allow 32.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3526,6 +3526,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
 def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
   " only on a function directly called from a SYCL kernel function; attribute ignored">,
   InGroup<IgnoredAttributes>;
+def warn_amd_reqd_sub_group_attribute_n
+    : Warning<"attribute argument %0 is invalid and will be ignored; AMD "
+              "requires sub_group size %1">,
+  InGroup<IgnoredAttributes>;
 def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
   " exception specification; attribute ignored">,
   InGroup<IgnoredAttributes>;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4007,10 +4007,27 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
           << CI << /*positive*/ 0;
       return;
     }
-    if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
+    auto &TI = Context.getTargetInfo();
+    if (TI.getTriple().isNVPTX() && ArgVal != 32) {
       Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
           << ArgVal.getSExtValue();
     }
+    if (TI.getTriple().isAMDGPU()) {
+      const auto HasWaveFrontSize64 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize64"];
+      const auto HasWaveFrontSize32 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize32"];
+
+      // CDNA supports only 64 wave front size, for those GPUs allow subgroup
+      // size of 64. Some GPUs support both 32 and 64, for those (and the rest)
+      // only allow 32.
+      const auto SupportedWaveFrontSize =
+          HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
+      if (ArgVal != SupportedWaveFrontSize) {
+        Diag(E->getExprLoc(), diag::warn_amd_reqd_sub_group_attribute_n)
+            << ArgVal.getSExtValue() << SupportedWaveFrontSize;
+      }
+    }
 
     // Check to see if there's a duplicate attribute with different values
     // already applied to the declaration.
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
+// size exceeds 32.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; AMD requires sub_group size 32}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 32}}
+  });
+
+  return 0;
+}
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
+// size exceeds 32.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; AMD requires sub_group size 64}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 64}}
+  });
+
+  return 0;
+}