intel · againull · Nov 21, 2023 · Oct 27, 2023 · Nov 1, 2023 · Nov 6, 2023
@@ -3526,6 +3526,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
 def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
   " only on a function directly called from a SYCL kernel function; attribute ignored">,
   InGroup<IgnoredAttributes>;
+def warn_amd_reqd_sub_group_attribute_n
+    : Warning<"attribute argument %0 is invalid and will be ignored; AMD "
+              "requires sub_group size %1">,
+  InGroup<IgnoredAttributes>;
 def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
   " exception specification; attribute ignored">,
   InGroup<IgnoredAttributes>;

@@ -4007,10 +4007,27 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
           << CI << /*positive*/ 0;
       return;
     }
-    if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
+    auto &TI = Context.getTargetInfo();
+    if (TI.getTriple().isNVPTX() && ArgVal != 32) {
       Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
           << ArgVal.getSExtValue();
     }
+    if (TI.getTriple().isAMDGPU()) {
+      const auto HasWaveFrontSize64 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize64"];
+      const auto HasWaveFrontSize32 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize32"];
+
+      // CDNA supports only 64 wave front size, for those GPUs allow subgroup
+      // size of 64. Some GPUs support both 32 and 64, for those (and the rest)
+      // only allow 32.
+      const auto SupportedWaveFrontSize =
+          HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
+      if (ArgVal != SupportedWaveFrontSize) {
+        Diag(E->getExprLoc(), diag::warn_amd_reqd_sub_group_attribute_n)
+            << ArgVal.getSExtValue() << SupportedWaveFrontSize;
+      }
+    }
 
     // Check to see if there's a duplicate attribute with different values
     // already applied to the declaration.

@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
+// size is not 32.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; AMD requires sub_group size 32}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 32}}
+  });
+
+  return 0;
+}
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
+// size is not 64.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; AMD requires sub_group size 64}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 64}}
+  });
+
+  return 0;
+}