intel · v-klochkov · Feb 17, 2023 · Feb 16, 2023 · Feb 16, 2023
@@ -2,9 +2,6 @@
 // REQUIRES: gpu && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu-intel-pvc
-//
 // Check that full compilation works:
 // RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../call_vadd_1d_loop_naive.cpp -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

@@ -2,9 +2,6 @@
 // REQUIRES: gpu && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu-intel-pvc
-//
 // Check that full compilation works:
 // RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

@@ -6,15 +6,6 @@
 #include <iostream>
 #include <type_traits>
 
-/* Subgroup size attribute is optional
- * In case it is absent compiler decides what subgroup size to use
- */
-#ifdef IMPL_SUBGROUP
-#define SUBGROUP_ATTR
-#else
-#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
-#endif
-
 using namespace sycl::ext::oneapi::experimental;
 namespace esimd = sycl::ext::intel::esimd;
 
@@ -77,7 +68,7 @@ template <int Size, int VL, class QueueTY> bool test(QueueTY q) {
   try {
     auto e = q.submit([&](handler &cgh) {
       cgh.parallel_for<class TestID<Size, VL>>(
-          Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
+          Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] {
             sub_group sg = ndi.get_sub_group();
             group<1> g = ndi.get_group();
             uint32_t i = sg.get_group_linear_id() * VL +

@@ -2,9 +2,6 @@
 // REQUIRES: gpu && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu
-//
 // Check that full compilation works:
 // RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
@@ -32,11 +29,7 @@ int main(void) {
   bool passed = true;
 
   // simd_size 16
-  passed &= test<4, 16>(q);
-  passed &= test<8, 16>(q);
   passed &= test<16, 16>(q);
-  passed &= test<32, 16>(q);
-  // TODO FIXME: enable cases with ret val size > 32 when Jira ticket resolved
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;

@@ -3,9 +3,6 @@
 // REQUIRES: gpu-intel-pvc && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu
-//
 // Check that full compilation works:
 // RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
@@ -32,11 +29,7 @@ int main(void) {
   bool passed = true;
 
   // simd_size 32
-  passed &= test<4, 32>(q);
-  passed &= test<8, 32>(q);
-  passed &= test<16, 32>(q);
   passed &= test<32, 32>(q);
-  // TODO FIXME: enable cases with ret val size > 32 when Jira ticket resolved
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;

@@ -4,9 +4,6 @@
 // Test not intended to run on PVC
 // UNSUPPORTED: cuda || hip || gpu-intel-pvc
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu
-//
 // Check that full compilation works:
 // RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
@@ -33,11 +30,7 @@ int main(void) {
   bool passed = true;
 
   // simd_size 8
-  passed &= test<4, 8>(q);
   passed &= test<8, 8>(q);
-  passed &= test<16, 8>(q);
-  passed &= test<32, 8>(q);
-  // TODO FIXME: enable cases with ret val size > 32 when Jira ticket resolved
 
   std::cout << (passed ? "Passed\n" : "FAILED\n");
   return passed ? 0 : 1;

@@ -0,0 +1,54 @@
+// TODO: enable on Windows once driver is ready
+// REQUIRES: gpu && linux
+// UNSUPPORTED: cuda || hip
+//
+// TODO: enable when Jira ticket resolved
+// XFAIL: gpu
+//
+// Check that full compilation works:
+// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out | FileCheck %s
+
+/* Tests invoke_simd support in the compiler/headers.
+ * Checks that the compiler emits a meaningful, user-friendly error message
+ * when the target function of invoke_simd takes arguments or returns a value
+ * of simd<T,N> or simd_mask<T,N> type and N is not equal to the subgroup size.
+ */
+
+#include "Inputs/common.hpp"
+
+int main(void) {
+  auto q = queue{gpu_selector_v};
+  auto dev = q.get_device();
+  std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
+            << "\n";
+  bool passed = true;
+
+  // TODO: the exact error message is subject to change in the future.
+
+  // simd_size 8
+  passed &= test<4, 8>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (8) does not match size of invoke_simd return type (4){{.*}}
+  passed &= test<16, 8>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (8) does not match size of invoke_simd return type (16){{.*}}
+  passed &= test<32, 8>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (8) does not match size of invoke_simd return type (32){{.*}}
+
+  // simd_size 16
+  passed &= test<4, 16>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (16) does not match size of invoke_simd return type (4){{.*}}
+  passed &= test<8, 16>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (16) does not match size of invoke_simd return type (8){{.*}}
+  passed &= test<32, 16>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (16) does not match size of invoke_simd return type (32){{.*}}
+
+  // simd_size 32
+  passed &= test<4, 32>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (32) does not match size of invoke_simd return type (4){{.*}}
+  passed &= test<8, 32>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (32) does not match size of invoke_simd return type (8){{.*}}
+  passed &= test<16, 32>(q);
+  // CHECK: {{.*}}error: Kernel subgroup size (32) does not match size of invoke_simd return type (16){{.*}}
+
+  std::cout << (passed ? "Passed\n" : "FAILED\n");
+  return passed ? 0 : 1;
+}