intel · v-klochkov · Mar 14, 2023 · Mar 3, 2023 · Mar 3, 2023 · Mar 6, 2023
@@ -2,9 +2,6 @@
 // REQUIRES: gpu && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu
-//
 // Check that full compilation works:
 // RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../simd_mask.cpp -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

@@ -2,9 +2,6 @@
 // REQUIRES: gpu && linux
 // UNSUPPORTED: cuda || hip
 //
-// TODO: enable when Jira ticket resolved
-// XFAIL: gpu
-//
 // Check that full compilation works:
 // RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
 // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
@@ -21,6 +18,7 @@
  * This test also runs with all types of VISA link time optimizations enabled.
  */
 
+#include <sycl/detail/boost/mp11.hpp>
 #include <sycl/ext/intel/esimd.hpp>
 #include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
 #include <sycl/sycl.hpp>
@@ -42,26 +40,25 @@ using namespace sycl::ext::oneapi::experimental;
 namespace esimd = sycl::ext::intel::esimd;
 constexpr int VL = 16;
 
+template <typename MaskType>
 __attribute__((always_inline)) esimd::simd<float, VL>
 ESIMD_CALLEE(esimd::simd<float, VL> va,
-             simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
+             simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
   return va;
 }
 
+template <typename MaskType>
 [[intel::device_indirectly_callable]] SYCL_EXTERNAL
     simd<float, VL> __regcall SIMD_CALLEE(
-        simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION;
+        simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION;
 
 using namespace sycl;
 
-int main(void) {
+template <typename MaskType> int test(queue q) {
   constexpr unsigned Size = 1024;
   constexpr unsigned GroupSize = 4 * VL;
 
-  auto q = queue{gpu_selector_v};
   auto dev = q.get_device();
-  std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
-            << "\n";
   auto ctxt = q.get_context();
 
   float *A =
@@ -86,14 +83,14 @@ int main(void) {
 
   try {
     auto e = q.submit([&](handler &cgh) {
-      cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
+      cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
         sub_group sg = ndi.get_sub_group();
         group<1> g = ndi.get_group();
         uint32_t i =
             sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize;
         uint32_t wi_id = i + sg.get_local_id();
-
-        float res = invoke_simd(sg, SIMD_CALLEE, A[wi_id], M[wi_id]);
+        auto Callee = SIMD_CALLEE<MaskType>;
+        float res = invoke_simd(sg, Callee, A[wi_id], M[wi_id]);
         C[wi_id] = res;
       });
     });
@@ -131,9 +128,34 @@ int main(void) {
   return err_cnt > 0 ? 1 : 0;
 }
 
+template <typename MaskType>
 [[intel::device_indirectly_callable]] SYCL_EXTERNAL
     simd<float, VL> __regcall SIMD_CALLEE(
-        simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
+        simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
   esimd::simd<float, VL> res = ESIMD_CALLEE(va, mask);
   return res;
 }
+
+int main() {
+  queue q{gpu_selector_v};
+
+  auto dev = q.get_device();
+  std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
+            << "\n";
+  bool passed = true;
+  const bool SupportsDouble = dev.has(aspect::fp64);
+  using namespace sycl::detail::boost::mp11;
+  using MaskTypes =
+      std::tuple<char, char16_t, char32_t, wchar_t, signed char, signed short,
+                 signed int, signed long, signed long long, unsigned char,
+                 unsigned short, unsigned int, unsigned long,
+                 unsigned long long, float, double>;
+  tuple_for_each(MaskTypes{}, [&](auto &&x) {
+    using T = std::remove_reference_t<decltype(x)>;
+    if (std::is_same_v<T, double> && !SupportsDouble)
+      return;
+    passed &= !test<T>(q);
+  });
+  std::cout << (passed ? "Test passed\n" : "TEST FAILED\n");
+  return passed ? 0 : 1;
+}