Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 94697c4

Browse files
authored
[SYCL][ESIMD] Enable invoke_simd simd_mask test (#1630)
* [SYCL][ESIMD] Enable invoke_simd simd_mask test Add tests for all valid simd_mask types and the boolean conversion to/from them. Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 89ae917 commit 94697c4

File tree

2 files changed

+35
-16
lines changed

2 files changed

+35
-16
lines changed

SYCL/InvokeSimd/Spec/ImplicitSubgroup/simd_mask.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// REQUIRES: gpu && linux
33
// UNSUPPORTED: cuda || hip
44
//
5-
// TODO: enable when Jira ticket resolved
6-
// XFAIL: gpu
7-
//
85
// Check that full compilation works:
96
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../simd_mask.cpp -o %t.out
107
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

SYCL/InvokeSimd/Spec/simd_mask.cpp

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// REQUIRES: gpu && linux
33
// UNSUPPORTED: cuda || hip
44
//
5-
// TODO: enable when Jira ticket resolved
6-
// XFAIL: gpu
7-
//
85
// Check that full compilation works:
96
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
107
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
@@ -21,6 +18,7 @@
2118
* This test also runs with all types of VISA link time optimizations enabled.
2219
*/
2320

21+
#include <sycl/detail/boost/mp11.hpp>
2422
#include <sycl/ext/intel/esimd.hpp>
2523
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
2624
#include <sycl/sycl.hpp>
@@ -42,26 +40,25 @@ using namespace sycl::ext::oneapi::experimental;
4240
namespace esimd = sycl::ext::intel::esimd;
4341
constexpr int VL = 16;
4442

43+
template <typename MaskType>
4544
__attribute__((always_inline)) esimd::simd<float, VL>
4645
ESIMD_CALLEE(esimd::simd<float, VL> va,
47-
simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
46+
simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
4847
return va;
4948
}
5049

50+
template <typename MaskType>
5151
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
5252
simd<float, VL> __regcall SIMD_CALLEE(
53-
simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION;
53+
simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION;
5454

5555
using namespace sycl;
5656

57-
int main(void) {
57+
template <typename MaskType> int test(queue q) {
5858
constexpr unsigned Size = 1024;
5959
constexpr unsigned GroupSize = 4 * VL;
6060

61-
auto q = queue{gpu_selector_v};
6261
auto dev = q.get_device();
63-
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
64-
<< "\n";
6562
auto ctxt = q.get_context();
6663

6764
float *A =
@@ -86,14 +83,14 @@ int main(void) {
8683

8784
try {
8885
auto e = q.submit([&](handler &cgh) {
89-
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
86+
cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
9087
sub_group sg = ndi.get_sub_group();
9188
group<1> g = ndi.get_group();
9289
uint32_t i =
9390
sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize;
9491
uint32_t wi_id = i + sg.get_local_id();
95-
96-
float res = invoke_simd(sg, SIMD_CALLEE, A[wi_id], M[wi_id]);
92+
auto Callee = SIMD_CALLEE<MaskType>;
93+
float res = invoke_simd(sg, Callee, A[wi_id], M[wi_id]);
9794
C[wi_id] = res;
9895
});
9996
});
@@ -131,9 +128,34 @@ int main(void) {
131128
return err_cnt > 0 ? 1 : 0;
132129
}
133130

131+
template <typename MaskType>
134132
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
135133
simd<float, VL> __regcall SIMD_CALLEE(
136-
simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
134+
simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
137135
esimd::simd<float, VL> res = ESIMD_CALLEE(va, mask);
138136
return res;
139137
}
138+
139+
int main() {
140+
queue q{gpu_selector_v};
141+
142+
auto dev = q.get_device();
143+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
144+
<< "\n";
145+
bool passed = true;
146+
const bool SupportsDouble = dev.has(aspect::fp64);
147+
using namespace sycl::detail::boost::mp11;
148+
using MaskTypes =
149+
std::tuple<char, char16_t, char32_t, wchar_t, signed char, signed short,
150+
signed int, signed long, signed long long, unsigned char,
151+
unsigned short, unsigned int, unsigned long,
152+
unsigned long long, float, double>;
153+
tuple_for_each(MaskTypes{}, [&](auto &&x) {
154+
using T = std::remove_reference_t<decltype(x)>;
155+
if (std::is_same_v<T, double> && !SupportsDouble)
156+
return;
157+
passed &= !test<T>(q);
158+
});
159+
std::cout << (passed ? "Test passed\n" : "TEST FAILED\n");
160+
return passed ? 0 : 1;
161+
}

0 commit comments

Comments
 (0)