2
2
// REQUIRES: gpu && linux
3
3
// UNSUPPORTED: cuda || hip
4
4
//
5
- // TODO: enable when Jira ticket resolved
6
- // XFAIL: gpu
7
- //
8
5
// Check that full compilation works:
9
6
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
10
7
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
21
18
* This test also runs with all types of VISA link time optimizations enabled.
22
19
*/
23
20
21
+ #include < sycl/detail/boost/mp11.hpp>
24
22
#include < sycl/ext/intel/esimd.hpp>
25
23
#include < sycl/ext/oneapi/experimental/invoke_simd.hpp>
26
24
#include < sycl/sycl.hpp>
@@ -42,26 +40,25 @@ using namespace sycl::ext::oneapi::experimental;
42
40
namespace esimd = sycl::ext::intel::esimd;
43
41
constexpr int VL = 16 ;
44
42
43
+ template <typename MaskType>
45
44
__attribute__ ((always_inline)) esimd::simd<float, VL>
46
45
ESIMD_CALLEE(esimd::simd<float , VL> va,
47
- simd_mask<bool , VL> mask) SYCL_ESIMD_FUNCTION {
46
+ simd_mask<MaskType , VL> mask) SYCL_ESIMD_FUNCTION {
48
47
return va;
49
48
}
50
49
50
+ template <typename MaskType>
51
51
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
52
52
simd<float , VL> __regcall SIMD_CALLEE (
53
- simd<float , VL> va, simd_mask<bool , VL> mask) SYCL_ESIMD_FUNCTION;
53
+ simd<float , VL> va, simd_mask<MaskType , VL> mask) SYCL_ESIMD_FUNCTION;
54
54
55
55
using namespace sycl ;
56
56
57
- int main ( void ) {
57
+ template < typename MaskType> int test (queue q ) {
58
58
constexpr unsigned Size = 1024 ;
59
59
constexpr unsigned GroupSize = 4 * VL;
60
60
61
- auto q = queue{gpu_selector_v};
62
61
auto dev = q.get_device ();
63
- std::cout << " Running on " << dev.get_info <sycl::info::device::name>()
64
- << " \n " ;
65
62
auto ctxt = q.get_context ();
66
63
67
64
float *A =
@@ -86,14 +83,14 @@ int main(void) {
86
83
87
84
try {
88
85
auto e = q.submit ([&](handler &cgh) {
89
- cgh.parallel_for < class Test > (Range, [=](nd_item<1 > ndi) SUBGROUP_ATTR {
86
+ cgh.parallel_for (Range, [=](nd_item<1 > ndi) SUBGROUP_ATTR {
90
87
sub_group sg = ndi.get_sub_group ();
91
88
group<1 > g = ndi.get_group ();
92
89
uint32_t i =
93
90
sg.get_group_linear_id () * VL + g.get_group_linear_id () * GroupSize;
94
91
uint32_t wi_id = i + sg.get_local_id ();
95
-
96
- float res = invoke_simd (sg, SIMD_CALLEE , A[wi_id], M[wi_id]);
92
+ auto Callee = SIMD_CALLEE<MaskType>;
93
+ float res = invoke_simd (sg, Callee , A[wi_id], M[wi_id]);
97
94
C[wi_id] = res;
98
95
});
99
96
});
@@ -131,9 +128,34 @@ int main(void) {
131
128
return err_cnt > 0 ? 1 : 0 ;
132
129
}
133
130
131
+ template <typename MaskType>
134
132
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
135
133
simd<float , VL> __regcall SIMD_CALLEE (
136
- simd<float , VL> va, simd_mask<bool , VL> mask) SYCL_ESIMD_FUNCTION {
134
+ simd<float , VL> va, simd_mask<MaskType , VL> mask) SYCL_ESIMD_FUNCTION {
137
135
esimd::simd<float , VL> res = ESIMD_CALLEE (va, mask);
138
136
return res;
139
137
}
138
+
139
+ int main () {
140
+ queue q{gpu_selector_v};
141
+
142
+ auto dev = q.get_device ();
143
+ std::cout << " Running on " << dev.get_info <sycl::info::device::name>()
144
+ << " \n " ;
145
+ bool passed = true ;
146
+ const bool SupportsDouble = dev.has (aspect::fp64);
147
+ using namespace sycl ::detail::boost::mp11;
148
+ using MaskTypes =
149
+ std::tuple<char , char16_t , char32_t , wchar_t , signed char , signed short ,
150
+ signed int , signed long , signed long long , unsigned char ,
151
+ unsigned short , unsigned int , unsigned long ,
152
+ unsigned long long , float , double >;
153
+ tuple_for_each (MaskTypes{}, [&](auto &&x) {
154
+ using T = std::remove_reference_t <decltype (x)>;
155
+ if (std::is_same_v<T, double > && !SupportsDouble)
156
+ return ;
157
+ passed &= !test<T>(q);
158
+ });
159
+ std::cout << (passed ? " Test passed\n " : " TEST FAILED\n " );
160
+ return passed ? 0 : 1 ;
161
+ }
0 commit comments