Skip to content

Commit 03bd562

Browse files
kbobrovsbb-sycl
authored andcommitted
[SYCL] Update invoke_simd smoke test, add test on argument conversion. (intel#1146)
* [SYCL] Update invoke_simd smoke test, add test on argument conversion. invoke_simd_smoke.cpp is updated due to compiler changes. [[intel::device_indirectly_callable]] attribute is now required for user functions which are invoke_simd call targets. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent bd2e504 commit 03bd562

File tree

2 files changed

+208
-10
lines changed

2 files changed

+208
-10
lines changed

SYCL/InvokeSimd/invoke_simd_conv.cpp

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
// The test checks that invoke_simd implementation performs proper conversions
2+
// on the actual arguments:
3+
// - Case1: actual type is uniform<T>, formal - T1 (scalar)
4+
// standard C++ arithmetic conversion is applied
5+
// - Case2: actual type is T, format - simd<T1, VL>
6+
// simd-simd conversion is applied according to the std::experimental::simd
7+
// specification. Basically, only non-narrowing conversions are allowed:
8+
// char -> int, float -> double, etc. int -> float is forbidden.
9+
10+
// TODO: enable on Windows once driver is ready
11+
// REQUIRES: gpu && linux
12+
// UNSUPPORTED: cuda || hip
13+
14+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
15+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
16+
17+
#include <sycl/ext/intel/esimd.hpp>
18+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
19+
#include <sycl/ext/oneapi/experimental/uniform.hpp>
20+
#include <sycl/sycl.hpp>
21+
22+
#include <functional>
23+
#include <iostream>
24+
#include <type_traits>
25+
26+
using namespace sycl::ext::oneapi::experimental;
27+
using namespace sycl;
28+
29+
constexpr int VL = 16;
30+
31+
template <class T> inline T calc(T val) { return val + val; }
32+
template <class T, int N> inline simd<T, N> calc(simd<T, N> val) {
33+
// emulate '+' on simd operands
34+
for (int i = 0; i < N; ++i) {
35+
val[i] += val[i];
36+
}
37+
return val;
38+
}
39+
40+
template <class SimdElemT>
41+
[[intel::device_indirectly_callable]] // required by FE for addr-taken functions
42+
simd<SimdElemT, VL> __regcall SIMD_CALLEE_UNIFORM(SimdElemT val)
43+
SYCL_ESIMD_FUNCTION {
44+
return simd<SimdElemT, VL>(calc(val)); // broadcast
45+
}
46+
47+
template <class SimdElemT>
48+
[[intel::device_indirectly_callable]] simd<SimdElemT, VL> __regcall SIMD_CALLEE(
49+
simd<SimdElemT, VL> val) SYCL_ESIMD_FUNCTION {
50+
return calc(val);
51+
}
52+
53+
class ESIMDSelector : public device_selector {
54+
// Require GPU device unless HOST is requested in SYCL_DEVICE_FILTER env
55+
virtual int operator()(const device &device) const {
56+
if (const char *dev_filter = getenv("SYCL_DEVICE_FILTER")) {
57+
std::string filter_string(dev_filter);
58+
if (filter_string.find("gpu") != std::string::npos)
59+
return device.is_gpu() ? 1000 : -1;
60+
if (filter_string.find("host") != std::string::npos)
61+
return device.is_host() ? 1000 : -1;
62+
std::cerr
63+
<< "Supported 'SYCL_DEVICE_FILTER' env var values are 'gpu' and "
64+
"'host', '"
65+
<< filter_string << "' does not contain such substrings.\n";
66+
return -1;
67+
}
68+
// If "SYCL_DEVICE_FILTER" not defined, only allow gpu device
69+
return device.is_gpu() ? 1000 : -1;
70+
}
71+
};
72+
73+
inline auto createExceptionHandler() {
74+
return [](exception_list l) {
75+
for (auto ep : l) {
76+
try {
77+
std::rethrow_exception(ep);
78+
} catch (sycl::exception &e0) {
79+
std::cout << "sycl::exception: " << e0.what() << std::endl;
80+
} catch (std::exception &e) {
81+
std::cout << "std::exception: " << e.what() << std::endl;
82+
} catch (...) {
83+
std::cout << "generic exception\n";
84+
}
85+
}
86+
};
87+
}
88+
89+
template <class, class, bool> class TestID;
90+
91+
template <class SpmdT, class SimdElemT, bool IsUniform> bool test(queue q) {
92+
// 3 subgroups per workgroup
93+
unsigned GroupSize = VL * 3;
94+
unsigned NGroups = 7;
95+
unsigned Size = GroupSize * NGroups;
96+
SimdElemT *A = malloc_shared<SimdElemT>(Size, q);
97+
98+
for (unsigned i = 0; i < Size; ++i) {
99+
A[i] = (SimdElemT)i;
100+
}
101+
sycl::range<1> GlobalRange{Size};
102+
sycl::range<1> LocalRange{GroupSize};
103+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
104+
105+
try {
106+
auto e = q.submit([&](handler &cgh) {
107+
cgh.parallel_for<TestID<SpmdT, SimdElemT, IsUniform>>(
108+
Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] {
109+
sub_group sg = ndi.get_sub_group();
110+
SpmdT val = (SpmdT)sg.get_group_linear_id(); // 0 .. GroupSize-1
111+
SimdElemT res = 0;
112+
113+
if constexpr (IsUniform) {
114+
res =
115+
invoke_simd(sg, SIMD_CALLEE_UNIFORM<SimdElemT>, uniform{val});
116+
} else {
117+
res = invoke_simd(sg, SIMD_CALLEE<SimdElemT>, val);
118+
}
119+
uint32_t i = ndi.get_global_linear_id();
120+
A[i] = res;
121+
});
122+
});
123+
e.wait();
124+
} catch (sycl::exception const &e) {
125+
std::cout << "SYCL exception caught: " << e.what() << '\n';
126+
sycl::free(A, q);
127+
return false;
128+
}
129+
int err_cnt = 0;
130+
131+
for (unsigned i = 0; i < Size; ++i) {
132+
unsigned group_id = i / GroupSize;
133+
uint32_t sg_id = (i - (group_id * GroupSize)) / VL;
134+
SimdElemT test = A[i];
135+
SimdElemT gold = calc((SimdElemT)sg_id);
136+
if ((test != gold) && (++err_cnt < 10)) {
137+
std::cout << "failed at index " << i << ", " << test << " != " << gold
138+
<< "(gold)\n";
139+
}
140+
}
141+
if (err_cnt > 0) {
142+
std::cout << " pass rate: "
143+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
144+
<< (Size - err_cnt) << "/" << Size << ")\n";
145+
}
146+
sycl::free(A, q);
147+
return err_cnt == 0;
148+
}
149+
150+
int main(void) {
151+
queue q(ESIMDSelector{}, createExceptionHandler());
152+
153+
auto dev = q.get_device();
154+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
155+
bool passed = true;
156+
157+
constexpr bool UNIFORM = true;
158+
constexpr bool NON_UNIFORM = false;
159+
160+
// With uniform parameters SPMD actual argument corresponds to SIMD scalar
161+
// argument, and standard C++ arithmetic conversion are implicitly
162+
// applied by the compiler. Any aritimetic type can be implicitly coverted to
163+
// any other arithmetic type.
164+
165+
passed &= test<int, float, UNIFORM>(q);
166+
passed &= test<unsigned char, uint64_t, UNIFORM>(q);
167+
passed &= test<char, double, UNIFORM>(q);
168+
passed &= test<double, char, UNIFORM>(q);
169+
170+
// With non-uniform parameters, SPMD actual argument of type T is "widened" to
171+
// std::simd<T, VL> and then convered to SIMD vector argument
172+
// (std::simd<T1, VL>) using std::simd implicit conversion constructors. They
173+
// allow only non-narrowing conversions (e.g. int -> float is narrowing and
174+
// hence is prohibited).
175+
176+
passed &= test<char, long, NON_UNIFORM>(q);
177+
passed &= test<short, short, NON_UNIFORM>(q);
178+
passed &= test<float, double, NON_UNIFORM>(q);
179+
180+
std::cout << (passed ? "Passed\n" : "FAILED\n");
181+
return passed ? 0 : 1;
182+
}

SYCL/InvokeSimd/invoke_simd_smoke.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// REQUIRES: gpu && linux
77
// UNSUPPORTED: cuda || hip
88

9-
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd %s -o %t.out
9+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
1010
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
1111

1212
#include <sycl/ext/intel/esimd.hpp>
@@ -37,9 +37,14 @@ ESIMD_CALLEE(float *A, esimd::simd<float, VL> b, int i) SYCL_ESIMD_FUNCTION {
3737
return a + b;
3838
}
3939

40-
SYCL_EXTERNAL
41-
simd<float, VL> __regcall SIMD_CALLEE(float *A, simd<float, VL> b,
42-
int i) SYCL_ESIMD_FUNCTION;
40+
// Use two functions with the same signature called via invoke_simd for better
41+
// testing.
42+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
43+
simd<float, VL> __regcall SIMD_CALLEE1(float *A, simd<float, VL> b,
44+
int i) SYCL_ESIMD_FUNCTION;
45+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
46+
simd<float, VL> __regcall SIMD_CALLEE2(float *A, simd<float, VL> b,
47+
int i) SYCL_ESIMD_FUNCTION;
4348

4449
float SPMD_CALLEE(float *A, float b, int i) { return A[i] + b; }
4550

@@ -115,10 +120,14 @@ int main(void) {
115120
float res = 0;
116121

117122
if constexpr (use_invoke_simd) {
118-
res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, B[wi_id],
123+
res = invoke_simd(sg, SIMD_CALLEE1, uniform{A}, B[wi_id],
119124
uniform{i});
125+
res += invoke_simd(sg, SIMD_CALLEE2, uniform{A}, B[wi_id],
126+
uniform{i});
127+
120128
} else {
121129
res = SPMD_CALLEE(A, B[wi_id], wi_id);
130+
res += SPMD_CALLEE(A, B[wi_id], wi_id);
122131
}
123132
C[wi_id] = res;
124133
});
@@ -135,10 +144,10 @@ int main(void) {
135144
int err_cnt = 0;
136145

137146
for (unsigned i = 0; i < Size; ++i) {
138-
if (A[i] + B[i] != C[i]) {
147+
if (2 * (A[i] + B[i]) != C[i]) {
139148
if (++err_cnt < 10) {
140-
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
141-
<< " + " << B[i] << "\n";
149+
std::cout << "failed at index " << i << ", " << C[i] << " != 2*("
150+
<< A[i] << " + " << B[i] << ")\n";
142151
}
143152
}
144153
}
@@ -156,8 +165,15 @@ int main(void) {
156165
}
157166

158167
SYCL_EXTERNAL
159-
simd<float, VL> __regcall SIMD_CALLEE(float *A, simd<float, VL> b,
160-
int i) SYCL_ESIMD_FUNCTION {
168+
simd<float, VL> __regcall SIMD_CALLEE1(float *A, simd<float, VL> b,
169+
int i) SYCL_ESIMD_FUNCTION {
170+
esimd::simd<float, VL> res = ESIMD_CALLEE(A, b, i);
171+
return res;
172+
}
173+
174+
SYCL_EXTERNAL
175+
simd<float, VL> __regcall SIMD_CALLEE2(float *A, simd<float, VL> b,
176+
int i) SYCL_ESIMD_FUNCTION {
161177
esimd::simd<float, VL> res = ESIMD_CALLEE(A, b, i);
162178
return res;
163179
}

0 commit comments

Comments
 (0)