Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 23e8f46

Browse files
authored
[SYCL] Add E2E invoke_simd 'smoke' test. (#1124)
* [SYCL] Add E2E invoke_simd 'smoke' test. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent de2eb4c commit 23e8f46

File tree

1 file changed

+163
-0
lines changed

1 file changed

+163
-0
lines changed

SYCL/InvokeSimd/invoke_simd_smoke.cpp

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// The tests a basic E2E invoke_simd test checking that invoke_simd
2+
// compiles and executes correctly on GPU, where the SIMD target is a
3+
// ESIMD function.
4+
5+
// TODO: enable on Windows once driver is ready
6+
// REQUIRES: gpu && linux
7+
// UNSUPPORTED: cuda || hip
8+
9+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd %s -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
#include <sycl/ext/intel/esimd.hpp>
13+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
14+
#include <sycl/ext/oneapi/experimental/uniform.hpp>
15+
#include <sycl/sycl.hpp>
16+
17+
#include <functional>
18+
#include <iostream>
19+
#include <type_traits>
20+
21+
using namespace sycl::ext::oneapi::experimental;
22+
using namespace sycl;
23+
namespace esimd = sycl::ext::intel::esimd;
24+
25+
constexpr int VL = 16;
26+
27+
#ifndef INVOKE_SIMD
28+
#define INVOKE_SIMD 1
29+
#endif
30+
31+
constexpr bool use_invoke_simd = INVOKE_SIMD != 0;
32+
33+
__attribute__((always_inline)) esimd::simd<float, VL>
34+
ESIMD_CALLEE(float *A, esimd::simd<float, VL> b, int i) SYCL_ESIMD_FUNCTION {
35+
esimd::simd<float, VL> a;
36+
a.copy_from(A + i);
37+
return a + b;
38+
}
39+
40+
SYCL_EXTERNAL
41+
simd<float, VL> __regcall SIMD_CALLEE(float *A, simd<float, VL> b,
42+
int i) SYCL_ESIMD_FUNCTION;
43+
44+
float SPMD_CALLEE(float *A, float b, int i) { return A[i] + b; }
45+
46+
class ESIMDSelector : public device_selector {
47+
// Require GPU device unless HOST is requested in SYCL_DEVICE_FILTER env
48+
virtual int operator()(const device &device) const {
49+
if (const char *dev_filter = getenv("SYCL_DEVICE_FILTER")) {
50+
std::string filter_string(dev_filter);
51+
if (filter_string.find("gpu") != std::string::npos)
52+
return device.is_gpu() ? 1000 : -1;
53+
if (filter_string.find("host") != std::string::npos)
54+
return device.is_host() ? 1000 : -1;
55+
std::cerr
56+
<< "Supported 'SYCL_DEVICE_FILTER' env var values are 'gpu' and "
57+
"'host', '"
58+
<< filter_string << "' does not contain such substrings.\n";
59+
return -1;
60+
}
61+
// If "SYCL_DEVICE_FILTER" not defined, only allow gpu device
62+
return device.is_gpu() ? 1000 : -1;
63+
}
64+
};
65+
66+
inline auto createExceptionHandler() {
67+
return [](exception_list l) {
68+
for (auto ep : l) {
69+
try {
70+
std::rethrow_exception(ep);
71+
} catch (sycl::exception &e0) {
72+
std::cout << "sycl::exception: " << e0.what() << std::endl;
73+
} catch (std::exception &e) {
74+
std::cout << "std::exception: " << e.what() << std::endl;
75+
} catch (...) {
76+
std::cout << "generic exception\n";
77+
}
78+
}
79+
};
80+
}
81+
82+
int main(void) {
83+
constexpr unsigned Size = 1024;
84+
constexpr unsigned GroupSize = 4 * VL;
85+
86+
queue q(ESIMDSelector{}, createExceptionHandler());
87+
88+
auto dev = q.get_device();
89+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
90+
auto ctxt = q.get_context();
91+
float *A = malloc_shared<float>(Size, q);
92+
float *B = malloc_shared<float>(Size, q);
93+
float *C = malloc_shared<float>(Size, q);
94+
95+
for (unsigned i = 0; i < Size; ++i) {
96+
A[i] = B[i] = i;
97+
C[i] = -1;
98+
}
99+
100+
sycl::range<1> GlobalRange{Size};
101+
// Number of workitems in each workgroup.
102+
sycl::range<1> LocalRange{GroupSize};
103+
104+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
105+
106+
try {
107+
auto e = q.submit([&](handler &cgh) {
108+
cgh.parallel_for<class Test>(
109+
Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] {
110+
sub_group sg = ndi.get_sub_group();
111+
group<1> g = ndi.get_group();
112+
uint32_t i =
113+
sg.get_group_linear_id() * VL + g.get_linear_id() * GroupSize;
114+
uint32_t wi_id = i + sg.get_local_id();
115+
float res = 0;
116+
117+
if constexpr (use_invoke_simd) {
118+
res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, B[wi_id],
119+
uniform{i});
120+
} else {
121+
res = SPMD_CALLEE(A, B[wi_id], wi_id);
122+
}
123+
C[wi_id] = res;
124+
});
125+
});
126+
e.wait();
127+
} catch (sycl::exception const &e) {
128+
std::cout << "SYCL exception caught: " << e.what() << '\n';
129+
sycl::free(A, q);
130+
sycl::free(B, q);
131+
sycl::free(C, q);
132+
return 1;
133+
}
134+
135+
int err_cnt = 0;
136+
137+
for (unsigned i = 0; i < Size; ++i) {
138+
if (A[i] + B[i] != C[i]) {
139+
if (++err_cnt < 10) {
140+
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
141+
<< " + " << B[i] << "\n";
142+
}
143+
}
144+
}
145+
if (err_cnt > 0) {
146+
std::cout << " pass rate: "
147+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
148+
<< (Size - err_cnt) << "/" << Size << ")\n";
149+
}
150+
sycl::free(A, q);
151+
sycl::free(B, q);
152+
sycl::free(C, q);
153+
154+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
155+
return err_cnt > 0 ? 1 : 0;
156+
}
157+
158+
SYCL_EXTERNAL
159+
simd<float, VL> __regcall SIMD_CALLEE(float *A, simd<float, VL> b,
160+
int i) SYCL_ESIMD_FUNCTION {
161+
esimd::simd<float, VL> res = ESIMD_CALLEE(A, b, i);
162+
return res;
163+
}

0 commit comments

Comments
 (0)