Skip to content

Commit e860b14

Browse files
authored
[SYCL][ESIMD][Doc] Add invoke_simd example (#10052)
This change adds an invoke_simd example that scales the input data and updates related doc to point to it. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 75a6d09 commit e860b14

File tree

3 files changed

+138
-1
lines changed

3 files changed

+138
-1
lines changed

sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ the same application.
8484
### SYCL and ESIMD interoperability
8585

8686
SYCL kernels can call ESIMD functions using the special `invoke_simd` API.
87+
88+
More examples are available [here](./examples/)
89+
8790
More details are available in [invoke_simd spec](../../experimental/sycl_ext_oneapi_invoke_simd.asciidoc)
91+
8892
Test cases are available [here](../../../../test-e2e/InvokeSimd/)
8993

9094
```cpp

sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,43 @@ is to show the basic ESIMD APIs in well known examples.
2626
});
2727
}).wait_and_throw();
2828
```
29+
2) Calling ESIMD from SYCL using invoke_simd - ["invoke_simd"](./invoke_simd.md).
30+
Please see the full source code here: ["invoke_simd"](./invoke_simd.md)
31+
```c++
32+
[[intel::device_indirectly_callable]] simd<int, VL> __regcall scale(
33+
simd<int, VL> x, int n) SYCL_ESIMD_FUNCTION {
34+
esimd::simd<int, VL> vec = x;
35+
esimd::simd<int, VL> result = vec * n;
36+
return result;
37+
}
38+
39+
int main(void) {
40+
int *in = new int[SIZE];
41+
int *out = new int[SIZE];
42+
buffer<int, 1> bufin(in, range<1>(SIZE));
43+
buffer<int, 1> bufout(out, range<1>(SIZE));
44+
45+
// scale factor
46+
int n = 2;
47+
48+
sycl::range<1> GlobalRange{SIZE};
49+
sycl::range<1> LocalRange{VL};
50+
51+
q.submit([&](handler &cgh) {
52+
auto accin = bufin.get_access<access::mode::read>(cgh);
53+
auto accout = bufout.get_access<access::mode::write>(cgh);
54+
55+
cgh.parallel_for<class Scale>(
56+
nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) {
57+
sycl::sub_group sg = item.get_sub_group();
58+
unsigned int offset = item.get_global_linear_id();
59+
60+
int in_val = sg.load(accin.get_pointer() + offset);
61+
62+
int out_val = invoke_simd(sg, scale, in_val, uniform{n});
2963
30-
2) TODO: Add more examples here.
64+
sg.store(accout.get_pointer() + offset, out_val);
65+
});
66+
});
67+
```
68+
3) TODO: Add more examples here.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
## Call ESIMD from SYCL using invoke_simd
2+
3+
In this example, we will scale the input data by a factor of 2 using `invoke_simd`.
4+
5+
Compile and run:
6+
```bash
7+
> clang++ -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr invoke_simd.cpp
8+
9+
> IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./a.out
10+
Running on Intel(R) UHD Graphics 630
11+
Passed
12+
```
13+
Source code:
14+
```c++
15+
#include <sycl/ext/intel/esimd.hpp>
16+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
17+
#include <sycl/sycl.hpp>
18+
#include <iostream>
19+
20+
using namespace sycl;
21+
using namespace sycl::ext::oneapi::experimental;
22+
namespace esimd = sycl::ext::intel::esimd;
23+
24+
constexpr int SIZE = 512;
25+
constexpr int VL = 16;
26+
27+
[[intel::device_indirectly_callable]] simd<int, VL> __regcall scale(
28+
simd<int, VL> x, int n) SYCL_ESIMD_FUNCTION {
29+
esimd::simd<int, VL> vec = x;
30+
esimd::simd<int, VL> result = vec * n;
31+
return result;
32+
}
33+
34+
int main(void) {
35+
auto q = queue{gpu_selector_v};
36+
auto dev = q.get_device();
37+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
38+
<< "\n";
39+
bool passed = true;
40+
int *in = new int[SIZE];
41+
int *out = new int[SIZE];
42+
43+
for (int i = 0; i < SIZE; ++i) {
44+
in[i] = i;
45+
out[i] = 0;
46+
}
47+
48+
// scale factor
49+
int n = 2;
50+
51+
try {
52+
buffer<int, 1> bufin(in, range<1>(SIZE));
53+
buffer<int, 1> bufout(out, range<1>(SIZE));
54+
55+
sycl::range<1> GlobalRange{SIZE};
56+
sycl::range<1> LocalRange{VL};
57+
58+
auto e = q.submit([&](handler &cgh) {
59+
auto accin = bufin.get_access<access::mode::read>(cgh);
60+
auto accout = bufout.get_access<access::mode::write>(cgh);
61+
62+
cgh.parallel_for<class Scale>(
63+
nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) {
64+
sycl::sub_group sg = item.get_sub_group();
65+
unsigned int offset = item.get_global_linear_id();
66+
67+
int in_val = sg.load(accin.get_pointer() + offset);
68+
69+
int out_val = invoke_simd(sg, scale, in_val, uniform{n});
70+
71+
sg.store(accout.get_pointer() + offset, out_val);
72+
});
73+
});
74+
e.wait();
75+
} catch (sycl::exception const &e) {
76+
delete[] in;
77+
delete[] out;
78+
std::cout << "SYCL exception caught: " << e.what() << '\n';
79+
return 1;
80+
}
81+
82+
for (int i = 0; i < SIZE; ++i) {
83+
if (out[i] != in[i] * n) {
84+
std::cout << "failed at index " << i << ", " << out[i] << " != " << in[i]
85+
<< " * " << n << "\n";
86+
passed = false;
87+
}
88+
}
89+
delete[] in;
90+
delete[] out;
91+
std::cout << (passed ? "Passed\n" : "FAILED\n");
92+
return passed ? 0 : 1;
93+
}
94+
95+
```

0 commit comments

Comments
 (0)