Skip to content

Commit f1986b4

Browse files
authored
[SYCL][ESIMD][Doc] Add simd_view example (#10128)
1 parent 74e83d5 commit f1986b4

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,4 +209,35 @@ is to show the basic ESIMD APIs in well known examples.
209209
}
210210
```
211211

212+
4) Using simd_view to construct views of simd objects - ["simd_view"](./simd_view.md).
213+
Please see the full source code here: ["simd_view"](./simd_view.md).
214+
215+
```c++
216+
float *a = malloc_shared<float>(Size, q); // USM memory for A.
217+
218+
// Initialize a.
219+
220+
// For elements of 'a' with indices, which are:
221+
// * multiple of 4: multiply by 6;
222+
// * multiple of 2 (but not of 4): multiply by 3;
223+
q.parallel_for(Size / VL, [=](id<1> i) [[intel::sycl_explicit_simd]] {
224+
auto element_offset = i * VL;
225+
simd<float, VL> vec_a(a + element_offset);
226+
227+
// simd_view of simd<float, VL> using the even-index elements.
228+
auto vec_a_even_elems_view = vec_a.select<VL / 2, 2>(0);
229+
vec_a_even_elems_view *= 3;
230+
231+
// simd_view with even indices constructed from previous
232+
// simd_view of simd<float, VL> using the even-index elements.
233+
// This results in a simd_view containing every fourth element
234+
// of vec_a.
235+
auto vec_a_mult_four_view = vec_a_even_elems_view.select<VL / 4, 2>(0);
236+
vec_a_mult_four_view *= 2;
237+
238+
// Copy back to the memory.
239+
vec_a.copy_to(a + element_offset);
240+
}).wait_and_throw();
241+
```
242+
212243
6) TODO: Add more examples here.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
### Using simd_view to construct views of simd objects
2+
3+
In this example we demonstrate using `simd_view` to construct multi-level
4+
views into `simd` objects.
5+
6+
Compile and run:
7+
```bash
8+
> clang++ -fsycl simd_view.cpp
9+
10+
> ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./a.out
11+
```
12+
13+
Source code:
14+
```C++
15+
16+
#include <sycl/ext/intel/esimd.hpp>
17+
#include <sycl/sycl.hpp>
18+
19+
using namespace sycl;
20+
using namespace sycl::ext::intel::esimd;
21+
22+
inline auto create_exception_handler() {
23+
return [](exception_list l) {
24+
for (auto ep : l) {
25+
try {
26+
std::rethrow_exception(ep);
27+
} catch (sycl::exception &e0) {
28+
std::cout << "sycl::exception: " << e0.what() << std::endl;
29+
} catch (std::exception &e) {
30+
std::cout << "std::exception: " << e.what() << std::endl;
31+
} catch (...) {
32+
std::cout << "generic exception\n";
33+
}
34+
}
35+
};
36+
}
37+
38+
struct usm_deleter {
39+
queue q;
40+
void operator()(void *ptr) {
41+
if (ptr)
42+
sycl::free(ptr, q);
43+
}
44+
};
45+
46+
int main() {
47+
constexpr unsigned Size = 128;
48+
constexpr unsigned VL = 32;
49+
int err_cnt = 0;
50+
51+
try {
52+
queue q(gpu_selector_v, create_exception_handler());
53+
auto dev = q.get_device();
54+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
55+
56+
float *a = malloc_shared<float>(Size, q); // USM memory for A.
57+
float *b = new float[Size];
58+
59+
std::unique_ptr<float, usm_deleter> guard_a(a, usm_deleter{q});
60+
std::unique_ptr<float> guard_b(b);
61+
62+
// Initialize a and b.
63+
for (unsigned i = 0; i < Size; i++)
64+
a[i] = b[i] = i * i;
65+
66+
// For elements of 'a' with indices, which are:
67+
// * multiple of 4: multiply by 6;
68+
// * multiple of 2: multiply by 3;
69+
q.parallel_for(Size / VL, [=](id<1> i) [[intel::sycl_explicit_simd]] {
70+
auto element_offset = i * VL;
71+
simd<float, VL> vec_a(a + element_offset);
72+
73+
// simd_view of simd<float, VL> using the even-index elements.
74+
auto vec_a_even_elems_view = vec_a.select<VL / 2, 2>(0);
75+
vec_a_even_elems_view *= 3;
76+
77+
// simd_view with even indices constructed from previous
78+
// simd_view of simd<float, VL> using the even-index elements.
79+
// This results in a simd_view containing every fourth element
80+
// of vec_a.
81+
auto vec_a_mult_four_view = vec_a_even_elems_view.select<VL / 4, 2>(0);
82+
vec_a_mult_four_view *= 2;
83+
84+
// Copy back to the memory.
85+
vec_a.copy_to(a + element_offset);
86+
}).wait_and_throw();
87+
88+
// Verify on host.
89+
for (unsigned i = 0; i < Size; ++i) {
90+
float gold = b[i];
91+
if (i % 2 == 0)
92+
gold *= 3;
93+
if (i % 4 == 0)
94+
gold *= 2;
95+
if (a[i] != gold) {
96+
err_cnt++;
97+
std::cout << "failed at" << i << ": " << a[i] << " != " << (float)i
98+
<< " + " << (float)i << std::endl;
99+
}
100+
}
101+
} catch (sycl::exception &e) {
102+
std::cout << "SYCL exception caught: " << e.what() << "\n";
103+
return 1;
104+
}
105+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
106+
return err_cnt > 0 ? 1 : 0;
107+
}
108+
```

0 commit comments

Comments
 (0)