Skip to content

Commit ae237a9

Browse files
committed
[SYCL] Test indirect access memory tracking in the L0 plugin
1 parent 1aa3199 commit ae237a9

File tree

1 file changed

+127
-0
lines changed

1 file changed

+127
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// UNSUPPORTED: windows
2+
// REQUIRES: level_zero, level_zero_dev_kit
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %level_zero_options -lpthread %s -o %t.out
4+
// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
5+
//
6+
// CHECK: pass
7+
//
8+
// Test checks memory tracking and deferred release functionality which is
9+
// enabled by SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY env variable.
10+
// Tracking and deferred release is necessary for memory which can be indirectly
11+
// accessed because such memory can't be released as soon as someone calls free.
12+
// It can be released only after completion of all kernels which can possibly
13+
// access this memory indirectly. Currently the Level Zero plugin marks all
14+
// kernels with indirect access flag conservatively. This flag means that kernel
15+
// starts to reference all existing memory allocations (even if not explicitly
16+
// used in the kernel) as soon as it is submitted. That's why basically all
17+
// memory allocations need to be tracked. This is crucial in multi-threaded
18+
// applications because kernels with indirect access flag reference allocations
19+
// from another threads causing the following error if memory is released too
20+
// early:
21+
//
22+
// ../neo/opencl/source/os_interface/linux/drm_command_stream.inl
23+
// Aborted (core dumped)
24+
//
25+
// Such multi-threaded scenario is checked in this test. Test is expected to
26+
// pass when memory tracking is enabled and fail otherwise.
27+
28+
#include <cassert>
29+
#include <iostream>
30+
#include <thread>
31+
32+
#define LENGTH 10
33+
34+
#include <CL/sycl.hpp>
35+
using namespace sycl;
36+
37+
void update_d2_data(queue &q) {
38+
int d2_data[LENGTH][LENGTH];
39+
40+
try {
41+
size_t d_size = LENGTH;
42+
buffer<int, 2> b_d2_data((int *)d2_data, range<2>(d_size, d_size));
43+
44+
q.submit([&](handler &cgh) {
45+
accessor acc{b_d2_data, cgh};
46+
47+
cgh.parallel_for<class write_d2_data>(
48+
range<2>{d_size, d_size},
49+
[=](id<2> idx) { acc[idx] = idx[0] * idx[1]; });
50+
});
51+
q.wait_and_throw();
52+
} catch (exception &e) {
53+
std::cerr << std::string(e.what());
54+
}
55+
56+
for (size_t i = 0; i < LENGTH; i++) {
57+
for (size_t j = 0; j < LENGTH; j++) {
58+
assert(d2_data[i][j] == i * j);
59+
}
60+
}
61+
}
62+
void update_d3_data(queue &q) {
63+
int d3_data[LENGTH][LENGTH][LENGTH];
64+
65+
try {
66+
size_t d_size = LENGTH;
67+
buffer<int, 3> b_d3_data((int *)d3_data, range<3>(d_size, d_size, d_size));
68+
69+
q.submit([&](handler &cgh) {
70+
accessor acc{b_d3_data, cgh};
71+
72+
cgh.parallel_for<class write_d3_data>(
73+
range<3>{d_size, d_size, d_size},
74+
[=](id<3> idx) { acc[idx] = idx[0] * idx[1] * idx[2]; });
75+
});
76+
q.wait_and_throw();
77+
} catch (exception &e) {
78+
std::cerr << std::string(e.what());
79+
}
80+
81+
for (size_t i = 0; i < LENGTH; i++) {
82+
for (size_t j = 0; j < LENGTH; j++) {
83+
for (size_t k = 0; k < LENGTH; k++) {
84+
assert(d3_data[i][j][k] == i * j * k);
85+
}
86+
}
87+
}
88+
}
89+
90+
int main() {
91+
static const size_t n = 8;
92+
std::thread d2_threads[n];
93+
std::thread d3_threads[n];
94+
95+
auto thread_body = [&](int type) {
96+
queue q;
97+
switch (type) {
98+
case 1:
99+
update_d2_data(q);
100+
break;
101+
case 2:
102+
update_d3_data(q);
103+
break;
104+
}
105+
};
106+
107+
for (size_t i = 0; i < n; ++i) {
108+
d2_threads[i] = std::thread(thread_body, 1);
109+
d3_threads[i] = std::thread(thread_body, 2);
110+
}
111+
112+
for (size_t i = 0; i < n; ++i) {
113+
d2_threads[i].join();
114+
d3_threads[i].join();
115+
}
116+
117+
{
118+
queue q;
119+
120+
update_d2_data(q);
121+
update_d3_data(q);
122+
}
123+
124+
std::cout << "pass" << std::endl;
125+
return 0;
126+
}
127+

0 commit comments

Comments
 (0)