Skip to content

Commit e8fb6ff

Browse files
againullbb-sycl
authored andcommitted
[SYCL] Test indirect access memory tracking in the L0 plugin (intel#532)
1 parent a16100e commit e8fb6ff

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// REQUIRES: level_zero, level_zero_dev_kit
2+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %level_zero_options %threads_lib %s -o %t.out
3+
// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
4+
//
5+
// CHECK: pass
6+
//
7+
// Test checks memory tracking and deferred release functionality which is
8+
// enabled by SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY env variable.
9+
// Tracking and deferred release is necessary for memory which can be indirectly
10+
// accessed because such memory can't be released as soon as someone calls free.
11+
// It can be released only after completion of all kernels which can possibly
12+
// access this memory indirectly. Currently the Level Zero plugin marks all
13+
// kernels with indirect access flag conservatively. This flag means that kernel
14+
// starts to reference all existing memory allocations (even if not explicitly
15+
// used in the kernel) as soon as it is submitted. That's why basically all
16+
// memory allocations need to be tracked. This is crucial in multi-threaded
17+
// applications because kernels with indirect access flag reference allocations
18+
// from another threads causing the following error if memory is released too
19+
// early:
20+
//
21+
// ../neo/opencl/source/os_interface/linux/drm_command_stream.inl
22+
// Aborted (core dumped)
23+
//
24+
// Such multi-threaded scenario is checked in this test. Test is expected to
25+
// pass when memory tracking is enabled and fail otherwise.
26+
27+
#include <cassert>
28+
#include <iostream>
29+
#include <thread>
30+
31+
#define LENGTH 10
32+
33+
#include <CL/sycl.hpp>
34+
using namespace sycl;
35+
36+
void update_d2_data(queue &q) {
37+
int d2_data[LENGTH][LENGTH];
38+
39+
try {
40+
size_t d_size = LENGTH;
41+
buffer<int, 2> b_d2_data((int *)d2_data, range<2>(d_size, d_size));
42+
43+
q.submit([&](handler &cgh) {
44+
accessor acc{b_d2_data, cgh};
45+
46+
cgh.parallel_for<class write_d2_data>(
47+
range<2>{d_size, d_size},
48+
[=](id<2> idx) { acc[idx] = idx[0] * idx[1]; });
49+
});
50+
q.wait_and_throw();
51+
} catch (exception &e) {
52+
std::cerr << std::string(e.what());
53+
}
54+
55+
for (size_t i = 0; i < LENGTH; i++) {
56+
for (size_t j = 0; j < LENGTH; j++) {
57+
assert(d2_data[i][j] == i * j);
58+
}
59+
}
60+
}
61+
void update_d3_data(queue &q) {
62+
int d3_data[LENGTH][LENGTH][LENGTH];
63+
64+
try {
65+
size_t d_size = LENGTH;
66+
buffer<int, 3> b_d3_data((int *)d3_data, range<3>(d_size, d_size, d_size));
67+
68+
q.submit([&](handler &cgh) {
69+
accessor acc{b_d3_data, cgh};
70+
71+
cgh.parallel_for<class write_d3_data>(
72+
range<3>{d_size, d_size, d_size},
73+
[=](id<3> idx) { acc[idx] = idx[0] * idx[1] * idx[2]; });
74+
});
75+
q.wait_and_throw();
76+
} catch (exception &e) {
77+
std::cerr << std::string(e.what());
78+
}
79+
80+
for (size_t i = 0; i < LENGTH; i++) {
81+
for (size_t j = 0; j < LENGTH; j++) {
82+
for (size_t k = 0; k < LENGTH; k++) {
83+
assert(d3_data[i][j][k] == i * j * k);
84+
}
85+
}
86+
}
87+
}
88+
89+
int main() {
90+
static const size_t n = 8;
91+
std::thread d2_threads[n];
92+
std::thread d3_threads[n];
93+
94+
auto thread_body = [&](int type) {
95+
queue q;
96+
switch (type) {
97+
case 1:
98+
update_d2_data(q);
99+
break;
100+
case 2:
101+
update_d3_data(q);
102+
break;
103+
}
104+
};
105+
106+
for (size_t i = 0; i < n; ++i) {
107+
d2_threads[i] = std::thread(thread_body, 1);
108+
d3_threads[i] = std::thread(thread_body, 2);
109+
}
110+
111+
for (size_t i = 0; i < n; ++i) {
112+
d2_threads[i].join();
113+
d3_threads[i].join();
114+
}
115+
116+
{
117+
queue q;
118+
119+
update_d2_data(q);
120+
update_d3_data(q);
121+
}
122+
123+
std::cout << "pass" << std::endl;
124+
return 0;
125+
}

0 commit comments

Comments
 (0)