Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit f63f13e

Browse files
authored
[SYCL] Add tests for reuse of l0 events in plugin (#1263)
1 parent 6d27a02 commit f63f13e

File tree

3 files changed

+315
-2
lines changed

3 files changed

+315
-2
lines changed

SYCL/Plugin/level-zero-event-leak.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
// UNSUPPORTED: windows
44
//
55
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out
6-
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
6+
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
7+
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -DCHECK_INORDER -o %t.inorder.out
10+
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.inorder.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
11+
// RUN: env SYCL_DEVICE_FILTER=level_zero ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.inorder.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
712
//
813
// CHECK-NOT: LEAK
914

@@ -36,10 +41,31 @@
3641
#include <sycl/sycl.hpp>
3742

3843
int main(int argc, char **argv) {
44+
assert(argc == 2 && "Invalid number of arguments");
45+
std::string use_queue_finish(argv[1]);
46+
47+
bool Use = false;
48+
if (use_queue_finish == "wait") {
49+
Use = true;
50+
std::cerr << "Use queue::wait" << std::endl;
51+
} else if (use_queue_finish == "nowait") {
52+
std::cerr << "No wait. Ensure resources are released anyway" << std::endl;
53+
} else {
54+
assert(0 && "Unsupported parameter value");
55+
}
56+
57+
#ifdef CHECK_INORDER
58+
sycl::queue Q({sycl::property::queue::in_order{}});
59+
#else
3960
sycl::queue Q;
61+
#endif
62+
4063
const unsigned n_chunk = 1000;
4164
for (int i = 0; i < n_chunk; i++)
4265
Q.single_task([=]() {});
43-
Q.wait();
66+
67+
if (Use)
68+
Q.wait();
69+
4470
return 0;
4571
}

SYCL/Plugin/level_zero_inorder.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// REQUIRES: level_zero
2+
//
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
//
5+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
6+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
7+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
8+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
9+
//
10+
// The test checks that the kernels are executed in-order, regardless of
11+
// batching. IMPORTANT NOTE: this is a critical test, double-check if your
12+
// changes are related to L0 events and links between commands.
13+
14+
#include <CL/sycl.hpp>
15+
#include <cassert>
16+
#include <iostream>
17+
#include <numeric>
18+
19+
static constexpr int MAGIC_NUM1 = 2;
20+
static constexpr int buffer_size = 100;
21+
sycl::usm::alloc AllocType = sycl::usm::alloc::shared;
22+
23+
const size_t PartSize = 5;
24+
const bool PartiallyPrint = buffer_size > 2 * PartSize;
25+
26+
void ValidationPrint(const std::string &vectName, int *vect,
27+
const std::function<int(size_t)> &ExpectedVal) {
28+
std::cerr << vectName;
29+
if (!PartiallyPrint) {
30+
for (size_t i = 0u; i < buffer_size; ++i) {
31+
std::cerr << " " << vect[i];
32+
}
33+
} else {
34+
for (size_t i = 0u; i < PartSize; ++i) {
35+
std::cerr << " " << vect[i];
36+
}
37+
std::cerr << " ... ";
38+
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
39+
std::cerr << " " << vect[i];
40+
}
41+
}
42+
43+
std::cerr << std::endl << "expected[] = ";
44+
if (!PartiallyPrint) {
45+
for (size_t i = 0u; i < buffer_size; ++i) {
46+
std::cerr << " " << ExpectedVal(i);
47+
}
48+
} else {
49+
for (size_t i = 0u; i < PartSize; ++i) {
50+
std::cerr << " " << ExpectedVal(i);
51+
}
52+
std::cerr << " ... ";
53+
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
54+
std::cerr << " " << ExpectedVal(i);
55+
}
56+
}
57+
std::cerr << std::endl;
58+
for (int i = 0; i < buffer_size; ++i) {
59+
if (vect[i] != ExpectedVal(i)) {
60+
std::cerr << "i = " << i << " is wrong!!! " << std::endl;
61+
break;
62+
}
63+
}
64+
std::cerr << std::endl;
65+
}
66+
67+
void IfTrueIncrementByValue(sycl::queue Q, sycl::range<1> Range, int *Harray,
68+
int ValueToCheck, int ValueToIncrement) {
69+
Q.submit([&](sycl::handler &CGH) {
70+
CGH.parallel_for<class increment_usm>(Range, [=](sycl::item<1> itemID) {
71+
size_t i = itemID.get_id(0);
72+
if (Harray[i] == ValueToCheck) {
73+
Harray[i] += ValueToIncrement;
74+
}
75+
});
76+
});
77+
}
78+
79+
void RunCalculation(sycl::queue Q) {
80+
sycl::range<1> Range(buffer_size);
81+
auto Dev = Q.get_device();
82+
if (!Dev.has(sycl::aspect::usm_shared_allocations))
83+
return;
84+
85+
int *values = sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);
86+
87+
try {
88+
Q.submit([&](sycl::handler &cgh) {
89+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
90+
size_t i = itemID.get_id(0);
91+
values[i] = 1;
92+
});
93+
});
94+
95+
IfTrueIncrementByValue(Q, Range, values, 1, 10);
96+
97+
IfTrueIncrementByValue(Q, Range, values, 11, 100);
98+
99+
IfTrueIncrementByValue(Q, Range, values, 111, 1000);
100+
101+
IfTrueIncrementByValue(Q, Range, values, 1111, 10000);
102+
103+
IfTrueIncrementByValue(Q, Range, values, 11111, 100000);
104+
105+
Q.wait();
106+
107+
ValidationPrint("vector[] = ", values, [&](size_t i) { return 111111; });
108+
109+
for (int i = 0; i < buffer_size; ++i) {
110+
int expected = 111111;
111+
assert(values[i] == expected);
112+
}
113+
114+
} catch (sycl::exception &e) {
115+
std::cout << "Exception: " << std::string(e.what()) << std::endl;
116+
}
117+
118+
free(values, Q);
119+
}
120+
121+
int main(int argc, char *argv[]) {
122+
sycl::queue Q({sycl::property::queue::in_order{}});
123+
124+
RunCalculation(Q);
125+
126+
std::cout << "The test passed." << std::endl;
127+
return 0;
128+
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// REQUIRES: level_zero
2+
//
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
//
5+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
6+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
7+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
8+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
9+
//
10+
// The test checks that interleaving using copy and kernel operations are
11+
// performed in-order, regardless of batching. IMPORTANT NOTE: this is a
12+
// critical test, double-check if your changes are related to L0 events and
13+
// links between commands.
14+
15+
#include <CL/sycl.hpp>
16+
#include <cassert>
17+
#include <iostream>
18+
#include <numeric>
19+
20+
static constexpr int MAGIC_NUM1 = 2;
21+
static constexpr int buffer_size = 100;
22+
sycl::usm::alloc AllocType = sycl::usm::alloc::device;
23+
24+
const size_t PartSize = 5;
25+
const bool PartiallyPrint = buffer_size > 2 * PartSize;
26+
27+
void ValidationPrint(const std::string &vectName, const std::vector<int> &vect,
28+
const std::function<int(size_t)> &ExpectedVal) {
29+
std::cerr << vectName;
30+
if (!PartiallyPrint) {
31+
for (size_t i = 0u; i < buffer_size; ++i) {
32+
std::cerr << " " << vect[i];
33+
}
34+
} else {
35+
for (size_t i = 0u; i < PartSize; ++i) {
36+
std::cerr << " " << vect[i];
37+
}
38+
std::cerr << " ... ";
39+
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
40+
std::cerr << " " << vect[i];
41+
}
42+
}
43+
44+
std::cerr << std::endl << "expected[] = ";
45+
if (!PartiallyPrint) {
46+
for (size_t i = 0u; i < buffer_size; ++i) {
47+
std::cerr << " " << ExpectedVal(i);
48+
}
49+
} else {
50+
for (size_t i = 0u; i < PartSize; ++i) {
51+
std::cerr << " " << ExpectedVal(i);
52+
}
53+
std::cerr << " ... ";
54+
for (size_t i = buffer_size - PartSize; i < buffer_size; ++i) {
55+
std::cerr << " " << ExpectedVal(i);
56+
}
57+
}
58+
std::cerr << std::endl;
59+
for (int i = 0; i < buffer_size; ++i) {
60+
if (vect[i] != ExpectedVal(i)) {
61+
std::cerr << "i = " << i << " is wrong!!! " << std::endl;
62+
break;
63+
}
64+
}
65+
std::cerr << std::endl;
66+
}
67+
68+
void RunCalculation(sycl::queue Q) {
69+
sycl::range<1> Range(buffer_size);
70+
auto Dev = Q.get_device();
71+
if (!Dev.has(sycl::aspect::usm_device_allocations))
72+
return;
73+
74+
int *Dvalues =
75+
sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);
76+
int *DvaluesTmp =
77+
sycl::malloc<int>(buffer_size, Dev, Q.get_context(), AllocType);
78+
79+
std::vector<int> Hvalues1(buffer_size, 0);
80+
std::vector<int> HvaluesTmp(buffer_size, 0);
81+
std::iota(Hvalues1.begin(), Hvalues1.end(), 0);
82+
83+
try {
84+
Q.memcpy(Dvalues, Hvalues1.data(), buffer_size * sizeof(int));
85+
86+
Q.submit([&](sycl::handler &cgh) {
87+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
88+
size_t i = itemID.get_id(0);
89+
if (Dvalues[i] == i)
90+
Dvalues[i] += 1;
91+
});
92+
});
93+
94+
Q.submit([&](sycl::handler &cgh) {
95+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
96+
size_t i = itemID.get_id(0);
97+
if (Dvalues[i] == i + 1)
98+
Dvalues[i] += 10;
99+
});
100+
});
101+
102+
Q.memcpy(Hvalues1.data(), Dvalues, buffer_size * sizeof(int));
103+
Q.memcpy(DvaluesTmp, Hvalues1.data(), buffer_size * sizeof(int));
104+
105+
Q.submit([&](sycl::handler &cgh) {
106+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
107+
size_t i = itemID.get_id(0);
108+
if (Dvalues[i] == i + 11)
109+
if (DvaluesTmp[i] == i + 11)
110+
Dvalues[i] += 100;
111+
});
112+
});
113+
114+
Q.submit([&](sycl::handler &cgh) {
115+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
116+
size_t i = itemID.get_id(0);
117+
if (Dvalues[i] == i + 111)
118+
Dvalues[i] += 1000;
119+
});
120+
});
121+
122+
Q.submit([&](sycl::handler &cgh) {
123+
cgh.parallel_for(Range, [=](sycl::item<1> itemID) {
124+
size_t i = itemID.get_id(0);
125+
if (Dvalues[i] == i + 1111)
126+
Dvalues[i] += 10000;
127+
});
128+
});
129+
130+
Q.memcpy(Hvalues1.data(), Dvalues, buffer_size * sizeof(int));
131+
Q.memcpy(HvaluesTmp.data(), DvaluesTmp, buffer_size * sizeof(int));
132+
Q.wait();
133+
134+
ValidationPrint("vector1[] = ", Hvalues1,
135+
[&](size_t i) { return i + 11111; });
136+
ValidationPrint("vector2[] = ", HvaluesTmp,
137+
[&](size_t i) { return i + 11; });
138+
139+
for (int i = 0; i < buffer_size; ++i) {
140+
int expected = i + 11111;
141+
assert(Hvalues1[i] == expected);
142+
}
143+
144+
} catch (sycl::exception &e) {
145+
std::cout << "Exception: " << std::string(e.what()) << std::endl;
146+
}
147+
148+
free(Dvalues, Q);
149+
free(DvaluesTmp, Q);
150+
}
151+
152+
int main(int argc, char *argv[]) {
153+
sycl::queue Q({sycl::property::queue::in_order{}});
154+
155+
RunCalculation(Q);
156+
157+
std::cout << "The test passed." << std::endl;
158+
return 0;
159+
}

0 commit comments

Comments
 (0)