Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 6d27a02

Browse files
[SYCL] Add/Update DiscardEvents tests (#1258)
Co-authored-by: alexanderfle <[email protected]>
1 parent 191ddf7 commit 6d27a02

6 files changed

+779
-12
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
// UNSUPPORTED: hip
2+
//
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
//
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out image
6+
// RUN: %GPU_RUN_PLACEHOLDER %t.out image
7+
//
8+
// RUN: %CPU_RUN_PLACEHOLDER %t.out mixed
9+
// RUN: %GPU_RUN_PLACEHOLDER %t.out mixed
10+
//
11+
// Note that the tests use image functionality and if you have problems with
12+
// the tests, please check if they pass without the discard_events property, if
13+
// they don't pass then it's most likely a general issue unrelated to
14+
// discard_events.
15+
16+
#include "../helpers.hpp" // for printableVec
17+
#include <CL/sycl.hpp>
18+
#include <cassert>
19+
#include <iostream>
20+
21+
using namespace cl::sycl;
22+
static constexpr size_t BUFFER_SIZE = 1024;
23+
static constexpr int MAX_ITER_NUM1 = 10;
24+
static constexpr int MAX_ITER_NUM2 = 10;
25+
static constexpr int InitialVal = MAX_ITER_NUM1;
26+
27+
void TestHelper(sycl::queue Q,
28+
const std::function<void(sycl::range<2> ImgSize, int *Harray,
29+
sycl::image<2> Img)> &Function) {
30+
int *Harray = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
31+
assert(Harray != nullptr);
32+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
33+
Harray[i] = 0;
34+
}
35+
36+
const sycl::image_channel_order ChanOrder = sycl::image_channel_order::rgba;
37+
const sycl::image_channel_type ChanType =
38+
sycl::image_channel_type::signed_int32;
39+
40+
const sycl::range<2> ImgSize(sqrt(BUFFER_SIZE), sqrt(BUFFER_SIZE));
41+
std::vector<sycl::int4> ImgHostData(
42+
ImgSize.size(), {InitialVal, InitialVal, InitialVal, InitialVal});
43+
sycl::image<2> Img(ImgHostData.data(), ChanOrder, ChanType, ImgSize);
44+
45+
Function(ImgSize, Harray, Img);
46+
47+
free(Harray, Q);
48+
}
49+
50+
void IfTrueIncrementUSM(sycl::queue Q, sycl::range<1> Range, int *Harray,
51+
int ValueToCheck) {
52+
Q.submit([&](sycl::handler &CGH) {
53+
CGH.parallel_for<class increment_usm>(Range, [=](sycl::item<1> itemID) {
54+
size_t i = itemID.get_id(0);
55+
if (Harray[i] == ValueToCheck) {
56+
Harray[i] += 1;
57+
}
58+
});
59+
});
60+
}
61+
62+
void IfTrueIncrementImageAndUSM(sycl::queue Q, sycl::range<2> ImgSize,
63+
int *Harray, sycl::image<2> Img,
64+
int HarrayValueToCheck, int ImageValueToCheck) {
65+
Q.submit([&](sycl::handler &CGH) {
66+
auto Img1Acc = Img.get_access<sycl::int4, sycl::access::mode::read>(CGH);
67+
auto Img2Acc = Img.get_access<sycl::int4, sycl::access::mode::write>(CGH);
68+
CGH.parallel_for<class ImgCopy>(ImgSize, [=](sycl::item<2> Item) {
69+
size_t i = Item.get_linear_id();
70+
if (Harray[i] == HarrayValueToCheck) {
71+
sycl::int4 Data = Img1Acc.read(sycl::int2{Item[0], Item[1]});
72+
if (Data[0] == ImageValueToCheck && Data[1] == ImageValueToCheck &&
73+
Data[2] == ImageValueToCheck && Data[3] == ImageValueToCheck) {
74+
Data[0]++;
75+
Data[3] = Data[2] = Data[1] = Data[0];
76+
Img2Acc.write(sycl::int2{Item[0], Item[1]}, Data);
77+
}
78+
++Harray[i];
79+
}
80+
});
81+
});
82+
}
83+
84+
void RunTest_ImageTest(sycl::queue Q) {
85+
TestHelper(Q, [&](sycl::range<2> ImgSize, int *Harray, sycl::image<2> Img) {
86+
sycl::range<1> Range(BUFFER_SIZE);
87+
for (int i = 0; i < MAX_ITER_NUM1; ++i)
88+
IfTrueIncrementUSM(Q, Range, Harray, (i));
89+
90+
for (int i = 0; i < MAX_ITER_NUM2; ++i)
91+
IfTrueIncrementImageAndUSM(Q, ImgSize, Harray, Img, (MAX_ITER_NUM1 + i),
92+
(InitialVal + i));
93+
Q.wait();
94+
95+
// check results
96+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
97+
int expected = MAX_ITER_NUM1 + MAX_ITER_NUM2;
98+
assert(Harray[i] == expected);
99+
}
100+
101+
{
102+
auto HostAcc =
103+
Img.template get_access<sycl::int4, sycl::access::mode::read>();
104+
int expected = InitialVal + MAX_ITER_NUM2;
105+
for (int X = 0; X < ImgSize[0]; ++X)
106+
for (int Y = 0; Y < ImgSize[1]; ++Y) {
107+
sycl::int4 Vec1 = cl::sycl::int4(expected);
108+
sycl::int4 Vec2 = HostAcc.read(sycl::int2{X, Y});
109+
if (Vec1[0] != Vec2[0] || Vec1[1] != Vec2[1] || Vec1[2] != Vec2[2] ||
110+
Vec1[3] != Vec2[3]) {
111+
std::cerr << "Failed" << std::endl;
112+
std::cerr << "Element [ " << X << ", " << Y << " ]" << std::endl;
113+
std::cerr << "Expected: " << printableVec(Vec1) << std::endl;
114+
std::cerr << " Got : " << printableVec(Vec2) << std::endl;
115+
assert(false && "ImageTest failed!");
116+
}
117+
}
118+
}
119+
});
120+
}
121+
122+
void RunTest_ImageTest_Mixed(sycl::queue Q) {
123+
TestHelper(Q, [&](sycl::range<2> ImgSize, int *Harray, sycl::image<2> Img) {
124+
sycl::range<1> Range(BUFFER_SIZE);
125+
126+
for (int i = 0; i < MAX_ITER_NUM1; ++i) {
127+
IfTrueIncrementUSM(Q, Range, Harray, (i * 2));
128+
IfTrueIncrementImageAndUSM(Q, ImgSize, Harray, Img, (i * 2 + 1),
129+
(InitialVal + i));
130+
}
131+
132+
for (int i = 0; i < MAX_ITER_NUM2; ++i) {
133+
IfTrueIncrementImageAndUSM(Q, ImgSize, Harray, Img,
134+
(MAX_ITER_NUM1 * 2 + i * 2),
135+
(InitialVal + MAX_ITER_NUM1 + i));
136+
IfTrueIncrementUSM(Q, Range, Harray, (MAX_ITER_NUM1 * 2 + i * 2 + 1));
137+
}
138+
139+
Q.wait();
140+
141+
// check results
142+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
143+
int expected = MAX_ITER_NUM1 * 2 + MAX_ITER_NUM2 * 2;
144+
assert(Harray[i] == expected);
145+
}
146+
147+
{
148+
auto HostAcc =
149+
Img.template get_access<sycl::int4, sycl::access::mode::read>();
150+
int expected = InitialVal + MAX_ITER_NUM1 + MAX_ITER_NUM2;
151+
for (int X = 0; X < ImgSize[0]; ++X)
152+
for (int Y = 0; Y < ImgSize[1]; ++Y) {
153+
sycl::int4 Vec1 = cl::sycl::int4(expected);
154+
sycl::int4 Vec2 = HostAcc.read(sycl::int2{X, Y});
155+
if (Vec1[0] != Vec2[0] || Vec1[1] != Vec2[1] || Vec1[2] != Vec2[2] ||
156+
Vec1[3] != Vec2[3]) {
157+
std::cerr << "Failed" << std::endl;
158+
std::cerr << "Element [ " << X << ", " << Y << " ]" << std::endl;
159+
std::cerr << "Expected: " << printableVec(Vec1) << std::endl;
160+
std::cerr << " Got : " << printableVec(Vec2) << std::endl;
161+
assert(false && "ImageTest_Mixed failed!");
162+
}
163+
}
164+
}
165+
});
166+
}
167+
168+
int main(int Argc, const char *Argv[]) {
169+
assert(Argc == 2 && "Invalid number of arguments");
170+
std::string TestType(Argv[1]);
171+
172+
sycl::property_list props{
173+
sycl::property::queue::in_order{},
174+
sycl::ext::oneapi::property::queue::discard_events{}};
175+
sycl::queue Q(props);
176+
177+
auto dev = Q.get_device();
178+
if (dev.has(aspect::image)) {
179+
if (TestType == "image") {
180+
std::cerr << "RunTest_ImageTest" << std::endl;
181+
RunTest_ImageTest(Q);
182+
} else if (TestType == "mixed") {
183+
std::cerr << "RunTest_ImageTest_Mixed" << std::endl;
184+
RunTest_ImageTest_Mixed(Q);
185+
} else {
186+
assert(0 && "Unsupported test type!");
187+
}
188+
} else {
189+
std::cout << "device does not support image operations" << std::endl;
190+
}
191+
192+
std::cout << "The test passed." << std::endl;
193+
return 0;
194+
}
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// REQUIRES: level_zero
2+
//
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
//
5+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
6+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
7+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
8+
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_DEVICE_FILTER=level_zero %GPU_RUN_PLACEHOLDER %t.out
9+
//
10+
// The test is to check the execution of different queue operations has in-order
11+
// semantics regardless of batching.
12+
//
13+
// IMPORTANT NOTE: this is a critical test, double-check if your changes are
14+
// related to L0 barriers that provide links between commands within the same
15+
// command-list or if your changes are related to L0 events and links between
16+
// command-lists. if you have problems with this test, first see if other tests
17+
// related to discard_events pass. And please check if the test passes without
18+
// the discard_events property, if it doesn't pass then it's most likely a
19+
// general issue unrelated to discard_events.
20+
21+
#include <CL/sycl.hpp>
22+
#include <cassert>
23+
#include <iostream>
24+
#include <numeric>
25+
26+
static constexpr int MAGIC_NUM1 = 2;
27+
28+
sycl::aspect getUSMAspect(sycl::usm::alloc Alloc) {
29+
if (Alloc == sycl::usm::alloc::host)
30+
return sycl::aspect::usm_host_allocations;
31+
32+
if (Alloc == sycl::usm::alloc::device)
33+
return sycl::aspect::usm_device_allocations;
34+
35+
assert(Alloc == sycl::usm::alloc::shared && "Unknown USM allocation type");
36+
return sycl::aspect::usm_shared_allocations;
37+
}
38+
39+
void RunCalculation(sycl::queue queue, sycl::usm::alloc AllocType) {
40+
int buffer_size = 100;
41+
sycl::range<1> range(buffer_size);
42+
auto Dev = queue.get_device();
43+
if (!Dev.has(getUSMAspect(AllocType)))
44+
return;
45+
46+
int *values1 =
47+
sycl::malloc<int>(buffer_size, Dev, queue.get_context(), AllocType);
48+
int *values2 =
49+
sycl::malloc<int>(buffer_size, Dev, queue.get_context(), AllocType);
50+
int *values3 =
51+
sycl::malloc<int>(buffer_size, Dev, queue.get_context(), AllocType);
52+
53+
std::vector<int> values(buffer_size, 0);
54+
std::iota(values.begin(), values.end(), 0);
55+
56+
std::vector<int> vec1(buffer_size, 0);
57+
std::vector<int> vec2(buffer_size, 0);
58+
std::vector<int> vec3(buffer_size, 0);
59+
60+
try {
61+
queue.memcpy(values1, values.data(), buffer_size * sizeof(int));
62+
queue.memcpy(values2, values1, buffer_size * sizeof(int));
63+
queue.memcpy(values3, values2, buffer_size * sizeof(int));
64+
queue.memset(values1, 0, buffer_size * sizeof(int));
65+
66+
queue.submit([&](sycl::handler &cgh) {
67+
cgh.parallel_for<class kernel1>(range, [=](sycl::item<1> itemID) {
68+
size_t i = itemID.get_id(0);
69+
if (values1[i] == 0)
70+
if (values2[i] == i)
71+
if (values3[i] == i) {
72+
values1[i] += i;
73+
values2[i] = MAGIC_NUM1;
74+
values3[i] = i;
75+
}
76+
});
77+
});
78+
79+
queue.submit([&](sycl::handler &cgh) {
80+
cgh.parallel_for<class kernel2>(range, [=](sycl::item<1> itemID) {
81+
size_t i = itemID.get_id(0);
82+
if (values1[i] == i)
83+
if (values2[i] == MAGIC_NUM1)
84+
if (values3[i] == i) {
85+
values1[i] += 10;
86+
}
87+
});
88+
});
89+
90+
queue.memcpy(values.data(), values1, buffer_size * sizeof(int));
91+
queue.memcpy(values2, values.data(), buffer_size * sizeof(int));
92+
93+
queue.submit([&](sycl::handler &cgh) {
94+
cgh.parallel_for<class kernel3>(range, [=](sycl::item<1> itemID) {
95+
size_t i = itemID.get_id(0);
96+
if (values1[i] == i + 10)
97+
if (values2[i] == i + 10)
98+
if (values3[i] == i) {
99+
values1[i] += 100;
100+
values2[i] = i;
101+
}
102+
});
103+
});
104+
105+
queue.submit([&](sycl::handler &cgh) {
106+
cgh.parallel_for<class kernel4>(range, [=](sycl::item<1> itemID) {
107+
size_t i = itemID.get_id(0);
108+
if (values1[i] == i + 110)
109+
if (values2[i] == i)
110+
if (values3[i] == i) {
111+
values1[i] += 1000;
112+
}
113+
});
114+
});
115+
116+
queue.submit([&](sycl::handler &cgh) {
117+
cgh.parallel_for<class kernel5>(range, [=](sycl::item<1> itemID) {
118+
size_t i = itemID.get_id(0);
119+
if (values1[i] == i + 1110)
120+
if (values2[i] == i)
121+
if (values3[i] == i) {
122+
values1[i] += 10000;
123+
}
124+
});
125+
});
126+
127+
queue.memcpy(vec1.data(), values1, buffer_size * sizeof(int));
128+
queue.memcpy(vec2.data(), values2, buffer_size * sizeof(int));
129+
queue.memcpy(vec3.data(), values3, buffer_size * sizeof(int));
130+
131+
queue.wait();
132+
133+
for (int i = 0; i < buffer_size; ++i) {
134+
int expected = i + 11110;
135+
assert(vec1[i] == expected);
136+
expected = i;
137+
assert(vec2[i] == expected);
138+
assert(vec3[i] == expected);
139+
}
140+
141+
} catch (sycl::exception &e) {
142+
std::cout << "Exception: " << std::string(e.what()) << std::endl;
143+
}
144+
145+
free(values1, queue);
146+
free(values2, queue);
147+
free(values3, queue);
148+
}
149+
150+
int main(int argc, char *argv[]) {
151+
sycl::property_list Props{
152+
sycl::property::queue::in_order{},
153+
sycl::ext::oneapi::property::queue::discard_events{}};
154+
sycl::queue queue(sycl::default_selector{}, Props);
155+
156+
RunCalculation(queue, sycl::usm::alloc::host);
157+
RunCalculation(queue, sycl::usm::alloc::shared);
158+
RunCalculation(queue, sycl::usm::alloc::device);
159+
160+
std::cout << "The test passed." << std::endl;
161+
return 0;
162+
}

0 commit comments

Comments
 (0)