Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 718688c

Browse files
authored
[SYCL] Add LIT test for (reduction + range + USM) cases (#398)
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent f225a01 commit 718688c

File tree

3 files changed

+180
-5
lines changed

3 files changed

+180
-5
lines changed

SYCL/Reduction/reduction_range_scalar.hpp

Lines changed: 126 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,28 @@
77
using namespace cl::sycl;
88

99
template <typename T, bool B> class KName;
10+
template <typename T, typename> class TName;
1011

1112
template <typename Name, bool IsSYCL2020, access::mode Mode, int AccDim = 1,
1213
typename T, class BinaryOperation, int Dims>
1314
int test(queue &Q, T Identity, T Init, BinaryOperation BOp,
1415
const range<Dims> &Range) {
1516
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
1617

17-
// Skip the test for such big arrays now.
18-
constexpr size_t TwoGB = 2LL * 1024 * 1024 * 1024;
19-
if (Range.size() > TwoGB)
18+
// It is a known problem with passing data that is close to 4 GB in size
19+
// to device. Such data breaks the execution pretty badly.
20+
// Some of test cases calling this function try to verify the correctness
21+
// of reduction with the global range bigger than the maximal work-group size
22+
// for the device. Maximal WG size for device may be very big, e.g. it is
23+
// 67108864 for ACC emulator. Multiplying that by some factor
24+
// (to exceed max WG-Size) and multiplying it by the element size may exceed
25+
// the safe size of data passed to device.
26+
// Let's set it to 1 GB for now, and just skip the test if it exceeds 1 GB.
27+
constexpr size_t OneGB = 1LL * 1024 * 1024 * 1024;
28+
if (Range.size() * sizeof(T) > OneGB) {
29+
std::cout << " SKIPPED due to too big data size" << std::endl;
2030
return 0;
31+
}
2132

2233
buffer<T, Dims> InBuf(Range);
2334
buffer<T, 1> OutBuf(1);
@@ -55,3 +66,115 @@ int testBoth(queue &Q, T Identity, T Init, BinaryOperation BOp,
5566
return test<KName<Name, false>, false, Mode>(Q, Identity, Init, BOp, Range) +
5667
test<KName<Name, true>, true, Mode>(Q, Identity, Init, BOp, Range);
5768
}
69+
70+
template <typename Name, bool IsSYCL2020, access::mode Mode, typename T,
71+
class BinaryOperation, int Dims>
72+
int testUSM(queue &Q, T Identity, T Init, BinaryOperation BOp,
73+
const range<Dims> &Range, usm::alloc AllocType) {
74+
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
75+
76+
auto Dev = Q.get_device();
77+
if (!Dev.has(getUSMAspect(AllocType))) {
78+
std::cout << " SKIPPED due to unsupported USM alloc type" << std::endl;
79+
return 0;
80+
}
81+
82+
// It is a known problem with passing data that is close to 4Gb in size
83+
// to device. Such data breaks the execution pretty badly.
84+
// Some of test cases calling this function try to verify the correctness
85+
// of reduction with the global range bigger than the maximal work-group size
86+
// for the device. Maximal WG size for device may be very big, e.g. it is
87+
// 67108864 for ACC emulator. Multiplying that by some factor
88+
// (to exceed max WG-Size) and multiplying it by the element size may exceed
89+
// the safe size of data passed to device.
90+
// Let's set it to 1 GB for now, and just skip the test if it exceeds 1Gb.
91+
constexpr size_t OneGB = 1LL * 1024 * 1024 * 1024;
92+
if (Range.size() * sizeof(T) > OneGB) {
93+
std::cout << " SKIPPED due to too big data size" << std::endl;
94+
return 0;
95+
}
96+
97+
T *ReduVarPtr = (T *)malloc(sizeof(T), Dev, Q.get_context(), AllocType);
98+
if (ReduVarPtr == nullptr) {
99+
std::cout << " SKIPPED due to unrelated reason: alloc returned nullptr"
100+
<< std::endl;
101+
return 0;
102+
}
103+
if (AllocType == usm::alloc::device) {
104+
Q.submit([&](handler &CGH) {
105+
CGH.single_task<TName<Name, class InitKernel>>(
106+
[=]() { *ReduVarPtr = Init; });
107+
}).wait();
108+
} else {
109+
*ReduVarPtr = Init;
110+
}
111+
112+
// Initialize.
113+
T CorrectOut;
114+
buffer<T, Dims> InBuf(Range);
115+
initInputData(InBuf, CorrectOut, Identity, BOp, Range);
116+
if constexpr (Mode == access::mode::read_write)
117+
CorrectOut = BOp(CorrectOut, Init);
118+
119+
// Compute.
120+
Q.submit([&](handler &CGH) {
121+
auto In = InBuf.template get_access<access::mode::read>(CGH);
122+
auto Redu = createReduction<IsSYCL2020, Mode>(ReduVarPtr, Identity, BOp);
123+
CGH.parallel_for<TName<Name, class Test>>(
124+
Range, Redu, [=](id<Dims> Id, auto &Sum) { Sum.combine(In[Id]); });
125+
}).wait();
126+
127+
// Check correctness.
128+
T ComputedOut;
129+
if (AllocType == usm::alloc::device) {
130+
buffer<T, 1> Buf(&ComputedOut, range<1>(1));
131+
Q.submit([&](handler &CGH) {
132+
auto OutAcc = Buf.template get_access<access::mode::discard_write>(CGH);
133+
CGH.single_task<TName<Name, class Check>>(
134+
[=]() { OutAcc[0] = *ReduVarPtr; });
135+
}).wait();
136+
ComputedOut = (Buf.template get_access<access::mode::read>())[0];
137+
} else {
138+
ComputedOut = *ReduVarPtr;
139+
}
140+
141+
std::string AllocStr =
142+
"AllocMode=" + std::to_string(static_cast<int>(AllocType));
143+
int Error = checkResults(Q, IsSYCL2020, BOp, Range, ComputedOut, CorrectOut,
144+
AllocStr);
145+
free(ReduVarPtr, Q.get_context());
146+
return Error;
147+
}
148+
149+
template <typename Name, access::mode Mode, typename T, class BinaryOperation,
150+
int Dims>
151+
int test2020USM(queue &Q, T Identity, T Init, BinaryOperation BOp,
152+
const range<Dims> &Range) {
153+
int NumErrors = 0;
154+
NumErrors += testUSM<TName<Name, class Shared2020>, true, Mode, T>(
155+
Q, Identity, Init, BOp, Range, usm::alloc::shared);
156+
NumErrors += testUSM<TName<Name, class Host2020>, true, Mode, T>(
157+
Q, Identity, Init, BOp, Range, usm::alloc::host);
158+
NumErrors += testUSM<TName<Name, class Device2020>, true, Mode, T>(
159+
Q, Identity, Init, BOp, Range, usm::alloc::device);
160+
return NumErrors;
161+
}
162+
163+
template <typename Name, access::mode Mode, typename T, class BinaryOperation,
164+
int Dims>
165+
int testONEAPIUSM(queue &Q, T Identity, T Init, BinaryOperation BOp,
166+
const range<Dims> &Range) {
167+
int NumErrors = 0;
168+
if (Mode == access::mode::discard_write) {
169+
std::cerr << "Skipped an incorrect test case: ext::oneapi::reduction "
170+
<< "does not support discard_write mode for USM variables.";
171+
return 0;
172+
}
173+
NumErrors += testUSM<TName<Name, class Shared>, false, Mode, T>(
174+
Q, Identity, Init, BOp, Range, usm::alloc::shared);
175+
NumErrors += testUSM<TName<Name, class Host>, false, Mode, T>(
176+
Q, Identity, Init, BOp, Range, usm::alloc::host);
177+
NumErrors += testUSM<TName<Name, class Device>, false, Mode, T>(
178+
Q, Identity, Init, BOp, Range, usm::alloc::device);
179+
return NumErrors;
180+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
5+
// TODO: accelerator may not support atomics required by the current
6+
// implementation. Enable testing when implementation is fixed.
7+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
8+
9+
#include "reduction_range_scalar.hpp"
10+
11+
// This test performs basic checks of parallel_for(range<1>, reduction, func)
12+
// with reductions initialized with a USM pointer to a single reduction
13+
// variable, using the discard_write access mode.
14+
15+
using namespace cl::sycl;
16+
17+
int NumErrors = 0;
18+
19+
template <typename Name, typename T, class BinaryOperation, int Dims>
20+
void tests(queue &Q, T Identity, T Init, BinaryOperation BOp,
21+
const range<Dims> &Range) {
22+
constexpr access::mode DW = access::mode::discard_write;
23+
NumErrors += test2020USM<Name, DW>(Q, Identity, Init, BOp, Range);
24+
}
25+
26+
int main() {
27+
queue Q;
28+
printDeviceInfo(Q);
29+
size_t MaxWGSize =
30+
Q.get_device().get_info<info::device::max_work_group_size>();
31+
32+
// Fast-reduce and Fast-atomics. Try various range types/sizes.
33+
tests<class A1, int>(Q, 0, 99, std::plus<>{}, range<1>{MaxWGSize * 2 + 5});
34+
tests<class A2, float>(Q, 0, 99, std::plus<>{}, range<2>{1, 1});
35+
tests<class A3, int>(Q, 0, 99, std::plus<>{}, range<3>{MaxWGSize, 1, 2});
36+
37+
// Try various operations.
38+
tests<class B1, int>(Q, ~0, 99, std::bit_and<>{}, range<1>{8});
39+
tests<class B2, int>(Q, 0, 0xff99, std::bit_xor<>{}, range<1>{MaxWGSize + 1});
40+
tests<class B4, short>(Q, 1, 2, std::multiplies<>{}, range<1>{7});
41+
tests<class B5, int>(Q, (std::numeric_limits<int>::max)(), -99,
42+
ext::oneapi::minimum<>{}, range<2>{MaxWGSize, 2});
43+
44+
// Check with CUSTOM type.
45+
tests<class C1>(Q, CustomVec<long long>(0), CustomVec<long long>(99),
46+
CustomVecPlus<long long>{}, range<2>{3, MaxWGSize});
47+
48+
printFinalStatus(NumErrors);
49+
return NumErrors;
50+
}

SYCL/Reduction/reduction_utils.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,15 @@ int checkResults(queue &Q, bool IsSYCL2020, BinaryOperation,
204204
Passed = ComputedRes == CorrectRes;
205205
}
206206

207+
if (!AddInfo.empty())
208+
AddInfo = std::string(", ") + AddInfo;
207209
std::cout << AddInfo << (Passed ? ". PASSED" : ". FAILED") << std::endl;
208210
if (!Passed) {
209211
printDeviceInfo(Q, true);
210212
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range, true);
211213
std::cerr << ", Computed value=" << ComputedRes
212-
<< ", Expected value=" << CorrectRes << ErrorStr
213-
<< (AddInfo.empty() ? "" : ", " + AddInfo) << std::endl;
214+
<< ", Expected value=" << CorrectRes << ErrorStr << AddInfo
215+
<< std::endl;
214216
}
215217
return Passed ? 0 : 1;
216218
}

0 commit comments

Comments
 (0)