Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 835a263

Browse files
committed
[SYCL] Add tests for SYCL2020 queue::parallel_for() shortcuts using reductions
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent a8db568 commit 835a263

File tree

4 files changed

+447
-60
lines changed

4 files changed

+447
-60
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
5+
6+
// `Group algorithms are not supported on host device.` on Nvidia.
7+
// XFAIL: hip_nvidia
8+
9+
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
10+
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and
11+
// barrier()
12+
13+
// This test only checks that the method queue::parallel_for() accepting
14+
// reduction, can be properly translated into queue::submit + parallel_for().
15+
16+
#include "reduction_utils.hpp"
17+
18+
using namespace sycl;
19+
20+
template <typename T, int I> class KName;
21+
22+
template <typename T> T *allocUSM(queue &Q) {
23+
if (!Q.get_device().has(getUSMAspect(usm::alloc::shared)))
24+
return nullptr;
25+
26+
return malloc_shared<T>(1, Q);
27+
}
28+
29+
template <typename RangeT>
30+
void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range,
31+
bool ToCERR = false) {
32+
std::ostream &OS = ToCERR ? std::cerr : std::cout;
33+
std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI ";
34+
OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode
35+
<< ", Range=" << Range;
36+
if (!ToCERR)
37+
OS << std::endl;
38+
}
39+
40+
template <typename Name, bool IsSYCL2020, typename T1, typename T2,
41+
typename BOpT1, typename BOpT2>
42+
int test(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<1> &Range) {
43+
printNVarsTestLabel(IsSYCL2020, Range);
44+
45+
size_t NElems = Range.get_global_range().size();
46+
T1 *Sum1 = allocUSM<T1>(Q);
47+
T2 *Sum2 = allocUSM<T2>(Q);
48+
if (!Sum1 || !Sum2) {
49+
free(Sum1, Q);
50+
free(Sum2, Q);
51+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
52+
return 0;
53+
}
54+
T1 Identity1 = 0;
55+
T2 Identity2 = 0;
56+
*Sum1 = Identity1;
57+
*Sum2 = Identity2;
58+
59+
auto R1 = createReduction<IsSYCL2020, access::mode::read_write>(Sum1, BOp1);
60+
auto R2 = createReduction<IsSYCL2020, access::mode::read_write>(
61+
Sum2, Identity2, BOp2);
62+
Q.parallel_for<Name>(Range, R1, R2,
63+
[=](nd_item<1> It, auto &Sum1, auto &Sum2) {
64+
Sum1.combine(static_cast<T1>(It.get_global_id(0)));
65+
Sum2.combine(static_cast<T2>(It.get_global_id(0)));
66+
})
67+
.wait();
68+
69+
T1 ExpectedSum1 = static_cast<T1>((NElems - 1) * NElems / 2);
70+
T2 ExpectedSum2 = static_cast<T1>((NElems - 1) * NElems / 2);
71+
std::string AddInfo = "TestCase=";
72+
int Error = checkResults(Q, IsSYCL2020, BOp1, Range, *Sum1, ExpectedSum1,
73+
AddInfo + std::to_string(1));
74+
Error += checkResults(Q, IsSYCL2020, BOp2, Range, *Sum2, ExpectedSum2,
75+
AddInfo + std::to_string(2));
76+
77+
free(Sum1, Q);
78+
free(Sum2, Q);
79+
return Error;
80+
}
81+
82+
template <typename Name, bool IsSYCL2020, typename T1, typename T2,
83+
typename BOpT1, typename BOpT2>
84+
int testDep(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<1> &Range) {
85+
printNVarsTestLabel(IsSYCL2020, Range);
86+
87+
size_t NElems = Range.get_global_range().size();
88+
T1 *Sum1 = allocUSM<T1>(Q);
89+
T2 *Sum2 = allocUSM<T2>(Q);
90+
if (!Sum1 || !Sum2) {
91+
free(Sum1, Q);
92+
free(Sum2, Q);
93+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
94+
return 0;
95+
}
96+
T1 Identity1 = 0;
97+
T2 Identity2 = 0;
98+
*Sum1 = Identity1;
99+
auto E = Q.memcpy(Sum2, &Identity2, sizeof(T2));
100+
101+
auto R1 = createReduction<IsSYCL2020, access::mode::read_write>(Sum1, BOp1);
102+
auto R2 = createReduction<IsSYCL2020, access::mode::read_write>(
103+
Sum2, Identity2, BOp2);
104+
Q.parallel_for<Name>(Range, E, R1, R2,
105+
[=](nd_item<1> It, auto &Sum1, auto &Sum2) {
106+
Sum1.combine(static_cast<T1>(It.get_global_id(0)));
107+
Sum2.combine(static_cast<T2>(It.get_global_id(0)));
108+
})
109+
.wait();
110+
111+
T1 ExpectedSum1 = static_cast<T1>((NElems - 1) * NElems / 2);
112+
T2 ExpectedSum2 = static_cast<T1>((NElems - 1) * NElems / 2);
113+
std::string AddInfo = "TestCase=";
114+
int Error = checkResults(Q, IsSYCL2020, BOp1, Range, *Sum1, ExpectedSum1,
115+
AddInfo + std::to_string(1));
116+
Error += checkResults(Q, IsSYCL2020, BOp2, Range, *Sum2, ExpectedSum2,
117+
AddInfo + std::to_string(2));
118+
119+
free(Sum1, Q);
120+
free(Sum2, Q);
121+
return Error;
122+
}
123+
124+
template <typename Name, bool IsSYCL2020, typename T1, typename T2,
125+
typename BOpT1, typename BOpT2>
126+
int testDepV(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<1> &Range) {
127+
printNVarsTestLabel(IsSYCL2020, Range);
128+
129+
size_t NElems = Range.get_global_range().size();
130+
T1 *Sum1 = allocUSM<T1>(Q);
131+
T2 *Sum2 = allocUSM<T2>(Q);
132+
if (!Sum1 || !Sum2) {
133+
free(Sum1, Q);
134+
free(Sum2, Q);
135+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
136+
return 0;
137+
}
138+
std::vector<event> EVec;
139+
T1 Identity1 = 0;
140+
T2 Identity2 = 0;
141+
auto E1 = Q.memcpy(Sum1, &Identity1, sizeof(T1));
142+
auto E2 = Q.memcpy(Sum2, &Identity2, sizeof(T2));
143+
EVec.push_back(E1);
144+
EVec.push_back(E2);
145+
146+
auto R1 = createReduction<IsSYCL2020, access::mode::read_write>(Sum1, BOp1);
147+
auto R2 = createReduction<IsSYCL2020, access::mode::read_write>(
148+
Sum2, Identity2, BOp2);
149+
Q.parallel_for<Name>(Range, EVec, R1, R2,
150+
[=](nd_item<1> It, auto &Sum1, auto &Sum2) {
151+
Sum1.combine(static_cast<T1>(It.get_global_id(0)));
152+
Sum2.combine(static_cast<T2>(It.get_global_id(0)));
153+
})
154+
.wait();
155+
156+
T1 ExpectedSum1 = static_cast<T1>((NElems - 1) * NElems / 2);
157+
T2 ExpectedSum2 = static_cast<T1>((NElems - 1) * NElems / 2);
158+
std::string AddInfo = "TestCase=";
159+
int Error = checkResults(Q, IsSYCL2020, BOp1, Range, *Sum1, ExpectedSum1,
160+
AddInfo + std::to_string(1));
161+
Error += checkResults(Q, IsSYCL2020, BOp2, Range, *Sum2, ExpectedSum2,
162+
AddInfo + std::to_string(2));
163+
164+
free(Sum1, Q);
165+
free(Sum2, Q);
166+
return Error;
167+
}
168+
169+
template <typename Name, typename T1, typename T2, typename BOpT1,
170+
typename BOpT2>
171+
int tests(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<1> &Range) {
172+
int NumErrors = 0;
173+
NumErrors += test<KName<Name, 0>, true, T1, T2>(Q, BOp1, BOp2, Range);
174+
NumErrors += test<KName<Name, 1>, false, T1, T2>(Q, BOp1, BOp2, Range);
175+
176+
NumErrors += testDep<KName<Name, 2>, true, T1, T2>(Q, BOp1, BOp2, Range);
177+
NumErrors += testDep<KName<Name, 3>, false, T1, T2>(Q, BOp1, BOp2, Range);
178+
179+
NumErrors += testDepV<KName<Name, 4>, true, T1, T2>(Q, BOp1, BOp2, Range);
180+
NumErrors += testDepV<KName<Name, 5>, false, T1, T2>(Q, BOp1, BOp2, Range);
181+
return NumErrors;
182+
}
183+
184+
int main() {
185+
queue Q;
186+
printDeviceInfo(Q);
187+
188+
auto LambdaSum = [](auto X, auto Y) { return (X + Y); };
189+
int NumErrors = tests<class A, int, short>(Q, std::plus<>{}, LambdaSum,
190+
nd_range<1>{32, 16});
191+
192+
printFinalStatus(NumErrors);
193+
return NumErrors;
194+
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
5+
6+
// `Group algorithms are not supported on host device.` on Nvidia.
7+
// XFAIL: hip_nvidia
8+
9+
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
10+
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and
11+
// barrier()
12+
13+
// This test only checks that the method queue::parallel_for() accepting
14+
// reduction, can be properly translated into queue::submit + parallel_for().
15+
16+
#include "reduction_utils.hpp"
17+
18+
using namespace sycl;
19+
20+
template <typename T, int I> class KName;
21+
22+
template <typename T> T *allocUSM(queue &Q) {
23+
if (!Q.get_device().has(getUSMAspect(usm::alloc::shared)))
24+
return nullptr;
25+
26+
return malloc_shared<T>(1, Q);
27+
}
28+
29+
template <typename Name, bool IsSYCL2020, typename T, int Dims,
30+
typename BinaryOperation>
31+
int test(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) {
32+
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
33+
34+
T *Sum = allocUSM<T>(Q);
35+
if (!Sum) {
36+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
37+
return 0;
38+
}
39+
*Sum = 0;
40+
41+
auto Redu = createReduction<IsSYCL2020, access::mode::read_write>(Sum, BOp);
42+
Q.parallel_for<Name>(Range, Redu, [=](nd_item<1> It, auto &Sum) {
43+
Sum += static_cast<T>(It.get_global_id(0));
44+
}).wait();
45+
46+
size_t NElems = Range.get_global_range().size();
47+
T ExpectedSum = (NElems - 1) * NElems / 2;
48+
int Error = checkResults(Q, IsSYCL2020, BOp, Range, *Sum, ExpectedSum);
49+
free(Sum, Q);
50+
return Error;
51+
}
52+
53+
template <typename Name, bool IsSYCL2020, typename T, int Dims,
54+
typename BinaryOperation>
55+
int testDep(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) {
56+
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
57+
58+
T *Sum = allocUSM<T>(Q);
59+
if (!Sum) {
60+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
61+
return 0;
62+
}
63+
T Identity = 0;
64+
auto E = Q.memcpy(Sum, &Identity, sizeof(T));
65+
66+
auto Redu = createReduction<IsSYCL2020, access::mode::read_write>(Sum, BOp);
67+
Q.parallel_for<Name>(Range, E, Redu, [=](nd_item<1> It, auto &Sum) {
68+
Sum += static_cast<T>(It.get_global_id(0));
69+
}).wait();
70+
71+
size_t NElems = Range.get_global_range().size();
72+
T ExpectedSum = (NElems - 1) * NElems / 2;
73+
int Error =
74+
checkResults(Q, IsSYCL2020, BOp, Range, *Sum, ExpectedSum, "DepEvent");
75+
free(Sum, Q);
76+
return Error;
77+
}
78+
79+
template <typename Name, bool IsSYCL2020, typename T, int Dims,
80+
typename BinaryOperation>
81+
int testDepV(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) {
82+
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
83+
84+
T *Sum = allocUSM<T>(Q);
85+
if (!Sum) {
86+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
87+
return 0;
88+
}
89+
*Sum = 0;
90+
T Identity = 0;
91+
std::vector<event> EVec;
92+
auto E = Q.memcpy(Sum, &Identity, sizeof(T));
93+
EVec.push_back(E);
94+
95+
auto Redu = createReduction<IsSYCL2020, access::mode::read_write>(Sum, BOp);
96+
Q.parallel_for<Name>(Range, EVec, Redu, [=](nd_item<1> It, auto &Sum) {
97+
Sum += static_cast<T>(It.get_global_id(0));
98+
}).wait();
99+
100+
size_t NElems = Range.get_global_range().size();
101+
T ExpectedSum = (NElems - 1) * NElems / 2;
102+
int Error =
103+
checkResults(Q, IsSYCL2020, BOp, Range, *Sum, ExpectedSum, "DepEventVec");
104+
free(Sum, Q);
105+
return Error;
106+
}
107+
108+
template <typename Name, typename T, int Dims, typename BinaryOperation>
109+
int tests(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) {
110+
int NumErrors = 0;
111+
NumErrors += test<KName<Name, 0>, true, T>(Q, BOp, Range);
112+
NumErrors += test<KName<Name, 1>, false, T>(Q, BOp, Range);
113+
114+
NumErrors += testDep<KName<Name, 2>, true, T>(Q, BOp, Range);
115+
NumErrors += testDep<KName<Name, 3>, false, T>(Q, BOp, Range);
116+
117+
NumErrors += testDepV<KName<Name, 4>, true, T>(Q, BOp, Range);
118+
NumErrors += testDepV<KName<Name, 5>, false, T>(Q, BOp, Range);
119+
return NumErrors;
120+
}
121+
122+
int main() {
123+
queue Q;
124+
printDeviceInfo(Q);
125+
126+
int NumErrors = tests<class A, int>(Q, std::plus<>{}, nd_range<1>{32, 16});
127+
128+
printFinalStatus(NumErrors);
129+
return NumErrors;
130+
}

SYCL/Reduction/reduction_queue_parallel_for.cpp

Lines changed: 0 additions & 60 deletions
This file was deleted.

0 commit comments

Comments
 (0)