Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 37077d2

Browse files
committed
Add test cases for 2 and 3 dim ranges. Renamed the tests.
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 73973db commit 37077d2

6 files changed

+344
-447
lines changed

SYCL/Reduction/reduction_nd_N_queue_parallel_for.cpp

Lines changed: 0 additions & 194 deletions
This file was deleted.
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
5+
6+
// `Group algorithms are not supported on host device.` on NVidia.
7+
// XFAIL: hip_nvidia
8+
9+
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
10+
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and
11+
// barrier()
12+
13+
// This test only checks that the method queue::parallel_for() accepting
14+
// reduction, can be properly translated into queue::submit + parallel_for().
15+
16+
#include "reduction_utils.hpp"
17+
18+
using namespace sycl;
19+
20+
template <typename T, int I> class KName;
21+
22+
enum TestCase { NoDependencies, Dependency, DependenciesVector };
23+
24+
template <typename T> T *allocUSM(queue &Q, size_t Size) {
25+
if (!Q.get_device().has(getUSMAspect(usm::alloc::shared)))
26+
return nullptr;
27+
28+
return malloc_shared<T>(Size, Q);
29+
}
30+
31+
template <typename RangeT>
32+
void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range,
33+
bool ToCERR = false) {
34+
std::ostream &OS = ToCERR ? std::cerr : std::cout;
35+
std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI ";
36+
OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode
37+
<< ", Range=" << Range;
38+
if (!ToCERR)
39+
OS << std::endl;
40+
}
41+
42+
template <typename T1, typename T2, TestCase TC, int Dims, typename BOpT1,
43+
typename BOpT2>
44+
int test(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<Dims> &Range) {
45+
printNVarsTestLabel(true /*SYCL2020*/, Range);
46+
47+
size_t NElems = Range.get_global_range().size();
48+
T1 *Sum1 = allocUSM<T1>(Q, 1);
49+
T2 *Sum2 = allocUSM<T2>(Q, 1);
50+
T1 *Arr1 = allocUSM<T1>(Q, NElems);
51+
T2 *Arr2 = allocUSM<T2>(Q, NElems);
52+
if (!Sum1 || !Sum2 || !Arr1 || !Arr2) {
53+
sycl::free(Sum1, Q);
54+
sycl::free(Sum2, Q);
55+
sycl::free(Arr1, Q);
56+
sycl::free(Arr2, Q);
57+
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl;
58+
return 0;
59+
}
60+
61+
*Sum2 = 0;
62+
auto R1 = sycl::reduction(
63+
Sum1, BOp1, property_list(property::reduction::initialize_to_identity{}));
64+
auto R2 = sycl::reduction(Sum2, static_cast<T2>(0), BOp2);
65+
66+
if constexpr (TC == TestCase::NoDependencies) {
67+
std::fill(Arr1, Arr1 + NElems, 1);
68+
std::fill(Arr2, Arr2 + NElems, 2);
69+
Q.parallel_for(Range, R1, R2,
70+
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) {
71+
size_t LinId = It.get_global_linear_id();
72+
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]);
73+
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]);
74+
})
75+
.wait();
76+
} else if constexpr (TC == TestCase::Dependency) {
77+
auto E = Q.single_task([=]() {
78+
std::fill(Arr1, Arr1 + NElems, 1);
79+
std::fill(Arr2, Arr2 + NElems, 2);
80+
});
81+
Q.parallel_for(Range, E, R1, R2,
82+
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) {
83+
size_t LinId = It.get_global_linear_id();
84+
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]);
85+
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]);
86+
})
87+
.wait();
88+
} else {
89+
auto E1 = Q.single_task([=]() { std::fill(Arr1, Arr1 + NElems, 1); });
90+
auto E2 = Q.single_task([=]() { std::fill(Arr2, Arr2 + NElems, 2); });
91+
std::vector<event> EVec{E1, E2};
92+
Q.parallel_for(Range, EVec, R1, R2,
93+
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) {
94+
size_t LinId = It.get_global_linear_id();
95+
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]);
96+
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]);
97+
})
98+
.wait();
99+
}
100+
101+
T1 ExpectedSum1 = NElems + (NElems - 1) * NElems / 2;
102+
T2 ExpectedSum2 = 2 * NElems + (NElems - 1) * NElems / 2;
103+
std::string AddInfo = "TestCase=";
104+
int Error = checkResults(Q, true /*SYCL2020*/, BOp1, Range, *Sum1,
105+
ExpectedSum1, AddInfo + std::to_string(1));
106+
Error += checkResults(Q, true /*SYCL2020*/, BOp2, Range, *Sum2, ExpectedSum2,
107+
AddInfo + std::to_string(2));
108+
109+
sycl::free(Sum1, Q);
110+
sycl::free(Sum2, Q);
111+
sycl::free(Arr1, Q);
112+
sycl::free(Arr2, Q);
113+
return Error;
114+
}
115+
116+
template <typename T1, typename T2, int Dims, typename BinaryOperation1,
117+
typename BinaryOperation2>
118+
int tests(queue &Q, BinaryOperation1 BOp1, BinaryOperation2 BOp2,
119+
const nd_range<Dims> &Range) {
120+
int NumErrors = 0;
121+
NumErrors += test<T1, T2, TestCase::NoDependencies>(Q, BOp1, BOp2, Range);
122+
NumErrors += test<T1, T2, TestCase::Dependency>(Q, BOp1, BOp2, Range);
123+
NumErrors += test<T1, T2, TestCase::DependenciesVector>(Q, BOp1, BOp2, Range);
124+
return NumErrors;
125+
}
126+
127+
int main() {
128+
queue Q;
129+
printDeviceInfo(Q);
130+
131+
int NumErrors = 0;
132+
auto LambdaSum = [](auto X, auto Y) { return (X + Y); };
133+
134+
NumErrors +=
135+
tests<int, short>(Q, std::plus<>{}, LambdaSum, nd_range<1>{32, 16});
136+
NumErrors += tests<int, short>(Q, std::plus<>{}, LambdaSum,
137+
nd_range<2>{range<2>{4, 4}, range<2>{2, 2}});
138+
NumErrors +=
139+
tests<int, short>(Q, std::plus<>{}, LambdaSum,
140+
nd_range<3>{range<3>{4, 4, 3}, range<3>{1, 2, 3}});
141+
142+
printFinalStatus(NumErrors);
143+
return NumErrors;
144+
}

0 commit comments

Comments
 (0)