Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit e6df87c

Browse files
committed
Additional changes per reviewer's request
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 2645b72 commit e6df87c

10 files changed

+348
-293
lines changed

SYCL/Reduction/reduction_nd_N_vars.cpp

Lines changed: 97 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212
// RUN: %GPU_RUN_PLACEHOLDER %t.out
1313
// RUN: %ACC_RUN_PLACEHOLDER %t.out
1414

15-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DTEST_SYCL2020_REDUCTIONS %s -o %t2020.out
16-
// RUN: %CPU_RUN_PLACEHOLDER %t2020.out
17-
// RUN: %GPU_RUN_PLACEHOLDER %t2020.out
18-
// RUN: %ACC_RUN_PLACEHOLDER %t2020.out
19-
2015
// This test checks handling of parallel_for() accepting nd_range and
2116
// two or more reductions.
2217

@@ -32,10 +27,14 @@
3227
#include <numeric>
3328
#include <string>
3429

35-
template <typename... Ts> class KernelNameGroup;
36-
3730
using namespace cl::sycl;
3831

32+
template <typename... Ts> class KNameGroup;
33+
template <typename T, bool B> class KName;
34+
35+
constexpr access::mode RW = access::mode::read_write;
36+
constexpr access::mode DW = access::mode::discard_write;
37+
3938
template <typename T>
4039
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed) {
4140
bool Success;
@@ -51,11 +50,12 @@ bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed) {
5150
return Success;
5251
}
5352

54-
template <class ReductionExample, typename T1, access::mode Mode1, typename T2,
55-
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
56-
class BinaryOperation1, class BinaryOperation2,
53+
// Returns 0 if the test case passed. Otherwise, some non-zero value.
54+
template <class Name, bool IsSYCL2020Mode, typename T1, access::mode Mode1,
55+
typename T2, access::mode Mode2, typename T3, access::mode Mode3,
56+
typename T4, class BinaryOperation1, class BinaryOperation2,
5757
class BinaryOperation3, class BinaryOperation4>
58-
int runTest(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
58+
int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
5959
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
6060
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
6161
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
@@ -72,16 +72,16 @@ int runTest(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
7272
auto Dev = Q.get_device();
7373
if (AllocType4 == usm::alloc::shared &&
7474
!Dev.get_info<info::device::usm_shared_allocations>())
75-
return 4;
75+
return 0;
7676
if (AllocType4 == usm::alloc::host &&
7777
!Dev.get_info<info::device::usm_host_allocations>())
78-
return 4;
78+
return 0;
7979
if (AllocType4 == usm::alloc::device &&
8080
!Dev.get_info<info::device::usm_device_allocations>())
81-
return 4;
81+
return 0;
8282
T4 *Out4 = (T4 *)malloc(sizeof(T4), Dev, Q.get_context(), AllocType4);
8383
if (Out4 == nullptr)
84-
return 4;
84+
return 1;
8585

8686
// Initialize the arrays with sentinel values
8787
// and pre-compute the expected result 'CorrectOut'.
@@ -114,51 +114,65 @@ int runTest(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
114114

115115
if (AllocType4 == usm::alloc::device) {
116116
Q.submit([&](handler &CGH) {
117-
CGH.single_task<
118-
KernelNameGroup<ReductionExample, class KernelNameUSM4>>(
117+
CGH.single_task<KNameGroup<Name, class KernelNameUSM4>>(
119118
[=]() { *Out4 = InitVal4; });
120119
}).wait();
121120
} else {
122121
*Out4 = InitVal4;
123122
}
124123
}
125124

126-
// The main code to be tested.
127-
Q.submit([&](handler &CGH) {
128-
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
129-
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
130-
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
131-
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
132-
133-
#ifdef TEST_SYCL2020_REDUCTIONS
134-
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1);
135-
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2);
136-
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3);
137-
auto Redu4 = sycl::reduction(Out4, IdentityVal4, BOp4);
138-
#else
139-
auto Out1 = OutBuf1.template get_access<Mode1>(CGH);
140-
auto Out2 = OutBuf2.template get_access<Mode2>(CGH);
141-
accessor<T3, 0, Mode3, access::target::global_buffer> Out3(OutBuf3, CGH);
142-
143-
auto Redu1 = ONEAPI::reduction(Out1, IdentityVal1, BOp1);
144-
auto Redu2 = ONEAPI::reduction(Out2, IdentityVal2, BOp2);
145-
auto Redu3 = ONEAPI::reduction(Out3, IdentityVal3, BOp3);
146-
auto Redu4 = ONEAPI::reduction(Out4, IdentityVal4, BOp4);
147-
#endif
148-
149-
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
150-
auto &Sum4) {
151-
size_t I = NDIt.get_global_id(0);
152-
Sum1.combine(In1[I]);
153-
Sum2.combine(In2[I]);
154-
Sum3.combine(In3[I]);
155-
Sum4.combine(In4[I]);
156-
};
157-
158-
auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
159-
CGH.parallel_for<ReductionExample>(NDR, Redu1, Redu2, Redu3, Redu4,
160-
Lambda);
161-
}).wait();
125+
auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
126+
if constexpr (IsSYCL2020Mode) {
127+
Q.submit([&](handler &CGH) {
128+
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
129+
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
130+
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
131+
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
132+
133+
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1);
134+
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2);
135+
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3);
136+
auto Redu4 = sycl::reduction(Out4, IdentityVal4, BOp4);
137+
138+
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
139+
auto &Sum4) {
140+
size_t I = NDIt.get_global_id(0);
141+
Sum1.combine(In1[I]);
142+
Sum2.combine(In2[I]);
143+
Sum3.combine(In3[I]);
144+
Sum4.combine(In4[I]);
145+
};
146+
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
147+
}).wait();
148+
} else {
149+
// Test ONEAPI reductions
150+
Q.submit([&](handler &CGH) {
151+
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
152+
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
153+
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
154+
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
155+
156+
auto Out1 = OutBuf1.template get_access<Mode1>(CGH);
157+
auto Out2 = OutBuf2.template get_access<Mode2>(CGH);
158+
accessor<T3, 0, Mode3, access::target::global_buffer> Out3(OutBuf3, CGH);
159+
160+
auto Redu1 = ONEAPI::reduction(Out1, IdentityVal1, BOp1);
161+
auto Redu2 = ONEAPI::reduction(Out2, IdentityVal2, BOp2);
162+
auto Redu3 = ONEAPI::reduction(Out3, IdentityVal3, BOp3);
163+
auto Redu4 = ONEAPI::reduction(Out4, IdentityVal4, BOp4);
164+
165+
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
166+
auto &Sum4) {
167+
size_t I = NDIt.get_global_id(0);
168+
Sum1.combine(In1[I]);
169+
Sum2.combine(In2[I]);
170+
Sum3.combine(In3[I]);
171+
Sum4.combine(In4[I]);
172+
};
173+
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
174+
}).wait();
175+
}
162176

163177
// Check the results and free memory.
164178
int Error = 0;
@@ -193,24 +207,43 @@ int runTest(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
193207
return Error;
194208
}
195209

196-
int main() {
197-
constexpr access::mode ReadWriteMode = access::mode::read_write;
198-
#ifdef TEST_SYCL2020_REDUCTIONS
210+
// Tests both implementations of reduction:
211+
// sycl::reduction and sycl::ONEAPI::reduction
212+
template <class Name, typename T1, access::mode Mode1, typename T2,
213+
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
214+
class BinaryOperation1, class BinaryOperation2,
215+
class BinaryOperation3, class BinaryOperation4>
216+
int testBoth(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
217+
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
218+
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
219+
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
220+
usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) {
221+
int Error =
222+
testOne<KName<Name, false>, false, T1, Mode1, T2, Mode2, T3, Mode3, T4>(
223+
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
224+
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
225+
AllocType4, NWorkItems, WGSize);
226+
199227
// TODO: property::reduction::initialize_to_identity is not supported yet.
200228
// Thus only read_write mode is tested now.
201-
constexpr access::mode DiscardWriteMode = access::mode::read_write;
202-
#else
203-
constexpr access::mode DiscardWriteMode = access::mode::discard_write;
204-
#endif
229+
constexpr access::mode _Mode1 = (Mode1 == DW) ? RW : Mode1;
230+
constexpr access::mode _Mode2 = (Mode2 == DW) ? RW : Mode2;
231+
constexpr access::mode _Mode3 = (Mode3 == DW) ? RW : Mode3;
232+
Error +=
233+
testOne<KName<Name, true>, true, T1, _Mode1, T2, _Mode2, T3, _Mode3, T4>(
234+
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
235+
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
236+
AllocType4, NWorkItems, WGSize);
237+
return Error;
238+
}
205239

206-
int Error = runTest<class ReduFloatPlus16x1, float, DiscardWriteMode, int,
207-
ReadWriteMode, short, ReadWriteMode, int>(
240+
int main() {
241+
int Error = testBoth<class FP32Plus16x16, float, DW, int, RW, short, RW, int>(
208242
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
209243
std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16);
210244

211245
auto Add = [](auto x, auto y) { return (x + y); };
212-
Error += runTest<class ReduFloatPlus5x257, float, ReadWriteMode, int,
213-
ReadWriteMode, short, DiscardWriteMode, int>(
246+
Error += testBoth<class FP32Plus5x257, float, RW, int, RW, short, DW, int>(
214247
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
215248
8000, std::bit_xor<int>{}, usm::alloc::device, 5 * (256 + 1), 5);
216249

SYCL/Reduction/reduction_nd_ext_double.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,8 @@
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
44
// RUN: %ACC_RUN_PLACEHOLDER %t.out
55

6-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DTEST_SYCL2020_REDUCTIONS %s -o %t2020.out
7-
// RUN: %CPU_RUN_PLACEHOLDER %t2020.out
8-
// RUN: %GPU_RUN_PLACEHOLDER %t2020.out
9-
// RUN: %ACC_RUN_PLACEHOLDER %t2020.out
10-
116
// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
127
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
13-
// RUNx: %HOST_RUN_PLACEHOLDER %t2020.out
148

159
// This test performs basic checks of parallel_for(nd_range, reduction, func)
1610
// used with 'double' type.

SYCL/Reduction/reduction_nd_ext_half.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %GPU_RUN_PLACEHOLDER %t.out
33

4-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DTEST_SYCL2020_REDUCTIONS %s -o %t2020.out
5-
// RUN: %GPU_RUN_PLACEHOLDER %t2020.out
6-
74
// TODO: Enable the test for CPU/ACC when they support half type.
85
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
96
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
10-
// RUNx: %CPU_RUN_PLACEHOLDER %t2020.out
11-
// RUNx: %ACC_RUN_PLACEHOLDER %t2020.out
127

138
// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
149
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
15-
// RUNx: %HOST_RUN_PLACEHOLDER %t2020.out
1610

1711
// This test performs basic checks of parallel_for(nd_range, reduction, func)
1812
// used with 'half' type.

SYCL/Reduction/reduction_nd_ext_type.hpp

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88

99
using namespace cl::sycl;
1010

11-
template <typename SpecializationKernelName, typename T, int Dim,
11+
template <typename T, bool B> class KName;
12+
constexpr access::mode RW = access::mode::read_write;
13+
constexpr access::mode DW = access::mode::discard_write;
14+
15+
template <typename Name, bool IsSYCL2020Mode, typename T, int Dim,
1216
access::mode Mode, class BinaryOperation>
1317
void test(T Identity, size_t WGSize, size_t NWItems) {
1418
buffer<T, 1> InBuf(NWItems);
@@ -24,23 +28,27 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
2428

2529
// Compute.
2630
queue Q;
27-
Q.submit([&](handler &CGH) {
28-
auto In = InBuf.template get_access<access::mode::read>(CGH);
29-
#ifdef TEST_SYCL2020_REDUCTIONS
30-
auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp);
31-
#else
32-
accessor<T, Dim, Mode, access::target::global_buffer> Out(OutBuf, CGH);
33-
auto Redu = ONEAPI::reduction(Out, Identity, BOp);
34-
#endif
35-
36-
range<1> GlobalRange(NWItems);
37-
range<1> LocalRange(WGSize);
38-
nd_range<1> NDRange(GlobalRange, LocalRange);
39-
CGH.parallel_for<SpecializationKernelName>(
40-
NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
41-
Sum.combine(In[NDIt.get_global_linear_id()]);
42-
});
43-
});
31+
nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize});
32+
if constexpr (IsSYCL2020Mode) {
33+
Q.submit([&](handler &CGH) {
34+
auto In = InBuf.template get_access<access::mode::read>(CGH);
35+
auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp);
36+
37+
CGH.parallel_for<Name>(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
38+
Sum.combine(In[NDIt.get_global_linear_id()]);
39+
});
40+
});
41+
} else {
42+
Q.submit([&](handler &CGH) {
43+
auto In = InBuf.template get_access<access::mode::read>(CGH);
44+
accessor<T, Dim, Mode, access::target::global_buffer> Out(OutBuf, CGH);
45+
auto Redu = ONEAPI::reduction(Out, Identity, BOp);
46+
47+
CGH.parallel_for<Name>(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
48+
Sum.combine(In[NDIt.get_global_linear_id()]);
49+
});
50+
});
51+
}
4452

4553
// Check correctness.
4654
auto Out = OutBuf.template get_access<access::mode::read>();
@@ -56,36 +64,37 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
5664
}
5765
}
5866

67+
template <typename Name, typename T, int Dim, access::mode Mode,
68+
class BinaryOperation>
69+
void testBoth(T Identity, size_t WGSize, size_t NWItems) {
70+
test<KName<Name, false>, false, T, Dim, Mode, BinaryOperation>(
71+
Identity, WGSize, NWItems);
72+
73+
// TODO: property::reduction::initialize_to_identity is not supported yet.
74+
// Thus only read_write mode is tested now.
75+
constexpr access::mode _Mode = (Mode == DW) ? RW : Mode;
76+
test<KName<Name, true>, true, T, Dim, _Mode, BinaryOperation>(
77+
Identity, WGSize, NWItems);
78+
}
79+
5980
template <typename T> int runTests(const string_class &ExtensionName) {
6081
device D = default_selector().select_device();
6182
if (!D.is_host() && !D.has_extension(ExtensionName)) {
6283
std::cout << "Test skipped\n";
6384
return 0;
6485
}
6586

66-
constexpr access::mode ReadWriteMode = access::mode::read_write;
67-
#ifdef TEST_SYCL2020_REDUCTIONS
68-
// TODO: property::reduction::initialize_to_identity is not supported yet.
69-
// Thus only read_write mode is tested now.
70-
constexpr access::mode DiscardWriteMode = access::mode::read_write;
71-
#else
72-
constexpr access::mode DiscardWriteMode = access::mode::discard_write;
73-
#endif
74-
7587
// Check some less standards WG sizes and corner cases first.
76-
test<class A, T, 1, ReadWriteMode, std::multiplies<T>>(0, 4, 4);
77-
test<class B, T, 0, DiscardWriteMode, ONEAPI::plus<T>>(0, 4, 64);
88+
testBoth<class A, T, 1, RW, std::multiplies<T>>(0, 4, 4);
89+
testBoth<class B, T, 0, DW, ONEAPI::plus<T>>(0, 4, 64);
7890

79-
test<class C, T, 0, ReadWriteMode, ONEAPI::minimum<T>>(getMaximumFPValue<T>(),
80-
7, 7);
81-
test<class D, T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
91+
testBoth<class C, T, 0, RW, ONEAPI::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
92+
testBoth<class D, T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
8293
getMinimumFPValue<T>(), 7, 7 * 5);
8394

84-
test<class E, T, 1, ReadWriteMode, ONEAPI::plus<>>(1, 3, 3 * 5);
85-
test<class F, T, 1, DiscardWriteMode, ONEAPI::minimum<>>(
86-
getMaximumFPValue<T>(), 3, 3);
87-
test<class G, T, 0, DiscardWriteMode, ONEAPI::maximum<>>(
88-
getMinimumFPValue<T>(), 3, 3);
95+
testBoth<class E, T, 1, RW, ONEAPI::plus<>>(1, 3, 3 * 5);
96+
testBoth<class F, T, 1, DW, ONEAPI::minimum<>>(getMaximumFPValue<T>(), 3, 3);
97+
testBoth<class G, T, 0, DW, ONEAPI::maximum<>>(getMinimumFPValue<T>(), 3, 3);
8998

9099
std::cout << "Test passed\n";
91100
return 0;

0 commit comments

Comments
 (0)