Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 44c9336

Browse files
committed
Mostly NFC (no functional change) - removed some redundancies
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 870c4bc commit 44c9336

File tree

2 files changed

+41
-48
lines changed

2 files changed

+41
-48
lines changed

SYCL/Reduction/reduction_reducer_op_eq.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -93,11 +93,11 @@ int test(T Identity) {
9393
constexpr size_t L = 4;
9494

9595
queue Q;
96-
T *Data = malloc_shared<T>(N, Q);
97-
T *Res = malloc_shared<T>(1, Q);
96+
T *Data = malloc_host<T>(N, Q);
97+
T *Res = malloc_host<T>(1, Q);
9898
T Expected = Identity;
9999
BinaryOperation BOp;
100-
if (OpEq == PlusPlus || OpEq == PlusPlusInt) {
100+
if constexpr (OpEq == PlusPlus || OpEq == PlusPlusInt) {
101101
Expected = T{N, N};
102102
} else {
103103
for (int I = 0; I < N; I++) {
@@ -168,9 +168,7 @@ int testBoth(T Identity) {
168168

169169
template <typename T> int testFPPack() {
170170
int Error = 0;
171-
Error += testBoth<T, std::plus<>, PlusEq, true>(T{});
172171
Error += testBoth<T, std::plus<T>, PlusEq, true>(T{});
173-
Error += testBoth<T, std::multiplies<>, MultipliesEq, true>(T{1, 1});
174172
Error += testBoth<T, std::multiplies<T>, MultipliesEq, true>(T{1, 1});
175173
return Error;
176174
}

SYCL/Reduction/reduction_usm.cpp

Lines changed: 38 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,7 @@ auto createReduction(T *USMPtr, T Identity, BinaryOperation BOp) {
2626
return ONEAPI::reduction(USMPtr, Identity, BOp);
2727
}
2828

29-
template <typename Name, bool IsSYCL2020Mode, typename T, int Dim,
30-
class BinaryOperation>
29+
template <typename Name, bool IsSYCL2020Mode, typename T, class BinaryOperation>
3130
void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
3231
queue Q;
3332
auto Dev = Q.get_device();
@@ -46,11 +45,10 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
4645
if (ReduVarPtr == nullptr)
4746
return;
4847
if (AllocType == usm::alloc::device) {
49-
event E = Q.submit([&](handler &CGH) {
50-
CGH.single_task<KernelNameGroup<Name, class Init>>(
51-
[=]() { *ReduVarPtr = Identity; });
52-
});
53-
E.wait();
48+
Q.submit([&](handler &CGH) {
49+
CGH.single_task<KernelNameGroup<Name, class Init>>(
50+
[=]() { *ReduVarPtr = Identity; });
51+
}).wait();
5452
} else {
5553
*ReduVarPtr = Identity;
5654
}
@@ -64,26 +62,24 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
6462

6563
// Compute.
6664
Q.submit([&](handler &CGH) {
67-
auto In = InBuf.template get_access<access::mode::read>(CGH);
68-
auto Redu = createReduction<IsSYCL2020Mode>(ReduVarPtr, Identity, BOp);
69-
range<1> GlobalRange(NWItems);
70-
range<1> LocalRange(WGSize);
71-
nd_range<1> NDRange(GlobalRange, LocalRange);
72-
CGH.parallel_for<KernelNameGroup<Name, class Test>>(
73-
NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
74-
Sum.combine(In[NDIt.get_global_linear_id()]);
75-
});
76-
});
77-
Q.wait();
65+
auto In = InBuf.template get_access<access::mode::read>(CGH);
66+
auto Redu = createReduction<IsSYCL2020Mode>(ReduVarPtr, Identity, BOp);
67+
nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize});
68+
CGH.parallel_for<KernelNameGroup<Name, class Test>>(
69+
NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
70+
Sum.combine(In[NDIt.get_global_linear_id()]);
71+
});
72+
}).wait();
7873

7974
// Check correctness.
8075
T ComputedOut;
8176
if (AllocType == usm::alloc::device) {
8277
buffer<T, 1> Buf(&ComputedOut, range<1>(1));
83-
event E = Q.submit([&](handler &CGH) {
84-
auto OutAcc = Buf.template get_access<access::mode::discard_write>(CGH);
85-
CGH.copy(ReduVarPtr, OutAcc);
86-
});
78+
Q.submit([&](handler &CGH) {
79+
auto OutAcc = Buf.template get_access<access::mode::discard_write>(CGH);
80+
CGH.single_task<KernelNameGroup<Name, class Check>>(
81+
[=]() { OutAcc[0] = *ReduVarPtr; });
82+
}).wait();
8783
ComputedOut = (Buf.template get_access<access::mode::read>())[0];
8884
} else {
8985
ComputedOut = *ReduVarPtr;
@@ -99,42 +95,41 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
9995
free(ReduVarPtr, Q.get_context());
10096
}
10197

102-
template <typename Name, typename T, int Dim, class BinaryOperation>
98+
template <typename Name, typename T, class BinaryOperation>
10399
void testUSM(T Identity, size_t WGSize, size_t NWItems) {
104-
test<KernelNameGroup<Name, class SharedCase>, false, T, Dim, BinaryOperation>(
100+
test<KernelNameGroup<Name, class SharedCase>, false, T, BinaryOperation>(
105101
Identity, WGSize, NWItems, usm::alloc::shared);
106-
test<KernelNameGroup<Name, class HostCase>, false, T, Dim, BinaryOperation>(
102+
test<KernelNameGroup<Name, class HostCase>, false, T, BinaryOperation>(
107103
Identity, WGSize, NWItems, usm::alloc::host);
108-
test<KernelNameGroup<Name, class DeviceCase>, false, T, Dim, BinaryOperation>(
104+
test<KernelNameGroup<Name, class DeviceCase>, false, T, BinaryOperation>(
109105
Identity, WGSize, NWItems, usm::alloc::device);
110106

111-
test<KernelNameGroup<Name, class SharedCase2020>, true, T, Dim,
112-
BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::shared);
113-
test<KernelNameGroup<Name, class HostCase2020>, true, T, Dim,
114-
BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::host);
115-
test<KernelNameGroup<Name, class DeviceCase2020>, true, T, Dim,
116-
BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::device);
107+
test<KernelNameGroup<Name, class SharedCase2020>, true, T, BinaryOperation>(
108+
Identity, WGSize, NWItems, usm::alloc::shared);
109+
test<KernelNameGroup<Name, class HostCase2020>, true, T, BinaryOperation>(
110+
Identity, WGSize, NWItems, usm::alloc::host);
111+
test<KernelNameGroup<Name, class DeviceCase2020>, true, T, BinaryOperation>(
112+
Identity, WGSize, NWItems, usm::alloc::device);
117113
}
118114

119115
int main() {
120116
// fast atomics and fast reduce
121-
testUSM<class AtomicReduce1, int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
122-
testUSM<class AtomicReduce2, int, 0, ONEAPI::plus<int>>(0, 8, 128);
117+
testUSM<class AtomicReduce1, int, ONEAPI::plus<int>>(0, 49, 49);
118+
testUSM<class AtomicReduce2, int, ONEAPI::plus<int>>(0, 8, 32);
123119

124120
// fast atomics
125-
testUSM<class Atomic1, int, 0, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
126-
testUSM<class Atomic2, int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
121+
testUSM<class Atomic1, int, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
122+
testUSM<class Atomic2, int, ONEAPI::bit_or<int>>(0, 4, 32);
127123

128124
// fast reduce
129-
testUSM<class Reduce1, float, 1, ONEAPI::minimum<float>>(
130-
getMaximumFPValue<float>(), 5, 5 * 7);
131-
testUSM<class Reduce2, float, 0, ONEAPI::maximum<float>>(
132-
getMinimumFPValue<float>(), 4, 128);
125+
testUSM<class Reduce1, float, ONEAPI::minimum<float>>(
126+
getMaximumFPValue<float>(), 17, 17);
127+
testUSM<class Reduce2, float, ONEAPI::maximum<float>>(
128+
getMinimumFPValue<float>(), 4, 32);
133129

134130
// generic algorithm
135-
testUSM<class Generic1, int, 0, std::multiplies<int>>(1, 7, 7 * 5);
136-
testUSM<class Generic2, int, 1, std::multiplies<int>>(1, 8, 16);
137-
testUSM<class Generic3, CustomVec<short>, 0, CustomVecPlus<short>>(
131+
testUSM<class Generic1, int, std::multiplies<int>>(1, 7, 7);
132+
testUSM<class Generic2, CustomVec<short>, CustomVecPlus<short>>(
138133
CustomVec<short>(0), 8, 8 * 3);
139134

140135
std::cout << "Test passed\n";

0 commit comments

Comments
 (0)