Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add test cases for SYCL2020 reductions #194

Merged
merged 4 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions SYCL/Reduction/reduction_big_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ size_t getSafeMaxWGSize(size_t MaxWGSize, size_t MemSize, size_t OneElemSize) {
}

template <typename KernelName, typename T, int Dim, class BinaryOperation>
void test(T Identity) {
queue Q;
void test(queue &Q, T Identity) {
device Device = Q.get_device();

std::size_t MaxWGSize = Device.get_info<info::device::max_work_group_size>();
Expand Down Expand Up @@ -99,10 +98,11 @@ template <class T> struct BigCustomVecPlus {
};

int main() {
test<class Test1, float, 0, ONEAPI::maximum<>>(getMinimumFPValue<float>());
queue Q;
test<class Test1, float, 0, ONEAPI::maximum<>>(Q, getMinimumFPValue<float>());

using BCV = BigCustomVec<long long>;
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(BCV(0));
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));

std::cout << "Test passed\n";
return 0;
Expand Down
98 changes: 48 additions & 50 deletions SYCL/Reduction/reduction_nd_N_vars.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
// TODO: level_zero reports an internal error for this test.
// UNSUPPORTED: level_zero

// TODO: Windows implementation of std::tuple is not trivially copiable and
// thus cannot be passed from HOST to DEVICE. Enable the test on Windows when
// SYCL RT gets new type traits having less strict requirements for objects
// being passed to DEVICE.
// UNSUPPORTED: windows

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: The test irregularly reports incorrect results on CPU.
// UNSUPPORTED: cpu

// This test checks handling of parallel_for() accepting nd_range and
// two or more reductions.

Expand All @@ -36,26 +30,30 @@ constexpr access::mode RW = access::mode::read_write;
constexpr access::mode DW = access::mode::discard_write;

template <typename T>
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed) {
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed,
bool IsSYCL2020) {
bool Success;
if (!std::is_floating_point<T>::value)
Success = (Expected == Computed);
else
Success = std::abs((Expected / Computed) - 1) < 0.5;

if (!Success)
std::cout << TestCaseNum << ": Expected value = " << Expected
if (!Success) {
std::cerr << "Is SYCL2020 mode: " << IsSYCL2020 << std::endl;
std::cerr << TestCaseNum << ": Expected value = " << Expected
<< ", Computed value = " << Computed << "\n";
}

return Success;
}

// Returns 0 if the test case passed. Otherwise, some non-zero value.
template <class Name, bool IsSYCL2020Mode, typename T1, access::mode Mode1,
template <class Name, bool IsSYCL2020, typename T1, access::mode Mode1,
typename T2, access::mode Mode2, typename T3, access::mode Mode3,
typename T4, class BinaryOperation1, class BinaryOperation2,
class BinaryOperation3, class BinaryOperation4>
int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
typename T4, access::mode Mode4, class BinaryOperation1,
class BinaryOperation2, class BinaryOperation3,
class BinaryOperation4>
int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
Expand All @@ -68,7 +66,6 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
buffer<T2, 1> OutBuf2(1);
buffer<T3, 1> OutBuf3(1);

queue Q;
auto Dev = Q.get_device();
if (AllocType4 == usm::alloc::shared &&
!Dev.get_info<info::device::usm_shared_allocations>())
Expand Down Expand Up @@ -100,8 +97,9 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
CorrectOut2 = BOp2(CorrectOut2, InitVal2);
if (Mode3 == access::mode::read_write)
CorrectOut3 = BOp3(CorrectOut3, InitVal3);
// 4th reduction is USM and this is read_write.
CorrectOut4 = BOp4(CorrectOut4, InitVal4);
// discard_write mode for USM reductions is available only SYCL2020.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot match the comment to the code. I see USM but I don't see discard_write. Am I missing something?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ONEAPI::reduction if initialized with USM pointer assumed read-write to that USM memory. I.e. add the original value of element pointed by USM memory in the final sum. Only SYCL-2020 can ignore the original value of USM memory (i.e. do discard_write).

The test may have 'Mode4' be equal to access::mode::discard_write, which is treated as 'read_write' if 'IsSYCL2020==false'

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment could be this:
// ONEAPI::reduction supports only read_write access to USM memory

if (Mode4 == access::mode::read_write || !IsSYCL2020)
CorrectOut4 = BOp4(CorrectOut4, InitVal4);

// Inititialize data.
{
Expand All @@ -123,17 +121,21 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
}

auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
if constexpr (IsSYCL2020Mode) {
if constexpr (IsSYCL2020) {
Q.submit([&](handler &CGH) {
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);

auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1);
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2);
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3);
auto Redu4 = sycl::reduction(Out4, IdentityVal4, BOp4);
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1,
getPropertyList<Mode1>());
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2,
getPropertyList<Mode2>());
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3,
getPropertyList<Mode3>());
auto Redu4 =
sycl::reduction(Out4, IdentityVal4, BOp4, getPropertyList<Mode4>());

auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
auto &Sum4) {
Expand Down Expand Up @@ -193,10 +195,10 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
Out4Val = *Out4;
}

Error += cherkResultIsExpected(1, CorrectOut1, Out1[0]) ? 0 : 1;
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0]) ? 0 : 1;
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0]) ? 0 : 1;
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val) ? 0 : 1;
Error += cherkResultIsExpected(1, CorrectOut1, Out1[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val, IsSYCL2020) ? 0 : 1;
free(Out4, Q.get_context());
}

Expand All @@ -211,45 +213,41 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
// sycl::reduction and sycl::ONEAPI::reduction
template <class Name, typename T1, access::mode Mode1, typename T2,
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
class BinaryOperation1, class BinaryOperation2,
access::mode Mode4, class BinaryOperation1, class BinaryOperation2,
class BinaryOperation3, class BinaryOperation4>
int testBoth(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
int testBoth(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) {
int Error =
testOne<KName<Name, false>, false, T1, Mode1, T2, Mode2, T3, Mode3, T4>(
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
AllocType4, NWorkItems, WGSize);

// TODO: property::reduction::initialize_to_identity is not supported yet.
// Thus only read_write mode is tested now.
constexpr access::mode _Mode1 = (Mode1 == DW) ? RW : Mode1;
constexpr access::mode _Mode2 = (Mode2 == DW) ? RW : Mode2;
constexpr access::mode _Mode3 = (Mode3 == DW) ? RW : Mode3;
testOne<KName<Name, false>, false, T1, Mode1, T2, Mode2, T3, Mode3, T4,
Mode4>(Q, IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2,
BOp2, IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4,
BOp4, AllocType4, NWorkItems, WGSize);

Error +=
testOne<KName<Name, true>, true, T1, _Mode1, T2, _Mode2, T3, _Mode3, T4>(
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
AllocType4, NWorkItems, WGSize);
testOne<KName<Name, true>, true, T1, Mode1, T2, Mode2, T3, Mode3, T4,
Mode4>(Q, IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2,
BOp2, IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4,
BOp4, AllocType4, NWorkItems, WGSize);
return Error;
}

int main() {
int Error = testBoth<class FP32Plus16x16, float, DW, int, RW, short, RW, int>(
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
queue Q;
int Error = testBoth<class Case1, float, DW, int, RW, short, RW, int, RW>(
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16);

auto Add = [](auto x, auto y) { return (x + y); };
Error += testBoth<class FP32Plus5x257, float, RW, int, RW, short, DW, int>(
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
8000, std::bit_xor<int>{}, usm::alloc::device, 5 * (256 + 1), 5);
Error += testBoth<class Case2, float, RW, int, RW, short, DW, int, DW>(
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
8000, std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);

if (!Error)
std::cout << "Test passed\n";
else
std::cout << Error << " test-cases failed\n";
std::cerr << Error << " test-cases failed\n";
return Error;
}
13 changes: 6 additions & 7 deletions SYCL/Reduction/reduction_nd_conditional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ template <class T> struct VecPlus {

template <typename SpecializationKernelName, typename T, int Dim,
class BinaryOperation>
void test(T Identity, size_t WGSize, size_t NWItems) {
void test(queue &Q, T Identity, size_t WGSize, size_t NWItems) {
buffer<T, 1> InBuf(NWItems);
buffer<T, 1> OutBuf(1);

Expand All @@ -66,7 +66,6 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
initInputData(InBuf, CorrectOut, Identity, BOp, NWItems);

// Compute.
queue Q;
Q.submit([&](handler &CGH) {
auto In = InBuf.template get_access<access::mode::read>(CGH);
accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
Expand Down Expand Up @@ -100,11 +99,11 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
}

int main() {
test<class KernelName_lAx, int, 0, ONEAPI::plus<int>>(0, 2, 2);
test<class KernelName_eVBkBK, int, 1, ONEAPI::plus<int>>(0, 7, 7);
test<class KernelName_vMSyszeYKJbaXATnPL, int, 0, ONEAPI::plus<int>>(0, 2,
64);
test<class KernelName_UPKnfG, short, 1, ONEAPI::plus<short>>(0, 16, 256);
queue Q;
test<class A, int, 0, ONEAPI::plus<int>>(Q, 0, 2, 2);
test<class B, int, 1, ONEAPI::plus<int>>(Q, 0, 7, 7);
test<class C, int, 0, ONEAPI::plus<int>>(Q, 0, 2, 64);
test<class D, short, 1, ONEAPI::plus<short>>(Q, 0, 16, 256);

std::cout << "Test passed\n";
return 0;
Expand Down
2 changes: 1 addition & 1 deletion SYCL/Reduction/reduction_nd_ext_double.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@

#include "reduction_nd_ext_type.hpp"

int main() { return runTests<double>("cl_khr_double"); }
int main() { return runTests<double>("cl_khr_fp64"); }
50 changes: 27 additions & 23 deletions SYCL/Reduction/reduction_nd_ext_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,26 @@ constexpr access::mode DW = access::mode::discard_write;

template <typename Name, bool IsSYCL2020Mode, typename T, int Dim,
access::mode Mode, class BinaryOperation>
void test(T Identity, size_t WGSize, size_t NWItems) {
void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) {
buffer<T, 1> InBuf(NWItems);
buffer<T, 1> OutBuf(1);

// Initialize.
BinaryOperation BOp;
T CorrectOut;
initInputData(InBuf, CorrectOut, Identity, BOp, NWItems);

if (Mode == access::mode::read_write)
(OutBuf.template get_access<access::mode::write>())[0] = Identity;
CorrectOut = BOp(CorrectOut, Init);

(OutBuf.template get_access<access::mode::write>())[0] = Init;

// Compute.
queue Q;
nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize});
if constexpr (IsSYCL2020Mode) {
Q.submit([&](handler &CGH) {
auto In = InBuf.template get_access<access::mode::read>(CGH);
auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp);

auto Redu =
sycl::reduction(OutBuf, CGH, Identity, BOp, getPropertyList<Mode>());
CGH.parallel_for<Name>(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
Sum.combine(In[NDIt.get_global_linear_id()]);
});
Expand All @@ -60,42 +60,46 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
std::cout << "Computed value: " << ComputedOut
<< ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff
<< "\n";
if (IsSYCL2020Mode)
std::cout << std::endl;
assert(0 && "Wrong value.");
}
}

template <typename Name, typename T, int Dim, access::mode Mode,
class BinaryOperation>
void testBoth(T Identity, size_t WGSize, size_t NWItems) {
void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) {
test<KName<Name, false>, false, T, Dim, Mode, BinaryOperation>(
Identity, WGSize, NWItems);
Q, Identity, Init, WGSize, NWItems);

// TODO: property::reduction::initialize_to_identity is not supported yet.
// Thus only read_write mode is tested now.
constexpr access::mode _Mode = (Mode == DW) ? RW : Mode;
test<KName<Name, true>, true, T, Dim, _Mode, BinaryOperation>(
Identity, WGSize, NWItems);
test<KName<Name, true>, true, T, Dim, Mode, BinaryOperation>(
Q, Identity, Init, WGSize, NWItems);
}

template <typename T> int runTests(const string_class &ExtensionName) {
device D = default_selector().select_device();
queue Q;
device D = Q.get_device();
if (!D.is_host() && !D.has_extension(ExtensionName)) {
std::cout << "Test skipped\n";
return 0;
}

// Check some less standards WG sizes and corner cases first.
testBoth<class A, T, 1, RW, std::multiplies<T>>(0, 4, 4);
testBoth<class B, T, 0, DW, ONEAPI::plus<T>>(0, 4, 64);
testBoth<class A, T, 1, RW, std::multiplies<T>>(Q, 1, 77, 4, 4);

testBoth<class C, T, 0, RW, ONEAPI::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
testBoth<class D, T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
getMinimumFPValue<T>(), 7, 7 * 5);
testBoth<class B1, T, 0, DW, ONEAPI::plus<T>>(Q, 0, 77, 4, 64);
testBoth<class B2, T, 1, RW, ONEAPI::plus<>>(Q, 0, 33, 3, 3 * 5);

testBoth<class E, T, 1, RW, ONEAPI::plus<>>(1, 3, 3 * 5);
testBoth<class F, T, 1, DW, ONEAPI::minimum<>>(getMaximumFPValue<T>(), 3, 3);
testBoth<class G, T, 0, DW, ONEAPI::maximum<>>(getMinimumFPValue<T>(), 3, 3);
testBoth<class C1, T, 0, RW, ONEAPI::minimum<T>>(Q, getMaximumFPValue<T>(),
-10.0, 7, 7);
testBoth<class C2, T, 0, RW, ONEAPI::minimum<T>>(Q, getMaximumFPValue<T>(),
99.0, 7, 7);
testBoth<class C3, T, 1, DW, ONEAPI::minimum<>>(Q, getMaximumFPValue<T>(),
-99.0, 3, 3);

testBoth<class D1, T, 0, DW, ONEAPI::maximum<>>(Q, getMinimumFPValue<T>(),
99.0, 3, 3);
testBoth<class D2, T, 1, RW, ONEAPI::maximum<T>>(Q, getMinimumFPValue<T>(),
99.0, 7, 7 * 5);
std::cout << "Test passed\n";
return 0;
}
Loading