This repository was archived by the owner on Mar 28, 2023. It is now read-only.
forked from llvm/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 130
[SYCL] Add tests for SYCL2020 queue::parallel_for() shortcuts #405
Merged
v-klochkov
merged 4 commits into
intel:intel
from
v-klochkov:reduction_queue_parallel_for
May 3, 2022
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
835a263
[SYCL] Add tests for SYCL2020 queue::parallel_for() shortcuts using r…
v-klochkov edc588d
Merge remote-tracking branch 'intel_llvm/intel' into reduction_queue_…
v-klochkov 73973db
Merge remote-tracking branch 'intel_llvm/intel' into reduction_queue_…
v-klochkov 37077d2
Add test cases for 2 and 3 dim ranges. Renamed the tests.
v-klochkov File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out | ||
// RUN: %GPU_RUN_PLACEHOLDER %t.out | ||
// RUN: %ACC_RUN_PLACEHOLDER %t.out | ||
// RUN: %CPU_RUN_PLACEHOLDER %t.out | ||
|
||
// `Group algorithms are not supported on host device.` on NVidia. | ||
// XFAIL: hip_nvidia | ||
|
||
// RUNx: %HOST_RUN_PLACEHOLDER %t.out | ||
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and | ||
// barrier() | ||
|
||
// This test only checks that the method queue::parallel_for() accepting | ||
// reduction, can be properly translated into queue::submit + parallel_for(). | ||
|
||
#include "reduction_utils.hpp" | ||
|
||
using namespace sycl; | ||
|
||
template <typename T, int I> class KName; | ||
|
||
enum TestCase { NoDependencies, Dependency, DependenciesVector }; | ||
|
||
template <typename T> T *allocUSM(queue &Q, size_t Size) { | ||
if (!Q.get_device().has(getUSMAspect(usm::alloc::shared))) | ||
return nullptr; | ||
|
||
return malloc_shared<T>(Size, Q); | ||
} | ||
|
||
template <typename RangeT> | ||
void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range, | ||
bool ToCERR = false) { | ||
std::ostream &OS = ToCERR ? std::cerr : std::cout; | ||
std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI "; | ||
OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode | ||
<< ", Range=" << Range; | ||
if (!ToCERR) | ||
OS << std::endl; | ||
} | ||
|
||
template <typename T1, typename T2, TestCase TC, int Dims, typename BOpT1, | ||
typename BOpT2> | ||
int test(queue &Q, BOpT1 BOp1, BOpT2 BOp2, const nd_range<Dims> &Range) { | ||
printNVarsTestLabel(true /*SYCL2020*/, Range); | ||
|
||
size_t NElems = Range.get_global_range().size(); | ||
T1 *Sum1 = allocUSM<T1>(Q, 1); | ||
T2 *Sum2 = allocUSM<T2>(Q, 1); | ||
T1 *Arr1 = allocUSM<T1>(Q, NElems); | ||
T2 *Arr2 = allocUSM<T2>(Q, NElems); | ||
if (!Sum1 || !Sum2 || !Arr1 || !Arr2) { | ||
sycl::free(Sum1, Q); | ||
steffenlarsen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
sycl::free(Sum2, Q); | ||
sycl::free(Arr1, Q); | ||
sycl::free(Arr2, Q); | ||
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl; | ||
return 0; | ||
} | ||
|
||
*Sum2 = 0; | ||
auto R1 = sycl::reduction( | ||
Sum1, BOp1, property_list(property::reduction::initialize_to_identity{})); | ||
auto R2 = sycl::reduction(Sum2, static_cast<T2>(0), BOp2); | ||
|
||
if constexpr (TC == TestCase::NoDependencies) { | ||
std::fill(Arr1, Arr1 + NElems, 1); | ||
std::fill(Arr2, Arr2 + NElems, 2); | ||
Q.parallel_for(Range, R1, R2, | ||
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]); | ||
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]); | ||
}) | ||
.wait(); | ||
} else if constexpr (TC == TestCase::Dependency) { | ||
auto E = Q.single_task([=]() { | ||
std::fill(Arr1, Arr1 + NElems, 1); | ||
std::fill(Arr2, Arr2 + NElems, 2); | ||
}); | ||
Q.parallel_for(Range, E, R1, R2, | ||
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]); | ||
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]); | ||
}) | ||
.wait(); | ||
} else { | ||
auto E1 = Q.single_task([=]() { std::fill(Arr1, Arr1 + NElems, 1); }); | ||
auto E2 = Q.single_task([=]() { std::fill(Arr2, Arr2 + NElems, 2); }); | ||
std::vector<event> EVec{E1, E2}; | ||
Q.parallel_for(Range, EVec, R1, R2, | ||
[=](nd_item<Dims> It, auto &Sum1, auto &Sum2) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum1.combine(static_cast<T1>(LinId) + Arr1[LinId]); | ||
Sum2.combine(static_cast<T2>(LinId) + Arr2[LinId]); | ||
}) | ||
.wait(); | ||
} | ||
|
||
T1 ExpectedSum1 = NElems + (NElems - 1) * NElems / 2; | ||
T2 ExpectedSum2 = 2 * NElems + (NElems - 1) * NElems / 2; | ||
std::string AddInfo = "TestCase="; | ||
int Error = checkResults(Q, true /*SYCL2020*/, BOp1, Range, *Sum1, | ||
ExpectedSum1, AddInfo + std::to_string(1)); | ||
Error += checkResults(Q, true /*SYCL2020*/, BOp2, Range, *Sum2, ExpectedSum2, | ||
AddInfo + std::to_string(2)); | ||
|
||
sycl::free(Sum1, Q); | ||
sycl::free(Sum2, Q); | ||
sycl::free(Arr1, Q); | ||
sycl::free(Arr2, Q); | ||
return Error; | ||
} | ||
|
||
template <typename T1, typename T2, int Dims, typename BinaryOperation1, | ||
typename BinaryOperation2> | ||
int tests(queue &Q, BinaryOperation1 BOp1, BinaryOperation2 BOp2, | ||
const nd_range<Dims> &Range) { | ||
int NumErrors = 0; | ||
NumErrors += test<T1, T2, TestCase::NoDependencies>(Q, BOp1, BOp2, Range); | ||
NumErrors += test<T1, T2, TestCase::Dependency>(Q, BOp1, BOp2, Range); | ||
NumErrors += test<T1, T2, TestCase::DependenciesVector>(Q, BOp1, BOp2, Range); | ||
return NumErrors; | ||
} | ||
|
||
int main() { | ||
queue Q; | ||
printDeviceInfo(Q); | ||
|
||
int NumErrors = 0; | ||
auto LambdaSum = [](auto X, auto Y) { return (X + Y); }; | ||
|
||
NumErrors += | ||
tests<int, short>(Q, std::plus<>{}, LambdaSum, nd_range<1>{32, 16}); | ||
NumErrors += tests<int, short>(Q, std::plus<>{}, LambdaSum, | ||
nd_range<2>{range<2>{4, 4}, range<2>{2, 2}}); | ||
NumErrors += | ||
tests<int, short>(Q, std::plus<>{}, LambdaSum, | ||
nd_range<3>{range<3>{4, 4, 3}, range<3>{1, 2, 3}}); | ||
|
||
printFinalStatus(NumErrors); | ||
return NumErrors; | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out | ||
// RUN: %GPU_RUN_PLACEHOLDER %t.out | ||
// RUN: %ACC_RUN_PLACEHOLDER %t.out | ||
// RUN: %CPU_RUN_PLACEHOLDER %t.out | ||
|
||
// `Group algorithms are not supported on host device.` on NVidia. | ||
// XFAIL: hip_nvidia | ||
|
||
// RUNx: %HOST_RUN_PLACEHOLDER %t.out | ||
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and | ||
// barrier() | ||
|
||
// This test only checks that the method queue::parallel_for() accepting | ||
// reduction, can be properly translated into queue::submit + parallel_for(). | ||
|
||
#include "reduction_utils.hpp" | ||
|
||
using namespace sycl; | ||
|
||
template <typename T, int I> class KName; | ||
|
||
enum TestCase { NoDependencies, Dependency, DependenciesVector }; | ||
|
||
template <typename T> T *allocUSM(queue &Q, size_t Size) { | ||
if (!Q.get_device().has(getUSMAspect(usm::alloc::shared))) | ||
return nullptr; | ||
|
||
return malloc_shared<T>(Size, Q); | ||
} | ||
|
||
template <typename T, TestCase TC, int Dims, typename BinaryOperation> | ||
int test(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) { | ||
printTestLabel<T, BinaryOperation>(true /*SYCL2020*/, Range); | ||
|
||
size_t NElems = Range.get_global_range().size(); | ||
T *Sum = allocUSM<T>(Q, 1); | ||
T *Arr = allocUSM<T>(Q, NElems); | ||
if (!Sum || !Arr) { | ||
std::cout << " SKIPPED due to unrelated problems with USM" << std::endl; | ||
sycl::free(Sum, Q); | ||
sycl::free(Arr, Q); | ||
return 0; | ||
} | ||
|
||
auto Redu = sycl::reduction( | ||
Sum, BOp, property_list(property::reduction::initialize_to_identity{})); | ||
if constexpr (TC == TestCase::NoDependencies) { | ||
std::fill(Arr, Arr + NElems, 1); | ||
Q.parallel_for(Range, Redu, [=](nd_item<Dims> It, auto &Sum) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum.combine(static_cast<T>(LinId) + Arr[LinId]); | ||
}).wait(); | ||
} else if constexpr (TC == TestCase::Dependency) { | ||
auto E = Q.single_task([=]() { std::fill(Arr, Arr + NElems, 1); }); | ||
Q.parallel_for(Range, E, Redu, [=](nd_item<Dims> It, auto &Sum) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum.combine(static_cast<T>(LinId) + Arr[LinId]); | ||
}).wait(); | ||
} else { | ||
auto E = Q.single_task([=]() { std::fill(Arr, Arr + NElems, 1); }); | ||
std::vector<event> EVec{E}; | ||
Q.parallel_for(Range, EVec, Redu, [=](nd_item<Dims> It, auto &Sum) { | ||
size_t LinId = It.get_global_linear_id(); | ||
Sum.combine(static_cast<T>(LinId) + Arr[LinId]); | ||
}).wait(); | ||
} | ||
|
||
T ExpectedSum = NElems + (NElems - 1) * NElems / 2; | ||
int Error = checkResults(Q, true /*SYCL2020*/, BOp, Range, *Sum, ExpectedSum); | ||
free(Sum, Q); | ||
free(Arr, Q); | ||
return Error; | ||
} | ||
|
||
template <typename T, int Dims, typename BinaryOperation> | ||
int tests(queue &Q, BinaryOperation BOp, const nd_range<Dims> &Range) { | ||
int NumErrors = 0; | ||
NumErrors += test<T, TestCase::NoDependencies>(Q, BOp, Range); | ||
NumErrors += test<T, TestCase::Dependency>(Q, BOp, Range); | ||
NumErrors += test<T, TestCase::DependenciesVector>(Q, BOp, Range); | ||
return NumErrors; | ||
} | ||
|
||
int main() { | ||
queue Q; | ||
printDeviceInfo(Q); | ||
|
||
int NumErrors = 0; | ||
NumErrors += tests<int>(Q, std::plus<>{}, nd_range<1>{32, 16}); | ||
NumErrors += | ||
tests<int>(Q, std::plus<>{}, nd_range<2>{range<2>{4, 4}, range<2>{2, 2}}); | ||
NumErrors += tests<int>(Q, std::plus<>{}, | ||
nd_range<3>{range<3>{4, 4, 3}, range<3>{1, 2, 3}}); | ||
|
||
printFinalStatus(NumErrors); | ||
return NumErrors; | ||
} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if this is still applicable. Should we rerun the tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think normal CI would report/complain on this test if it passed on nvidia, right?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, this test copies that from the existing test (being removed/replaced in this PR): SYCL/Reduction/reduction_queue_parallel_for.cpp . If it passed, it would be seen during CI runs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it is the case for other similar tests, then I suspect you are right. If it starts unexpectedly passing we'll know to remove it. 😄