Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit d07c07c

Browse files
authored
[SYCL] Add tests for SYCL2020 sub_groups features (#283)
1 parent 39ee4cf commit d07c07c

File tree

2 files changed

+86
-16
lines changed

2 files changed

+86
-16
lines changed

SYCL/SubGroup/barrier.cpp

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
#include <CL/sycl.hpp>
1717
#include <limits>
1818
#include <numeric>
19-
template <typename T> class sycl_subgr;
19+
20+
template <typename T, bool UseNewSyntax> class sycl_subgr;
2021
using namespace cl::sycl;
21-
template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
22+
template <typename T, bool UseNewSyntax = false>
23+
void check(queue &Queue, size_t G = 240, size_t L = 60) {
2224
try {
2325
nd_range<1> NdRange(G, L);
2426
std::vector<T> data(G);
@@ -29,21 +31,26 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
2931
auto addacc = addbuf.template get_access<access::mode::read_write>(cgh);
3032
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
3133

32-
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
33-
ONEAPI::sub_group SG = NdItem.get_sub_group();
34-
size_t lid = SG.get_local_id().get(0);
35-
size_t gid = NdItem.get_global_id(0);
36-
size_t SGoff = gid - lid;
34+
cgh.parallel_for<sycl_subgr<T, UseNewSyntax>>(
35+
NdRange, [=](nd_item<1> NdItem) {
36+
ONEAPI::sub_group SG = NdItem.get_sub_group();
37+
size_t lid = SG.get_local_id().get(0);
38+
size_t gid = NdItem.get_global_id(0);
39+
size_t SGoff = gid - lid;
3740

38-
T res = 0;
39-
for (size_t i = 0; i <= lid; i++) {
40-
res += addacc[SGoff + i];
41-
}
42-
SG.barrier(access::fence_space::global_space);
43-
addacc[gid] = res;
44-
if (NdItem.get_global_id(0) == 0)
45-
sgsizeacc[0] = SG.get_max_local_range()[0];
46-
});
41+
T res = 0;
42+
for (size_t i = 0; i <= lid; i++) {
43+
res += addacc[SGoff + i];
44+
}
45+
if constexpr (UseNewSyntax) {
46+
group_barrier(SG);
47+
} else {
48+
SG.barrier(access::fence_space::global_space);
49+
}
50+
addacc[gid] = res;
51+
if (NdItem.get_global_id(0) == 0)
52+
sgsizeacc[0] = SG.get_max_local_range()[0];
53+
});
4754
});
4855
auto addacc = addbuf.template get_access<access::mode::read_write>();
4956
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
@@ -79,8 +86,14 @@ int main() {
7986
check<long>(Queue);
8087
check<unsigned long>(Queue);
8188
check<float>(Queue);
89+
check<int, true>(Queue);
90+
check<unsigned int, true>(Queue);
91+
check<long, true>(Queue);
92+
check<unsigned long, true>(Queue);
93+
check<float, true>(Queue);
8294
if (Queue.get_device().has_extension("cl_khr_fp64")) {
8395
check<double>(Queue);
96+
check<double, true>(Queue);
8497
}
8598
std::cout << "Test passed." << std::endl;
8699
return 0;

SYCL/SubGroup/sub_groups_sycl2020.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
//
5+
// TODO enable test on CUDA once kernel_bundle is supported
6+
// UNSUPPORTED: cuda
7+
8+
#include <sycl/sycl.hpp>
9+
10+
class TestKernel;
11+
12+
int main() {
13+
sycl::queue Q;
14+
sycl::buffer<int, 3> Buf{sycl::range{3, 32, 32}};
15+
16+
sycl::kernel_id TestKernelID = sycl::get_kernel_id<TestKernel>();
17+
18+
sycl::kernel_bundle KernelBundle =
19+
sycl::get_kernel_bundle<sycl::bundle_state::executable>(Q.get_context(),
20+
{TestKernelID});
21+
22+
Q.submit([&](sycl::handler &CGH) {
23+
CGH.use_kernel_bundle(KernelBundle);
24+
sycl::accessor Acc{Buf, CGH, sycl::write_only};
25+
CGH.parallel_for<TestKernel>(
26+
sycl::nd_range<1>(sycl::range{32}, sycl::range{32}),
27+
[=](sycl::nd_item<1> item) {
28+
auto SG = item.get_sub_group();
29+
Acc[0][SG.get_group_linear_id()][SG.get_local_linear_id()] =
30+
SG.leader();
31+
Acc[1][SG.get_group_linear_id()][SG.get_local_linear_id()] =
32+
SG.get_group_linear_range();
33+
Acc[2][SG.get_group_linear_id()][SG.get_local_linear_id()] =
34+
SG.get_local_linear_range();
35+
});
36+
});
37+
38+
sycl::host_accessor Acc{Buf, sycl::read_only};
39+
40+
sycl::kernel Kernel = KernelBundle.get_kernel(TestKernelID);
41+
42+
const size_t SubgroupSize =
43+
Kernel.get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
44+
Q.get_device(), sycl::range{32, 1, 1});
45+
const size_t MaxNumSubgroups = 32 / SubgroupSize;
46+
47+
for (size_t SGNo = 0; SGNo < MaxNumSubgroups; SGNo++) {
48+
for (size_t WINo = 0; WINo < SubgroupSize; WINo++) {
49+
const int Leader = WINo == 0 ? 1 : 0;
50+
assert(Acc[0][SGNo][WINo] == Leader);
51+
assert(Acc[1][SGNo][WINo] == MaxNumSubgroups);
52+
assert(Acc[2][SGNo][WINo] == SubgroupSize);
53+
}
54+
}
55+
56+
return 0;
57+
}

0 commit comments

Comments
 (0)