Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit c540803

Browse files
[SYCL][CUDA] Enables sub_group_sycl2020 for CUDA (#316)
sub_group_sycl2020.cpp test is currently marked as unsupported for CUDA due to missing support for kernel-bundles. This patch adds a special CUDA path for avoiding the use of kernel-bundles, using the known size of sub-groups instead of querying for it. Signed-off-by: Steffen Larsen <[email protected]>
1 parent 6cf7cfd commit c540803

File tree

1 file changed

+49
-35
lines changed

1 file changed

+49
-35
lines changed

SYCL/SubGroup/sub_groups_sycl2020.cpp

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,69 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
//
5-
// TODO enable test on CUDA once kernel_bundle is supported
6-
// UNSUPPORTED: cuda
74

85
#include <sycl/sycl.hpp>
96

107
class TestKernel;
8+
class TestKernelCUDA;
119

1210
int main() {
1311
sycl::queue Q;
1412
sycl::buffer<int, 3> Buf{sycl::range{3, 32, 32}};
1513

16-
sycl::kernel_id TestKernelID = sycl::get_kernel_id<TestKernel>();
17-
18-
sycl::kernel_bundle KernelBundle =
19-
sycl::get_kernel_bundle<sycl::bundle_state::executable>(Q.get_context(),
20-
{TestKernelID});
21-
22-
Q.submit([&](sycl::handler &CGH) {
23-
CGH.use_kernel_bundle(KernelBundle);
24-
sycl::accessor Acc{Buf, CGH, sycl::write_only};
25-
CGH.parallel_for<TestKernel>(
26-
sycl::nd_range<1>(sycl::range{32}, sycl::range{32}),
27-
[=](sycl::nd_item<1> item) {
28-
auto SG = item.get_sub_group();
29-
Acc[0][SG.get_group_linear_id()][SG.get_local_linear_id()] =
30-
SG.leader();
31-
Acc[1][SG.get_group_linear_id()][SG.get_local_linear_id()] =
32-
SG.get_group_linear_range();
33-
Acc[2][SG.get_group_linear_id()][SG.get_local_linear_id()] =
34-
SG.get_local_linear_range();
35-
});
36-
});
37-
38-
sycl::host_accessor Acc{Buf, sycl::read_only};
39-
40-
sycl::kernel Kernel = KernelBundle.get_kernel(TestKernelID);
41-
42-
const size_t SubgroupSize =
43-
Kernel.get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
44-
Q.get_device(), sycl::range{32, 1, 1});
14+
size_t SubgroupSize = 0;
15+
16+
sycl::accessor WriteAcc{Buf, sycl::write_only};
17+
const auto KernelFunc = [=](sycl::nd_item<1> item) {
18+
auto SG = item.get_sub_group();
19+
WriteAcc[0][SG.get_group_linear_id()][SG.get_local_linear_id()] =
20+
SG.leader();
21+
WriteAcc[1][SG.get_group_linear_id()][SG.get_local_linear_id()] =
22+
SG.get_group_linear_range();
23+
WriteAcc[2][SG.get_group_linear_id()][SG.get_local_linear_id()] =
24+
SG.get_local_linear_range();
25+
};
26+
27+
if (Q.get_backend() != sycl::backend::cuda) {
28+
sycl::kernel_id TestKernelID = sycl::get_kernel_id<TestKernel>();
29+
sycl::kernel_bundle KernelBundle =
30+
sycl::get_kernel_bundle<sycl::bundle_state::executable>(Q.get_context(),
31+
{TestKernelID});
32+
33+
Q.submit([&](sycl::handler &CGH) {
34+
CGH.use_kernel_bundle(KernelBundle);
35+
CGH.require(WriteAcc);
36+
CGH.parallel_for<TestKernel>(
37+
sycl::nd_range<1>(sycl::range{32}, sycl::range{32}), KernelFunc);
38+
});
39+
40+
sycl::kernel Kernel = KernelBundle.get_kernel(TestKernelID);
41+
SubgroupSize =
42+
Kernel.get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
43+
Q.get_device(), sycl::range{32, 1, 1});
44+
} else {
45+
// CUDA sub-group size is 32 by default (size of a warp) so the kernel
46+
// bundle is not strictly needed to do this test for the CUDA backend.
47+
// TODO: Remove this special CUDA path once the CUDA backend supports kernel
48+
// bundles.
49+
SubgroupSize = 32;
50+
Q.submit([&](sycl::handler &CGH) {
51+
CGH.require(WriteAcc);
52+
CGH.parallel_for<TestKernelCUDA>(
53+
sycl::nd_range<1>(sycl::range{32}, sycl::range{32}), KernelFunc);
54+
});
55+
}
56+
57+
sycl::host_accessor HostAcc{Buf, sycl::read_only};
58+
4559
const size_t MaxNumSubgroups = 32 / SubgroupSize;
4660

4761
for (size_t SGNo = 0; SGNo < MaxNumSubgroups; SGNo++) {
4862
for (size_t WINo = 0; WINo < SubgroupSize; WINo++) {
4963
const int Leader = WINo == 0 ? 1 : 0;
50-
assert(Acc[0][SGNo][WINo] == Leader);
51-
assert(Acc[1][SGNo][WINo] == MaxNumSubgroups);
52-
assert(Acc[2][SGNo][WINo] == SubgroupSize);
64+
assert(HostAcc[0][SGNo][WINo] == Leader);
65+
assert(HostAcc[1][SGNo][WINo] == MaxNumSubgroups);
66+
assert(HostAcc[2][SGNo][WINo] == SubgroupSize);
5367
}
5468
}
5569

0 commit comments

Comments
 (0)