|
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
|
8 | 5 | #include <sycl/sycl.hpp>
|
9 | 6 |
|
// Kernel name types. SYCL kernel names must be forward-declarable types;
// a separate name is used for the CUDA fallback path so the kernel-bundle
// introspection below only ever queries the non-CUDA kernel.
class TestKernel;
class TestKernelCUDA;
|
12 | 10 | int main() {
|
13 | 11 | sycl::queue Q;
|
14 | 12 | sycl::buffer<int, 3> Buf{sycl::range{3, 32, 32}};
|
15 | 13 |
|
16 |
| - sycl::kernel_id TestKernelID = sycl::get_kernel_id<TestKernel>(); |
17 |
| - |
18 |
| - sycl::kernel_bundle KernelBundle = |
19 |
| - sycl::get_kernel_bundle<sycl::bundle_state::executable>(Q.get_context(), |
20 |
| - {TestKernelID}); |
21 |
| - |
22 |
| - Q.submit([&](sycl::handler &CGH) { |
23 |
| - CGH.use_kernel_bundle(KernelBundle); |
24 |
| - sycl::accessor Acc{Buf, CGH, sycl::write_only}; |
25 |
| - CGH.parallel_for<TestKernel>( |
26 |
| - sycl::nd_range<1>(sycl::range{32}, sycl::range{32}), |
27 |
| - [=](sycl::nd_item<1> item) { |
28 |
| - auto SG = item.get_sub_group(); |
29 |
| - Acc[0][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
30 |
| - SG.leader(); |
31 |
| - Acc[1][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
32 |
| - SG.get_group_linear_range(); |
33 |
| - Acc[2][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
34 |
| - SG.get_local_linear_range(); |
35 |
| - }); |
36 |
| - }); |
37 |
| - |
38 |
| - sycl::host_accessor Acc{Buf, sycl::read_only}; |
39 |
| - |
40 |
| - sycl::kernel Kernel = KernelBundle.get_kernel(TestKernelID); |
41 |
| - |
42 |
| - const size_t SubgroupSize = |
43 |
| - Kernel.get_info<sycl::info::kernel_device_specific::max_sub_group_size>( |
44 |
| - Q.get_device(), sycl::range{32, 1, 1}); |
| 14 | + size_t SubgroupSize = 0; |
| 15 | + |
| 16 | + sycl::accessor WriteAcc{Buf, sycl::write_only}; |
| 17 | + const auto KernelFunc = [=](sycl::nd_item<1> item) { |
| 18 | + auto SG = item.get_sub_group(); |
| 19 | + WriteAcc[0][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
| 20 | + SG.leader(); |
| 21 | + WriteAcc[1][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
| 22 | + SG.get_group_linear_range(); |
| 23 | + WriteAcc[2][SG.get_group_linear_id()][SG.get_local_linear_id()] = |
| 24 | + SG.get_local_linear_range(); |
| 25 | + }; |
| 26 | + |
| 27 | + if (Q.get_backend() != sycl::backend::cuda) { |
| 28 | + sycl::kernel_id TestKernelID = sycl::get_kernel_id<TestKernel>(); |
| 29 | + sycl::kernel_bundle KernelBundle = |
| 30 | + sycl::get_kernel_bundle<sycl::bundle_state::executable>(Q.get_context(), |
| 31 | + {TestKernelID}); |
| 32 | + |
| 33 | + Q.submit([&](sycl::handler &CGH) { |
| 34 | + CGH.use_kernel_bundle(KernelBundle); |
| 35 | + CGH.require(WriteAcc); |
| 36 | + CGH.parallel_for<TestKernel>( |
| 37 | + sycl::nd_range<1>(sycl::range{32}, sycl::range{32}), KernelFunc); |
| 38 | + }); |
| 39 | + |
| 40 | + sycl::kernel Kernel = KernelBundle.get_kernel(TestKernelID); |
| 41 | + SubgroupSize = |
| 42 | + Kernel.get_info<sycl::info::kernel_device_specific::max_sub_group_size>( |
| 43 | + Q.get_device(), sycl::range{32, 1, 1}); |
| 44 | + } else { |
| 45 | + // CUDA sub-group size is 32 by default (size of a warp) so the kernel |
| 46 | + // bundle is not strictly needed to do this test for the CUDA backend. |
| 47 | + // TODO: Remove this special CUDA path once the CUDA backend supports kernel |
| 48 | + // bundles. |
| 49 | + SubgroupSize = 32; |
| 50 | + Q.submit([&](sycl::handler &CGH) { |
| 51 | + CGH.require(WriteAcc); |
| 52 | + CGH.parallel_for<TestKernelCUDA>( |
| 53 | + sycl::nd_range<1>(sycl::range{32}, sycl::range{32}), KernelFunc); |
| 54 | + }); |
| 55 | + } |
| 56 | + |
| 57 | + sycl::host_accessor HostAcc{Buf, sycl::read_only}; |
| 58 | + |
45 | 59 | const size_t MaxNumSubgroups = 32 / SubgroupSize;
|
46 | 60 |
|
47 | 61 | for (size_t SGNo = 0; SGNo < MaxNumSubgroups; SGNo++) {
|
48 | 62 | for (size_t WINo = 0; WINo < SubgroupSize; WINo++) {
|
49 | 63 | const int Leader = WINo == 0 ? 1 : 0;
|
50 |
| - assert(Acc[0][SGNo][WINo] == Leader); |
51 |
| - assert(Acc[1][SGNo][WINo] == MaxNumSubgroups); |
52 |
| - assert(Acc[2][SGNo][WINo] == SubgroupSize); |
| 64 | + assert(HostAcc[0][SGNo][WINo] == Leader); |
| 65 | + assert(HostAcc[1][SGNo][WINo] == MaxNumSubgroups); |
| 66 | + assert(HostAcc[2][SGNo][WINo] == SubgroupSize); |
53 | 67 | }
|
54 | 68 | }
|
55 | 69 |
|
|
0 commit comments