|
| 1 | +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_AMD_32 %s |
| 2 | + |
| 3 | +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_AMD_64 %s |
| 4 | + |
| 5 | +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple nvptx-unknown-unknown -target-cpu sm_90 -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_CUDA_32 %s |
| 6 | + |
| 7 | +// Check that incorrect values specified for reqd_sub_group_size are ignored. |
| 8 | +// CDNA GPUs support only a wavefront size of 64, so for those GPUs only a |
| 9 | +// sub-group size of 64 is allowed. Some GPUs support both 32 and 64; for |
| 10 | +// those (and the rest) only 32 is allowed. For CUDA, only 32 is allowed. |
| 11 | + |
| 12 | +#include "sycl.hpp" |
| 13 | + |
| 14 | +int main() { // Submits three empty kernels requesting sub-group sizes 64, 32 and 8. |
| 15 | + |
| 16 | + sycl::queue Q; |
| 17 | + |
| 18 | + Q.submit([&](sycl::handler &h) { |
| 19 | + h.single_task<class Kernel_1>([=] [[sycl::reqd_sub_group_size(64)]] {}); // 64: metadata expected only in the gfx90a run |
| 20 | + }); |
| 21 | + |
| 22 | + Q.submit([&](sycl::handler &h) { |
| 23 | + h.single_task<class Kernel_2>([=] [[sycl::reqd_sub_group_size(32)]] {}); // 32: metadata expected in the gfx1010 and nvptx sm_90 runs |
| 24 | + }); |
| 25 | + |
| 26 | + Q.submit([&](sycl::handler &h) { |
| 27 | + h.single_task<class Kernel_3>([=] [[sycl::reqd_sub_group_size(8)]] {}); // 8: no run expects the metadata to be emitted |
| 28 | + }); |
| 29 | + |
| 30 | + return 0; |
| 31 | +} |
| 32 | + |
| 33 | +// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_1() #0 {{.*}} |
| 34 | +// CHECK_AMD_32-NOT: intel_reqd_sub_group_size |
| 35 | +// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_2() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_32:[0-9]+]] |
| 36 | +// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_3() #0 {{.*}} |
| 37 | +// CHECK_AMD_32-NOT: intel_reqd_sub_group_size |
| 38 | +// CHECK_AMD_32: ![[IRSGS_32]] = !{i32 32} |
| 39 | + |
| 40 | +// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_1() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_64:[0-9]+]] |
| 41 | +// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_2() #0 {{.*}} |
| 42 | +// CHECK_AMD_64-NOT: intel_reqd_sub_group_size |
| 43 | +// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_3() #0 {{.*}} |
| 44 | +// CHECK_AMD_64-NOT: intel_reqd_sub_group_size |
| 45 | +// CHECK_AMD_64: ![[IRSGS_64]] = !{i32 64} |
| 46 | + |
| 47 | +// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_1() #0 {{.*}} |
| 48 | +// CHECK_CUDA_32-NOT: intel_reqd_sub_group_size |
| 49 | +// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_2() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_32:[0-9]+]] |
| 50 | +// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_3() #0 {{.*}} |
| 51 | +// CHECK_CUDA_32-NOT: intel_reqd_sub_group_size |
| 52 | +// CHECK_CUDA_32: ![[IRSGS_32]] = !{i32 32} |
0 commit comments