Skip to content

Commit 2074de2

Browse files
authored
[clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV (#110447)
When compiling HIP source for AMDGCN flavoured SPIR-V that is expected to be consumed by the ROCm HIP RT, it's not desirable to set the OpenCL Kernel CC on `__global__` functions. On one hand, this is not an OpenCL RT, so it doesn't compose with e.g. OCL specific attributes. On the other it is a "noisy" CC that carries semantics, and breaks overload resolution when using [generic dispatchers such as those used by RAJA](https://github.com/LLNL/RAJAPerf/blob/186d4194a5719788ae96631c923f9ca337f56970/src/common/HipDataUtils.hpp#L39).
1 parent 41365dc commit 2074de2

File tree

4 files changed

+25
-7
lines changed

4 files changed

+25
-7
lines changed

clang/lib/CodeGen/CGDeclCXX.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
815815
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
816816
getLangOpts().GPUAllowDeviceInit);
817817
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
818-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
818+
if (getTriple().isSPIRV())
819+
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
820+
else
821+
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
819822
Fn->addFnAttr("device-init");
820823
}
821824

@@ -973,7 +976,10 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
973976
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
974977
getLangOpts().GPUAllowDeviceInit);
975978
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
976-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
979+
if (getTriple().isSPIRV())
980+
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
981+
else
982+
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
977983
Fn->addFnAttr("device-init");
978984
}
979985

clang/lib/Sema/SemaType.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3738,12 +3738,12 @@ static CallingConv getCCForDeclaratorChunk(
37383738
}
37393739
}
37403740
} else if (S.getLangOpts().CUDA) {
3741-
// If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
3741+
// If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
37423742
// sure the kernels will be marked with the right calling convention so that
3743-
// they will be visible by the APIs that ingest SPIR-V.
3743+
// they will be visible by the APIs that ingest SPIR-V. We do not do this
3744+
// when targeting AMDGCNSPIRV, as it does not rely on OpenCL.
37443745
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
3745-
if (Triple.getArch() == llvm::Triple::spirv32 ||
3746-
Triple.getArch() == llvm::Triple::spirv64) {
3746+
if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
37473747
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
37483748
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
37493749
CC = CC_OpenCLKernel;

clang/test/CodeGenCUDA/device-init-fun.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@
44
// RUN: -fgpu-allow-device-init -x hip \
55
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
66
// RUN: | FileCheck %s
7+
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -std=c++11 \
8+
// RUN: -fgpu-allow-device-init -x hip \
9+
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
10+
// RUN: | FileCheck %s --check-prefix=CHECK-SPIRV
711

812
#include "Inputs/cuda.h"
913

1014
// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]]
1115
// CHECK: attributes #[[ATTR]] = {{.*}}"device-init"
16+
// CHECK-SPIRV: define internal spir_kernel void @_GLOBAL__sub_I_device_init_fun.cu(){{.*}} #[[ATTR:[0-9]*]]
17+
// CHECK-SPIRV: attributes #[[ATTR]] = {{.*}}"device-init"
1218

1319
__device__ void f();
1420

clang/test/CodeGenCUDA/kernel-amdgcn.cu

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,37 @@
11
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
23
#include "Inputs/cuda.h"
34

45
// CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
6+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_ZN1A6kernelEv
57
class A {
68
public:
79
static __global__ void kernel(){}
810
};
911

1012
// CHECK: define{{.*}} void @_Z10non_kernelv
13+
// CHECK-SPIRV: define{{.*}} void @_Z10non_kernelv
1114
__device__ void non_kernel(){}
1215

1316
// CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneli
17+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z6kerneli
1418
__global__ void kernel(int x) {
1519
non_kernel();
1620
}
1721

1822
// CHECK: define{{.*}} amdgpu_kernel void @_Z11EmptyKernelIvEvv
23+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z11EmptyKernelIvEvv
1924
template <typename T>
2025
__global__ void EmptyKernel(void) {}
2126

2227
struct Dummy {
2328
/// Type definition of the EmptyKernel kernel entry point
2429
typedef void (*EmptyKernelPtr)();
25-
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
30+
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
2631
};
2732

2833
// CHECK: define{{.*}} amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
34+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
2935
template<class T>
3036
__global__ void template_kernel(T x) {}
3137

0 commit comments

Comments
 (0)