Skip to content

Commit 7b696d8

Browse files
committed
[CUDA] make kernel stub ICF-proof
MSVC linker merges functions having comdat which have identical set of instructions. CUDA uses kernel stub function as key to look up kernels in device executables. If kernel stub function for different kernels are merged by ICF, incorrect kernels will be launched. To prevent ICF from merging kernel stub functions, an unique global variable is created for each kernel stub function having comdat and a store is added to the kernel stub function. This makes the set of instructions in each kernel function unique. Fixes: #88883
1 parent 6844c2f commit 7b696d8

File tree

2 files changed

+83
-40
lines changed

2 files changed

+83
-40
lines changed

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,34 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
424424
CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
425425
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
426426
LaunchKernelArgs);
427+
428+
// To prevent CUDA device stub functions from being merged by ICF in MSVC
429+
// environment, create an unique global variable for each kernel and write to
430+
// the variable in the device stub.
431+
if (CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
432+
!CGF.getLangOpts().HIP) {
433+
llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel);
434+
if (KernelFunction->hasComdat()) {
435+
std::string KernelName = KernelFunction->getName().str();
436+
std::string GlobalVarName = KernelName + ".id";
437+
438+
llvm::GlobalVariable *HandleVar =
439+
CGM.getModule().getNamedGlobal(GlobalVarName);
440+
if (!HandleVar) {
441+
HandleVar = new llvm::GlobalVariable(
442+
CGM.getModule(), CGM.Int8Ty,
443+
/*Constant=*/false, KernelFunction->getLinkage(),
444+
llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName);
445+
HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
446+
HandleVar->setVisibility(KernelFunction->getVisibility());
447+
HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName));
448+
}
449+
450+
CGF.Builder.CreateAlignedStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
451+
HandleVar, CharUnits::One());
452+
}
453+
}
454+
427455
CGF.EmitBranch(EndBlock);
428456

429457
CGF.EmitBlock(EndBlock);

clang/test/CodeGenCUDA/kernel-stub-name.cu

Lines changed: 55 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
44
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
5-
// RUN: | FileCheck -check-prefixes=CHECK,GNU %s
5+
// RUN: | FileCheck -check-prefixes=CHECK,GNU,GNU-HIP,HIP %s
66

77
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
88
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
@@ -11,7 +11,12 @@
1111
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
1212
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
1313
// RUN: %t -o - -x hip\
14-
// RUN: | FileCheck -check-prefixes=CHECK,MSVC %s
14+
// RUN: | FileCheck -check-prefixes=CHECK,MSVC,MSVC-HIP,HIP %s
15+
16+
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
17+
// RUN: -aux-triple nvptx64 -fcuda-include-gpubinary \
18+
// RUN: %t -target-sdk-version=9.2 -o - \
19+
// RUN: | FileCheck -check-prefixes=CHECK,MSVC,CUDA %s
1520

1621
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
1722
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
@@ -22,19 +27,21 @@
2227

2328
// Check kernel handles are emitted for non-MSVC target but not for MSVC target.
2429

25-
// GNU: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
26-
// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
27-
// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
28-
// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
29-
// GNU: @[[HTDKERN:_Z20template_kernel_declIiEvT_]] = external constant ptr, align 8
30-
31-
// MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
32-
// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
33-
// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
34-
// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
35-
// MSVC: @[[HTDKERN:"\?\?\$template_kernel_decl@H@@YAXH.*"]] = external dso_local constant ptr, align 8
30+
// GNU-HIP: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
31+
// GNU-HIP: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
32+
// GNU-HIP: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
33+
// GNU-HIP: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
34+
// GNU-HIP: @[[HTDKERN:_Z20template_kernel_declIiEvT_]] = external constant ptr, align 8
35+
36+
// MSVC-HIP: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
37+
// MSVC-HIP: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
38+
// MSVC-HIP: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
39+
// MSVC-HIP: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
40+
// MSVC-HIP: @[[HTDKERN:"\?\?\$template_kernel_decl@H@@YAXH.*"]] = external dso_local constant ptr, align 8
3641
extern "C" __global__ void ckernel() {}
3742

43+
// CUDA: @[[HTKERN:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ\.id"]] = linkonce_odr dso_local global i8 0, comdat
44+
3845
namespace ns {
3946
__global__ void nskernel() {}
4047
} // namespace ns
@@ -60,18 +67,23 @@ extern "C" void launch(void *kern);
6067

6168
// Non-template kernel stub functions
6269

63-
// CHECK: define{{.*}}@[[CSTUB]]
64-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
70+
// HIP: define{{.*}}@[[CSTUB]]
71+
// CUDA: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
72+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
73+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[CSTUB]]
6574

66-
// CHECK: define{{.*}}@[[NSSTUB]]
67-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
75+
// HIP: define{{.*}}@[[NSSTUB]]
76+
// CUDA: define{{.*}}@[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]]
77+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
78+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[NSSTUB]]
6879

6980
// Check kernel stub is called for triple chevron.
7081

7182
// CHECK-LABEL: define{{.*}}@fun1()
7283
// CHECK: call void @[[CSTUB]]()
7384
// CHECK: call void @[[NSSTUB]]()
74-
// CHECK: call void @[[TSTUB]]()
85+
// HIP: call void @[[TSTUB]]()
86+
// CUDA: call void @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]]()
7587
// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
7688
// GNU: call void @[[TDSTUB:_Z35__device_stub__template_kernel_declIiEvT_]](
7789
// MSVC: call void @[[DSTUB:"\?__device_stub__kernel_decl@@YAXXZ"]]()
@@ -88,7 +100,10 @@ extern "C" void fun1(void) {
88100
// Template kernel stub functions
89101

90102
// CHECK: define{{.*}}@[[TSTUB]]
91-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
103+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
104+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[TSTUB]]
105+
// CUDA: store i8 1, ptr @[[HTKERN]], align 1
106+
// CHECK: ret void
92107

93108
// Check declaration of stub function for external kernel.
94109

@@ -98,11 +113,11 @@ extern "C" void fun1(void) {
98113
// Check kernel handle is used for passing the kernel as a function pointer.
99114

100115
// CHECK-LABEL: define{{.*}}@fun2()
101-
// CHECK: call void @launch({{.*}}[[HCKERN]]
102-
// CHECK: call void @launch({{.*}}[[HNSKERN]]
103-
// CHECK: call void @launch({{.*}}[[HTKERN]]
104-
// CHECK: call void @launch({{.*}}[[HDKERN]]
105-
// CHECK: call void @launch({{.*}}[[HTDKERN]]
116+
// HIP: call void @launch({{.*}}[[HCKERN]]
117+
// HIP: call void @launch({{.*}}[[HNSKERN]]
118+
// HIP: call void @launch({{.*}}[[HTKERN]]
119+
// HIP: call void @launch({{.*}}[[HDKERN]]
120+
// HIP: call void @launch({{.*}}[[HTDKERN]]
106121
extern "C" void fun2() {
107122
launch((void *)ckernel);
108123
launch((void *)ns::nskernel);
@@ -114,10 +129,10 @@ extern "C" void fun2() {
114129
// Check kernel handle is used for assigning a kernel to a function pointer.
115130

116131
// CHECK-LABEL: define{{.*}}@fun3()
117-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
118-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
119-
// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
120-
// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
132+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
133+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
134+
// HIP: store ptr @[[HCKERN]], ptr @void_ptr, align 8
135+
// HIP: store ptr @[[HCKERN]], ptr @void_ptr, align 8
121136
extern "C" void fun3() {
122137
kernel_ptr = ckernel;
123138
kernel_ptr = &ckernel;
@@ -129,11 +144,11 @@ extern "C" void fun3() {
129144
// used with triple chevron.
130145

131146
// CHECK-LABEL: define{{.*}}@fun4()
132-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
133-
// CHECK: call noundef i32 @{{.*hipConfigureCall}}
134-
// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
135-
// CHECK: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8
136-
// CHECK: call void %[[STUB]]()
147+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr
148+
// HIP: call noundef i32 @{{.*hipConfigureCall}}
149+
// HIP: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
150+
// HIP: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8
151+
// HIP: call void %[[STUB]]()
137152
extern "C" void fun4() {
138153
kernel_ptr = ckernel;
139154
kernel_ptr<<<1,1>>>();
@@ -142,20 +157,20 @@ extern "C" void fun4() {
142157
// Check kernel handle is passed to a function.
143158

144159
// CHECK-LABEL: define{{.*}}@fun5()
145-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
146-
// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
147-
// CHECK: call void @launch(ptr noundef %[[HANDLE]])
160+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr
161+
// HIP: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
162+
// HIP: call void @launch(ptr noundef %[[HANDLE]])
148163
extern "C" void fun5() {
149164
kernel_ptr = ckernel;
150165
launch((void *)kernel_ptr);
151166
}
152167

153168
// Check kernel handle is registered.
154169

155-
// CHECK-LABEL: define{{.*}}@__hip_register_globals
156-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
157-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
158-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
170+
// HIP-LABEL: define{{.*}}@__hip_register_globals
171+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
172+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
173+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
159174
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub
160175
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl
161176
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}template_kernel_decl

0 commit comments

Comments
 (0)