Skip to content

Commit be5075a

Browse files
authored
[CUDA] make kernel stub ICF-proof (#90155)
MSVC linker merges functions having comdat which have identical set of instructions. CUDA uses kernel stub function as key to look up kernels in device executables. If kernel stub function for different kernels are merged by ICF, incorrect kernels will be launched. To prevent ICF from merging kernel stub functions, an unique global variable is created for each kernel stub function having comdat and a store is added to the kernel stub function. This makes the set of instructions in each kernel function unique. Fixes: #88883
1 parent f07a2ed commit be5075a

File tree

2 files changed

+88
-40
lines changed

2 files changed

+88
-40
lines changed

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,33 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
424424
CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
425425
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
426426
LaunchKernelArgs);
427+
428+
// To prevent CUDA device stub functions from being merged by ICF in MSVC
429+
// environment, create an unique global variable for each kernel and write to
430+
// the variable in the device stub.
431+
if (CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
432+
!CGF.getLangOpts().HIP) {
433+
llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel);
434+
std::string GlobalVarName = (KernelFunction->getName() + ".id").str();
435+
436+
llvm::GlobalVariable *HandleVar =
437+
CGM.getModule().getNamedGlobal(GlobalVarName);
438+
if (!HandleVar) {
439+
HandleVar = new llvm::GlobalVariable(
440+
CGM.getModule(), CGM.Int8Ty,
441+
/*Constant=*/false, KernelFunction->getLinkage(),
442+
llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName);
443+
HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
444+
HandleVar->setVisibility(KernelFunction->getVisibility());
445+
if (KernelFunction->hasComdat())
446+
HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName));
447+
}
448+
449+
CGF.Builder.CreateAlignedStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
450+
HandleVar, CharUnits::One(),
451+
/*IsVolatile=*/true);
452+
}
453+
427454
CGF.EmitBranch(EndBlock);
428455

429456
CGF.EmitBlock(EndBlock);

clang/test/CodeGenCUDA/kernel-stub-name.cu

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
44
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
5-
// RUN: | FileCheck -check-prefixes=CHECK,GNU %s
5+
// RUN: | FileCheck -check-prefixes=CHECK,GNU,GNU-HIP,HIP %s
66

77
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
88
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
@@ -11,7 +11,12 @@
1111
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
1212
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
1313
// RUN: %t -o - -x hip\
14-
// RUN: | FileCheck -check-prefixes=CHECK,MSVC %s
14+
// RUN: | FileCheck -check-prefixes=CHECK,MSVC,MSVC-HIP,HIP %s
15+
16+
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
17+
// RUN: -aux-triple nvptx64 -fcuda-include-gpubinary \
18+
// RUN: %t -target-sdk-version=9.2 -o - \
19+
// RUN: | FileCheck -check-prefixes=CHECK,MSVC,CUDA %s
1520

1621
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
1722
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
@@ -22,19 +27,23 @@
2227

2328
// Check kernel handles are emitted for non-MSVC target but not for MSVC target.
2429

25-
// GNU: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
26-
// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
27-
// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
28-
// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
29-
// GNU: @[[HTDKERN:_Z20template_kernel_declIiEvT_]] = external constant ptr, align 8
30-
31-
// MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
32-
// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
33-
// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
34-
// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
35-
// MSVC: @[[HTDKERN:"\?\?\$template_kernel_decl@H@@YAXH.*"]] = external dso_local constant ptr, align 8
30+
// GNU-HIP: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
31+
// GNU-HIP: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
32+
// GNU-HIP: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
33+
// GNU-HIP: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
34+
// GNU-HIP: @[[HTDKERN:_Z20template_kernel_declIiEvT_]] = external constant ptr, align 8
35+
36+
// MSVC-HIP: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
37+
// MSVC-HIP: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
38+
// MSVC-HIP: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
39+
// MSVC-HIP: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
40+
// MSVC-HIP: @[[HTDKERN:"\?\?\$template_kernel_decl@H@@YAXH.*"]] = external dso_local constant ptr, align 8
3641
extern "C" __global__ void ckernel() {}
3742

43+
// CUDA: @[[HCKERN:__device_stub__ckernel\.id]] = dso_local global i8 0
44+
// CUDA: @[[HNSKERN:"\?__device_stub__nskernel@ns@@YAXXZ\.id"]] = dso_local global i8 0
45+
// CUDA: @[[HTKERN:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ\.id"]] = linkonce_odr dso_local global i8 0, comdat
46+
3847
namespace ns {
3948
__global__ void nskernel() {}
4049
} // namespace ns
@@ -60,18 +69,27 @@ extern "C" void launch(void *kern);
6069

6170
// Non-template kernel stub functions
6271

63-
// CHECK: define{{.*}}@[[CSTUB]]
64-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
72+
// HIP: define{{.*}}@[[CSTUB]]
73+
// CUDA: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
74+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
75+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[CSTUB]]
76+
// CUDA: store volatile i8 1, ptr @[[HCKERN]], align 1
77+
// CHECK: ret void
6578

66-
// CHECK: define{{.*}}@[[NSSTUB]]
67-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
79+
// HIP: define{{.*}}@[[NSSTUB]]
80+
// CUDA: define{{.*}}@[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]]
81+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
82+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[NSSTUB]]
83+
// CUDA: store volatile i8 1, ptr @[[HNSKERN]], align 1
84+
// CHECK: ret void
6885

6986
// Check kernel stub is called for triple chevron.
7087

7188
// CHECK-LABEL: define{{.*}}@fun1()
7289
// CHECK: call void @[[CSTUB]]()
7390
// CHECK: call void @[[NSSTUB]]()
74-
// CHECK: call void @[[TSTUB]]()
91+
// HIP: call void @[[TSTUB]]()
92+
// CUDA: call void @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]]()
7593
// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
7694
// GNU: call void @[[TDSTUB:_Z35__device_stub__template_kernel_declIiEvT_]](
7795
// MSVC: call void @[[DSTUB:"\?__device_stub__kernel_decl@@YAXXZ"]]()
@@ -88,7 +106,10 @@ extern "C" void fun1(void) {
88106
// Template kernel stub functions
89107

90108
// CHECK: define{{.*}}@[[TSTUB]]
91-
// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
109+
// HIP: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
110+
// CUDA: call{{.*}}@cudaLaunch{{.*}}@[[TSTUB]]
111+
// CUDA: store volatile i8 1, ptr @[[HTKERN]], align 1
112+
// CHECK: ret void
92113

93114
// Check declaration of stub function for external kernel.
94115

@@ -98,11 +119,11 @@ extern "C" void fun1(void) {
98119
// Check kernel handle is used for passing the kernel as a function pointer.
99120

100121
// CHECK-LABEL: define{{.*}}@fun2()
101-
// CHECK: call void @launch({{.*}}[[HCKERN]]
102-
// CHECK: call void @launch({{.*}}[[HNSKERN]]
103-
// CHECK: call void @launch({{.*}}[[HTKERN]]
104-
// CHECK: call void @launch({{.*}}[[HDKERN]]
105-
// CHECK: call void @launch({{.*}}[[HTDKERN]]
122+
// HIP: call void @launch({{.*}}[[HCKERN]]
123+
// HIP: call void @launch({{.*}}[[HNSKERN]]
124+
// HIP: call void @launch({{.*}}[[HTKERN]]
125+
// HIP: call void @launch({{.*}}[[HDKERN]]
126+
// HIP: call void @launch({{.*}}[[HTDKERN]]
106127
extern "C" void fun2() {
107128
launch((void *)ckernel);
108129
launch((void *)ns::nskernel);
@@ -114,10 +135,10 @@ extern "C" void fun2() {
114135
// Check kernel handle is used for assigning a kernel to a function pointer.
115136

116137
// CHECK-LABEL: define{{.*}}@fun3()
117-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
118-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
119-
// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
120-
// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
138+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
139+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
140+
// HIP: store ptr @[[HCKERN]], ptr @void_ptr, align 8
141+
// HIP: store ptr @[[HCKERN]], ptr @void_ptr, align 8
121142
extern "C" void fun3() {
122143
kernel_ptr = ckernel;
123144
kernel_ptr = &ckernel;
@@ -129,11 +150,11 @@ extern "C" void fun3() {
129150
// used with triple chevron.
130151

131152
// CHECK-LABEL: define{{.*}}@fun4()
132-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
133-
// CHECK: call noundef i32 @{{.*hipConfigureCall}}
134-
// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
135-
// CHECK: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8
136-
// CHECK: call void %[[STUB]]()
153+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr
154+
// HIP: call noundef i32 @{{.*hipConfigureCall}}
155+
// HIP: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
156+
// HIP: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8
157+
// HIP: call void %[[STUB]]()
137158
extern "C" void fun4() {
138159
kernel_ptr = ckernel;
139160
kernel_ptr<<<1,1>>>();
@@ -142,20 +163,20 @@ extern "C" void fun4() {
142163
// Check kernel handle is passed to a function.
143164

144165
// CHECK-LABEL: define{{.*}}@fun5()
145-
// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
146-
// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
147-
// CHECK: call void @launch(ptr noundef %[[HANDLE]])
166+
// HIP: store ptr @[[HCKERN]], ptr @kernel_ptr
167+
// HIP: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
168+
// HIP: call void @launch(ptr noundef %[[HANDLE]])
148169
extern "C" void fun5() {
149170
kernel_ptr = ckernel;
150171
launch((void *)kernel_ptr);
151172
}
152173

153174
// Check kernel handle is registered.
154175

155-
// CHECK-LABEL: define{{.*}}@__hip_register_globals
156-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
157-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
158-
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
176+
// HIP-LABEL: define{{.*}}@__hip_register_globals
177+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
178+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
179+
// HIP: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
159180
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub
160181
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl
161182
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}template_kernel_decl

0 commit comments

Comments
 (0)