Skip to content

Commit b46f980

Browse files
authored
[HIP] fix host-used external kernel (#83870)
In -fgpu-rdc mode, when an external kernel is used by a host function with weak_odr linkage (e.g. explicitly instantiated template function), the kernel should not be marked as host-used external kernel, since the host function may be dropped by the linker. Mark the external kernel as host-used external kernel will force a reference to the external kernel, which the user may not define in other TU. Fixes: #83771
1 parent 29afd64 commit b46f980

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

clang/lib/Sema/SemaCUDA.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
895895
if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
896896
// For -fgpu-rdc, keep track of external kernels used by host functions.
897897
if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
898-
Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
898+
Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
899+
(!Caller || (!Caller->getDescribedFunctionTemplate() &&
900+
getASTContext().GetGVALinkageForFunction(Caller) ==
901+
GVA_StrongExternal)))
899902
getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
900903
return true;
901904
}

clang/lib/Sema/SemaExpr.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
1921819218
// externalize the static device side variable ODR-used by host code.
1921919219
if (!Var->hasExternalStorage())
1922019220
SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
19221-
else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
19221+
else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
19222+
(!FD || (!FD->getDescribedFunctionTemplate() &&
19223+
SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
19224+
GVA_StrongExternal)))
1922219225
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
1922319226
}
1922419227
}

clang/test/CodeGenCUDA/host-used-extern.cu

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
2626
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
27+
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
2728
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
2829
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
2930
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
4445
// kernel4 is marked as used even though it is not called.
4546
__global__ void kernel4();
4647

48+
// kernel5 is not marked as used since it is called by host function
49+
// with weak_odr linkage, which may be dropped by linker.
50+
__global__ void kernel5();
51+
4752
extern __device__ int var1;
4853

4954
__device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
6772
};
6873
lambda();
6974
}
75+
76+
template<class T>
77+
void template_caller() {
78+
kernel5<<<1, 1>>>();
79+
var1 = 1;
80+
}
81+
82+
template void template_caller<int>();

0 commit comments

Comments
 (0)