-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[CUDA][HIP] Fix template static member #98544
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CUDA][HIP] Fix template static member #98544
Conversation
Should check host/device attributes before emitting static member of template instantiation. Fixes: llvm#98151
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Yaxun (Sam) Liu (yxsamliu) ChangesShould check host/device attributes before emitting static member Fixes: #98151 Full diff: https://github.com/llvm/llvm-project/pull/98544.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 5c810cd332185..599e20634bf72 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
return D->isImplicit();
}
+bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const {
+ assert(LangOpts.CUDA && "Should not be called by non-CUDA languages");
+ // We need to emit host-side 'shadows' for all global
+ // device-side variables because the CUDA runtime needs their
+ // size and host-side address in order to provide access to
+ // their device-side incarnations.
+ return !LangOpts.CUDAIsDevice || Global->hasAttr<CUDADeviceAttr>() ||
+ Global->hasAttr<CUDAConstantAttr>() ||
+ Global->hasAttr<CUDASharedAttr>() ||
+ Global->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ Global->getType()->isCUDADeviceBuiltinTextureType();
+}
+
void CodeGenModule::EmitGlobal(GlobalDecl GD) {
const auto *Global = cast<ValueDecl>(GD.getDecl());
@@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Non-constexpr non-lambda implicit host device functions are not emitted
// unless they are used on device side.
if (LangOpts.CUDA) {
- if (LangOpts.CUDAIsDevice) {
+ assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
+ "Expected Variable or Function");
+ if (const auto *VD = dyn_cast<VarDecl>(Global)) {
+ if (!shouldEmitCUDAGlobalVar(VD))
+ return;
+ } else if (LangOpts.CUDAIsDevice) {
const auto *FD = dyn_cast<FunctionDecl>(Global);
if ((!Global->hasAttr<CUDADeviceAttr>() ||
- (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
+ (LangOpts.OffloadImplicitHostDeviceTemplates &&
hasImplicitAttr<CUDAHostAttr>(FD) &&
hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
!isLambdaCallOperator(FD) &&
!getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
!Global->hasAttr<CUDAGlobalAttr>() &&
- !Global->hasAttr<CUDAConstantAttr>() &&
- !Global->hasAttr<CUDASharedAttr>() &&
- !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
- !Global->getType()->isCUDADeviceBuiltinTextureType() &&
!(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
!Global->hasAttr<CUDAHostAttr>()))
return;
- } else {
- // We need to emit host-side 'shadows' for all global
- // device-side variables because the CUDA runtime needs their
- // size and host-side address in order to provide access to
- // their device-side incarnations.
-
- // So device-only functions are the only things we skip.
- if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
- Global->hasAttr<CUDADeviceAttr>())
- return;
-
- assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
- "Expected Variable or Function");
- }
+ // Device-only functions are the only things we skip.
+ } else if (!Global->hasAttr<CUDAHostAttr>() &&
+ Global->hasAttr<CUDADeviceAttr>())
+ return;
}
if (LangOpts.OpenMP) {
@@ -5931,7 +5935,8 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
auto DK = VD->isThisDeclarationADefinition();
- if (DK == VarDecl::Definition && VD->hasAttr<DLLImportAttr>())
+ if ((DK == VarDecl::Definition && VD->hasAttr<DLLImportAttr>()) ||
+ (LangOpts.CUDA && !shouldEmitCUDAGlobalVar(VD)))
return;
TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 8b65348b879b6..392e2bf0cdfea 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -563,6 +563,9 @@ class CodeGenModule : public CodeGenTypeCache {
bool isTriviallyRecursive(const FunctionDecl *F);
bool shouldEmitFunction(GlobalDecl GD);
+ // Whether a global variable should be emitted by CUDA/HIP host/device
+ // related attributes.
+ bool shouldEmitCUDAGlobalVar(const VarDecl *VD) const;
bool shouldOpportunisticallyEmitVTables();
/// Map used to be sure we don't emit the same CompoundLiteral twice.
llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>
diff --git a/clang/test/CodeGenCUDA/template-class-static-member.cu b/clang/test/CodeGenCUDA/template-class-static-member.cu
new file mode 100644
index 0000000000000..d790d2dea66ba
--- /dev/null
+++ b/clang/test/CodeGenCUDA/template-class-static-member.cu
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST %s
+
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV-NEG %s
+
+#include "Inputs/cuda.h"
+
+template <class T>
+class A {
+ static int h_member;
+ __device__ static int d_member;
+ __constant__ static int c_member;
+ __managed__ static int m_member;
+ const static int const_member = 0;
+};
+
+template <class T>
+int A<T>::h_member;
+
+template <class T>
+__device__ int A<T>::d_member;
+
+template <class T>
+__constant__ int A<T>::c_member;
+
+template <class T>
+__managed__ int A<T>::m_member;
+
+template <class T>
+const int A<T>::const_member;
+
+template class A<int>;
+
+//DEV-DAG: @_ZN1AIiE8d_memberE = internal addrspace(1) global i32 0, comdat, align 4
+//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) global i32 0, comdat, align 4
+//DEV-DAG: @_ZN1AIiE8m_memberE = internal addrspace(1) externally_initialized global ptr addrspace(1) null
+//DEV-DAG: @_ZN1AIiE12const_memberE = internal addrspace(4) constant i32 0, comdat, align 4
+//DEV-NEG-NOT: @_ZN1AIiE8h_memberE
+
+//HOST-DAG: @_ZN1AIiE8h_memberE = weak_odr global i32 0, comdat, align 4
+//HOST-DAG: @_ZN1AIiE8d_memberE = internal global i32 undef, comdat, align 4
+//HOST-DAG: @_ZN1AIiE8c_memberE = internal global i32 undef, comdat, align 4
+//HOST-DAG: @_ZN1AIiE8m_memberE = internal externally_initialized global ptr null
+//HOST-DAG: @_ZN1AIiE12const_memberE = weak_odr constant i32 0, comdat, align 4
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like this patch includes #98543. You may want to exclude it from the pull request.
done |
3442d12
into
llvm:users/yxsamliu/should-emit-var
I thought this PR will merge to main branch, but it only merges to my own branch. I have to open another PR to merge it to main branch. |
Should check host/device attributes before emitting static member
of template instantiation.
Fixes: #98151