Skip to content

Commit 622eaa4

Browse files
committed
[HIP] Support __managed__ attribute
This patch implements codegen for __managed__ variable attribute for HIP. Diagnostics will be added later. Differential Revision: https://reviews.llvm.org/D94814
1 parent 06f8a49 commit 622eaa4

22 files changed

+591
-179
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ class LangOpt<string name, code customCode = [{}]> {
324324
def MicrosoftExt : LangOpt<"MicrosoftExt">;
325325
def Borland : LangOpt<"Borland">;
326326
def CUDA : LangOpt<"CUDA">;
327+
def HIP : LangOpt<"HIP">;
327328
def SYCL : LangOpt<"SYCLIsDevice">;
328329
def COnly : LangOpt<"", "!LangOpts.CPlusPlus">;
329330
def CPlusPlus : LangOpt<"CPlusPlus">;
@@ -1115,6 +1116,13 @@ def CUDAHost : InheritableAttr {
11151116
let Documentation = [Undocumented];
11161117
}
11171118

1119+
def HIPManaged : InheritableAttr {
1120+
let Spellings = [GNU<"managed">, Declspec<"__managed__">];
1121+
let Subjects = SubjectList<[Var]>;
1122+
let LangOpts = [HIP];
1123+
let Documentation = [HIPManagedAttrDocs];
1124+
}
1125+
11181126
def CUDAInvalidTarget : InheritableAttr {
11191127
let Spellings = [];
11201128
let Subjects = SubjectList<[Function]>;

clang/include/clang/Basic/AttrDocs.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5419,6 +5419,17 @@ unbind runtime APIs.
54195419
}];
54205420
}
54215421

5422+
def HIPManagedAttrDocs : Documentation {
5423+
let Category = DocCatDecl;
5424+
let Content = [{
5425+
The ``__managed__`` attribute can be applied to a global variable declaration in HIP.
5426+
A managed variable is emitted as an undefined global symbol in the device binary and is
5427+
registered by ``__hipRegisterManagedVariable`` in init functions. The HIP runtime allocates
5428+
managed memory and uses it to define the symbol when loading the device binary.
5429+
A managed variable can be accessed in both device and host code.
5430+
}];
5431+
}
5432+
54225433
def LifetimeOwnerDocs : Documentation {
54235434
let Category = DocCatDecl;
54245435
let Content = [{

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8237,7 +8237,7 @@ def err_cuda_device_exceptions : Error<
82378237
"%select{__device__|__global__|__host__|__host__ __device__}1 function">;
82388238
def err_dynamic_var_init : Error<
82398239
"dynamic initialization is not supported for "
8240-
"__device__, __constant__, and __shared__ variables.">;
8240+
"__device__, __constant__, __shared__, and __managed__ variables.">;
82418241
def err_shared_var_init : Error<
82428242
"initialization is not supported for __shared__ variables.">;
82438243
def err_cuda_vla : Error<
@@ -8247,7 +8247,8 @@ def err_cuda_extern_shared : Error<"__shared__ variable %0 cannot be 'extern'">;
82478247
def err_cuda_host_shared : Error<
82488248
"__shared__ local variables not allowed in "
82498249
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
8250-
def err_cuda_nonstatic_constdev: Error<"__constant__ and __device__ are not allowed on non-static local variables">;
8250+
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
8251+
"__managed__ are not allowed on non-static local variables">;
82518252
def err_cuda_ovl_target : Error<
82528253
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
82538254
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 81 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/IR/BasicBlock.h"
2222
#include "llvm/IR/Constants.h"
2323
#include "llvm/IR/DerivedTypes.h"
24+
#include "llvm/IR/ReplaceConstant.h"
2425
#include "llvm/Support/Format.h"
2526

2627
using namespace clang;
@@ -128,21 +129,23 @@ class CGNVCUDARuntime : public CGCUDARuntime {
128129
DeviceVars.push_back({&Var,
129130
VD,
130131
{DeviceVarFlags::Variable, Extern, Constant,
131-
/*Normalized*/ false, /*Type*/ 0}});
132+
VD->hasAttr<HIPManagedAttr>(),
133+
/*Normalized*/ false, 0}});
132134
}
133135
void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
134136
bool Extern, int Type) override {
135137
DeviceVars.push_back({&Var,
136138
VD,
137139
{DeviceVarFlags::Surface, Extern, /*Constant*/ false,
140+
/*Managed*/ false,
138141
/*Normalized*/ false, Type}});
139142
}
140143
void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
141144
bool Extern, int Type, bool Normalized) override {
142145
DeviceVars.push_back({&Var,
143146
VD,
144147
{DeviceVarFlags::Texture, Extern, /*Constant*/ false,
145-
Normalized, Type}});
148+
/*Managed*/ false, Normalized, Type}});
146149
}
147150

148151
/// Creates module constructor function
@@ -380,6 +383,47 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
380383
CGF.EmitBlock(EndBlock);
381384
}
382385

386+
// Replace the original variable Var with the address loaded from variable
387+
// ManagedVar populated by HIP runtime.
388+
static void replaceManagedVar(llvm::GlobalVariable *Var,
389+
llvm::GlobalVariable *ManagedVar) {
390+
SmallVector<SmallVector<llvm::User *, 8>, 8> WorkList;
391+
for (auto &&VarUse : Var->uses()) {
392+
WorkList.push_back({VarUse.getUser()});
393+
}
394+
while (!WorkList.empty()) {
395+
auto &&WorkItem = WorkList.pop_back_val();
396+
auto *U = WorkItem.back();
397+
if (isa<llvm::ConstantExpr>(U)) {
398+
for (auto &&UU : U->uses()) {
399+
WorkItem.push_back(UU.getUser());
400+
WorkList.push_back(WorkItem);
401+
WorkItem.pop_back();
402+
}
403+
continue;
404+
}
405+
if (auto *I = dyn_cast<llvm::Instruction>(U)) {
406+
llvm::Value *OldV = Var;
407+
llvm::Instruction *NewV =
408+
new llvm::LoadInst(Var->getType(), ManagedVar, "ld.managed", false,
409+
llvm::Align(Var->getAlignment()), I);
410+
WorkItem.pop_back();
411+
// Replace constant expressions directly or indirectly using the managed
412+
// variable with instructions.
413+
for (auto &&Op : WorkItem) {
414+
auto *CE = cast<llvm::ConstantExpr>(Op);
415+
auto *NewInst = llvm::createReplacementInstr(CE, I);
416+
NewInst->replaceUsesOfWith(OldV, NewV);
417+
OldV = CE;
418+
NewV = NewInst;
419+
}
420+
I->replaceUsesOfWith(OldV, NewV);
421+
} else {
422+
llvm_unreachable("Invalid use of managed variable");
423+
}
424+
}
425+
}
426+
383427
/// Creates a function that sets up state on the host side for CUDA objects that
384428
/// have a presence on both the host and device sides. Specifically, registers
385429
/// the host side of kernel functions and device global variables with the CUDA
@@ -452,6 +496,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
452496
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
453497
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
454498
addUnderscoredPrefixToName("RegisterVar"));
499+
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
500+
// size_t, unsigned)
501+
llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
502+
CharPtrTy, VarSizeTy, IntTy};
503+
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
504+
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
505+
addUnderscoredPrefixToName("RegisterManagedVar"));
455506
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
456507
// const void **, const char *, int, int);
457508
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
@@ -474,16 +525,34 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
474525
case DeviceVarFlags::Variable: {
475526
uint64_t VarSize =
476527
CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
477-
llvm::Value *Args[] = {
478-
&GpuBinaryHandlePtr,
479-
Builder.CreateBitCast(Var, VoidPtrTy),
480-
VarName,
481-
VarName,
482-
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
483-
llvm::ConstantInt::get(VarSizeTy, VarSize),
484-
llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
485-
llvm::ConstantInt::get(IntTy, 0)};
486-
Builder.CreateCall(RegisterVar, Args);
528+
if (Info.Flags.isManaged()) {
529+
auto ManagedVar = new llvm::GlobalVariable(
530+
CGM.getModule(), Var->getType(),
531+
/*isConstant=*/false, Var->getLinkage(),
532+
/*Init=*/llvm::ConstantPointerNull::get(Var->getType()),
533+
Twine(Var->getName() + ".managed"), /*InsertBefore=*/nullptr,
534+
llvm::GlobalVariable::NotThreadLocal);
535+
replaceManagedVar(Var, ManagedVar);
536+
llvm::Value *Args[] = {
537+
&GpuBinaryHandlePtr,
538+
Builder.CreateBitCast(ManagedVar, VoidPtrTy),
539+
Builder.CreateBitCast(Var, VoidPtrTy),
540+
VarName,
541+
llvm::ConstantInt::get(VarSizeTy, VarSize),
542+
llvm::ConstantInt::get(IntTy, Var->getAlignment())};
543+
Builder.CreateCall(RegisterManagedVar, Args);
544+
} else {
545+
llvm::Value *Args[] = {
546+
&GpuBinaryHandlePtr,
547+
Builder.CreateBitCast(Var, VoidPtrTy),
548+
VarName,
549+
VarName,
550+
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
551+
llvm::ConstantInt::get(VarSizeTy, VarSize),
552+
llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
553+
llvm::ConstantInt::get(IntTy, 0)};
554+
Builder.CreateCall(RegisterVar, Args);
555+
}
487556
break;
488557
}
489558
case DeviceVarFlags::Surface:

clang/lib/CodeGen/CGCUDARuntime.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,19 @@ class CGCUDARuntime {
5454
unsigned Kind : 2;
5555
unsigned Extern : 1;
5656
unsigned Constant : 1; // Constant variable.
57+
unsigned Managed : 1; // Managed variable.
5758
unsigned Normalized : 1; // Normalized texture.
5859
int SurfTexType; // Type of surface/texutre.
5960

6061
public:
61-
DeviceVarFlags(DeviceVarKind K, bool E, bool C, bool N, int T)
62-
: Kind(K), Extern(E), Constant(C), Normalized(N), SurfTexType(T) {}
62+
DeviceVarFlags(DeviceVarKind K, bool E, bool C, bool M, bool N, int T)
63+
: Kind(K), Extern(E), Constant(C), Managed(M), Normalized(N),
64+
SurfTexType(T) {}
6365

6466
DeviceVarKind getKind() const { return static_cast<DeviceVarKind>(Kind); }
6567
bool isExtern() const { return Extern; }
6668
bool isConstant() const { return Constant; }
69+
bool isManaged() const { return Managed; }
6770
bool isNormalized() const { return Normalized; }
6871
int getSurfTexType() const { return SurfTexType; }
6972
};

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4152,13 +4152,14 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
41524152
// Shadows of initialized device-side global variables are also left
41534153
// undefined.
41544154
bool IsCUDAShadowVar =
4155-
!getLangOpts().CUDAIsDevice &&
4155+
!getLangOpts().CUDAIsDevice && !D->hasAttr<HIPManagedAttr>() &&
41564156
(D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
41574157
D->hasAttr<CUDASharedAttr>());
41584158
bool IsCUDADeviceShadowVar =
41594159
getLangOpts().CUDAIsDevice &&
41604160
(D->getType()->isCUDADeviceBuiltinSurfaceType() ||
4161-
D->getType()->isCUDADeviceBuiltinTextureType());
4161+
D->getType()->isCUDADeviceBuiltinTextureType() ||
4162+
D->hasAttr<HIPManagedAttr>());
41624163
// HIP pinned shadow of initialized host-side global variables are also
41634164
// left undefined.
41644165
if (getLangOpts().CUDA &&

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4493,7 +4493,8 @@ static void handleOptimizeNoneAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
44934493
}
44944494

44954495
static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4496-
if (checkAttrMutualExclusion<CUDASharedAttr>(S, D, AL))
4496+
if (checkAttrMutualExclusion<CUDASharedAttr>(S, D, AL) ||
4497+
checkAttrMutualExclusion<HIPManagedAttr>(S, D, AL))
44974498
return;
44984499
const auto *VD = cast<VarDecl>(D);
44994500
if (VD->hasLocalStorage()) {
@@ -4504,7 +4505,8 @@ static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
45044505
}
45054506

45064507
static void handleSharedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4507-
if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL))
4508+
if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL) ||
4509+
checkAttrMutualExclusion<HIPManagedAttr>(S, D, AL))
45084510
return;
45094511
const auto *VD = cast<VarDecl>(D);
45104512
// extern __shared__ is only allowed on arrays with no length (e.g.
@@ -4569,9 +4571,33 @@ static void handleDeviceAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
45694571
return;
45704572
}
45714573
}
4574+
4575+
if (auto *A = D->getAttr<CUDADeviceAttr>()) {
4576+
if (!A->isImplicit())
4577+
return;
4578+
D->dropAttr<CUDADeviceAttr>();
4579+
}
45724580
D->addAttr(::new (S.Context) CUDADeviceAttr(S.Context, AL));
45734581
}
45744582

4583+
static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4584+
if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL) ||
4585+
checkAttrMutualExclusion<CUDASharedAttr>(S, D, AL)) {
4586+
return;
4587+
}
4588+
4589+
if (const auto *VD = dyn_cast<VarDecl>(D)) {
4590+
if (VD->hasLocalStorage()) {
4591+
S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev);
4592+
return;
4593+
}
4594+
}
4595+
if (!D->hasAttr<HIPManagedAttr>())
4596+
D->addAttr(::new (S.Context) HIPManagedAttr(S.Context, AL));
4597+
if (!D->hasAttr<CUDADeviceAttr>())
4598+
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
4599+
}
4600+
45754601
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
45764602
const auto *Fn = cast<FunctionDecl>(D);
45774603
if (!Fn->isInlineSpecified()) {
@@ -7793,6 +7819,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
77937819
case ParsedAttr::AT_CUDAHost:
77947820
handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, AL);
77957821
break;
7822+
case ParsedAttr::AT_HIPManaged:
7823+
handleManagedAttr(S, D, AL);
7824+
break;
77967825
case ParsedAttr::AT_CUDADeviceBuiltinSurfaceType:
77977826
handleSimpleAttributeWithExclusions<CUDADeviceBuiltinSurfaceTypeAttr,
77987827
CUDADeviceBuiltinTextureTypeAttr>(S, D,

clang/test/AST/Inputs/cuda.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/* Minimal declarations for CUDA support. Testing purposes only. */
2+
3+
#include <stddef.h>
4+
5+
// Make this file work with nvcc, for testing compatibility.
6+
7+
#ifndef __NVCC__
8+
#define __constant__ __attribute__((constant))
9+
#define __device__ __attribute__((device))
10+
#define __global__ __attribute__((global))
11+
#define __host__ __attribute__((host))
12+
#define __shared__ __attribute__((shared))
13+
#define __managed__ __attribute__((managed))
14+
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
15+
16+
struct dim3 {
17+
unsigned x, y, z;
18+
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
19+
};
20+
21+
#ifdef __HIP__
22+
typedef struct hipStream *hipStream_t;
23+
typedef enum hipError {} hipError_t;
24+
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
25+
hipStream_t stream = 0);
26+
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
27+
size_t sharedSize = 0,
28+
hipStream_t stream = 0);
29+
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
30+
dim3 blockDim, void **args,
31+
size_t sharedMem,
32+
hipStream_t stream);
33+
#else
34+
typedef struct cudaStream *cudaStream_t;
35+
typedef enum cudaError {} cudaError_t;
36+
37+
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
38+
size_t sharedSize = 0,
39+
cudaStream_t stream = 0);
40+
extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
41+
size_t sharedSize = 0,
42+
cudaStream_t stream = 0);
43+
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
44+
dim3 blockDim, void **args,
45+
size_t sharedMem, cudaStream_t stream);
46+
#endif
47+
48+
// Host- and device-side placement new overloads.
49+
void *operator new(__SIZE_TYPE__, void *p) { return p; }
50+
void *operator new[](__SIZE_TYPE__, void *p) { return p; }
51+
__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
52+
__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
53+
54+
#endif // !__NVCC__
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -ast-dump -x hip %s | FileCheck %s
2+
// RUN: %clang_cc1 -ast-dump -fcuda-is-device -x hip %s | FileCheck %s
3+
4+
#include "Inputs/cuda.h"
5+
6+
// CHECK-LABEL: VarDecl {{.*}} m1 'int'
7+
// CHECK-NEXT: HIPManagedAttr
8+
// CHECK-NEXT: CUDADeviceAttr {{.*}}Implicit
9+
__managed__ int m1;
10+
11+
// CHECK-LABEL: VarDecl {{.*}} m2 'int'
12+
// CHECK-NEXT: HIPManagedAttr
13+
// CHECK-NEXT: CUDADeviceAttr {{.*}}Implicit
14+
// CHECK-NOT: HIPManagedAttr
15+
// CHECK-NOT: CUDADeviceAttr
16+
__managed__ __managed__ int m2;
17+
18+
// CHECK-LABEL: VarDecl {{.*}} m3 'int'
19+
// CHECK-NEXT: HIPManagedAttr
20+
// CHECK-NEXT: CUDADeviceAttr {{.*}}line
21+
// CHECK-NOT: CUDADeviceAttr {{.*}}Implicit
22+
__managed__ __device__ int m3;
23+
24+
// CHECK-LABEL: VarDecl {{.*}} m3a 'int'
25+
// CHECK-NEXT: CUDADeviceAttr {{.*}}cuda.h
26+
// CHECK-NEXT: HIPManagedAttr
27+
// CHECK-NOT: CUDADeviceAttr {{.*}}Implicit
28+
__device__ __managed__ int m3a;

clang/test/CodeGenCUDA/Inputs/cuda.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#define __global__ __attribute__((global))
88
#define __host__ __attribute__((host))
99
#define __shared__ __attribute__((shared))
10+
#if __HIP__
11+
#define __managed__ __attribute__((managed))
12+
#endif
1013
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
1114

1215
struct dim3 {

0 commit comments

Comments
 (0)