Skip to content

[clang][CGCUDANV] Unify PointerType members of CGNVCUDARuntime (NFC) #75668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 16, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 38 additions & 50 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
private:
llvm::IntegerType *IntTy, *SizeTy;
llvm::Type *VoidTy;
llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
llvm::PointerType *PtrTy;

/// Convenience reference to LLVM Context
llvm::LLVMContext &Context;
Expand Down Expand Up @@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
VoidTy = CGM.VoidTy;
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
Zeros[1] = Zeros[0];

CharPtrTy = CGM.UnqualPtrTy;
VoidPtrTy = CGM.UnqualPtrTy;
VoidPtrPtrTy = CGM.UnqualPtrTy;
PtrTy = CGM.UnqualPtrTy;
}

llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, Params, false),
addPrefixToName("SetupArgument"));
Expand All @@ -250,24 +247,24 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
if (CGM.getLangOpts().HIP) {
// hipError_t hipLaunchByPtr(char *);
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
}
// cudaError_t cudaLaunch(char *);
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, PtrTy, false),
"cudaLaunch");
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
return llvm::FunctionType::get(VoidTy, PtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
return llvm::FunctionType::get(VoidTy, PtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), VoidPtrTy,
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
llvm::PointerType::getUnqual(Context)};
return llvm::FunctionType::get(VoidTy, Params, false);
}

Expand Down Expand Up @@ -330,15 +327,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
// args, allocate a single pointer so we still have a valid pointer to the
// argument array that we can pass to runtime, even if it will be unused.
Address KernelArgs = CGF.CreateTempAlloca(
VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
PtrTy, CharUnits::fromQuantity(16), "kernel_args",
llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
// Store pointers to the arguments in a locally allocated launch_args.
for (unsigned i = 0; i < Args.size(); ++i) {
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
CGF.Builder.CreateDefaultAlignedStore(
VoidVarPtr,
CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
CGF.Builder.CreateConstGEP1_32(PtrTy, KernelArgs.getPointer(), i));
}

llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
Expand Down Expand Up @@ -386,8 +383,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
Address ShmemSize =
CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
Address Stream =
CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
Address Stream = CGF.CreateTempAlloca(PtrTy, CGM.getPointerAlign(), "stream");
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy,
{/*gridDim=*/GridDim.getType(),
Expand All @@ -402,8 +398,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
ShmemSize.getPointer(), Stream.getPointer()});

// Emit the call to cudaLaunch
llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
llvm::Value *Kernel =
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
Expand Down Expand Up @@ -443,7 +439,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
Offset = Offset.alignTo(TInfo.Align);
llvm::Value *Args[] = {
CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
VoidPtrTy),
PtrTy),
llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
};
Expand All @@ -458,8 +454,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,

// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
llvm::Value *Arg = CGF.Builder.CreatePointerCast(
KernelHandles[CGF.CurFn->getName()], CharPtrTy);
llvm::Value *Arg =
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);

Expand Down Expand Up @@ -537,11 +533,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
// int, uint3*, uint3*, dim3*, dim3*, int*)
llvm::Type *RegisterFuncParams[] = {
VoidPtrPtrTy, CharPtrTy,
CharPtrTy, CharPtrTy,
IntTy, VoidPtrTy,
VoidPtrTy, VoidPtrTy,
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
addUnderscoredPrefixToName("RegisterFunction"));
Expand All @@ -553,7 +546,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
for (auto &&I : EmittedKernels) {
llvm::Constant *KernelName =
makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
KernelHandles[I.Kernel->getName()],
Expand All @@ -576,33 +569,29 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {

// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
CharPtrTy, IntTy, VarSizeTy,
IntTy, IntTy};
llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
IntTy, VarSizeTy, IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
// size_t, unsigned)
llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
CharPtrTy, VarSizeTy, IntTy};
llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
PtrTy, VarSizeTy, IntTy};
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
addUnderscoredPrefixToName("RegisterManagedVar"));
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
// const void **, const char *, int, int);
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
false),
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
addUnderscoredPrefixToName("RegisterSurface"));
// void __cudaRegisterTexture(void **, const struct textureReference *,
// const void **, const char *, int, int, int)
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
VoidTy,
{VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
false),
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
addUnderscoredPrefixToName("RegisterTexture"));
for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;
Expand Down Expand Up @@ -713,11 +702,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {

// void ** __{cuda|hip}RegisterFatBinary(void *);
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
llvm::FunctionType::get(PtrTy, PtrTy, false),
addUnderscoredPrefixToName("RegisterFatBinary"));
// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);

// Register GPU binary with the CUDA runtime, store returned handle in a
// global variable and save a reference in GpuBinaryHandle to be cleaned up
Expand Down Expand Up @@ -813,7 +802,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Data.
Values.add(FatBinStr);
// Unused in fatbin v1.
Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
Values.add(llvm::ConstantPointerNull::get(PtrTy));
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
/*constant*/ true);
Expand All @@ -836,9 +825,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// The name, size, and initialization pattern of this variable is part
// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
TheModule, PtrTy, /*isConstant=*/false, Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(PtrTy),
"__hip_gpubin_handle");
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
GpuBinaryHandle->setComdat(
Expand All @@ -848,7 +836,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
if (Linkage != llvm::GlobalValue::InternalLinkage)
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(
GpuBinaryHandle, VoidPtrPtrTy,
GpuBinaryHandle, PtrTy,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
{
auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
Expand Down Expand Up @@ -880,8 +868,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::CallInst *RegisterFatbinCall =
CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
Expand All @@ -895,7 +883,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
// void __cudaRegisterFatBinaryEnd(void **);
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
llvm::FunctionType::get(VoidTy, PtrTy, false),
"__cudaRegisterFatBinaryEnd");
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
}
Expand Down Expand Up @@ -967,7 +955,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {

// void __cudaUnregisterFatBinary(void ** handle);
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
llvm::FunctionType::get(VoidTy, PtrTy, false),
addUnderscoredPrefixToName("UnregisterFatBinary"));

llvm::Function *ModuleDtorFunc = llvm::Function::Create(
Expand Down