-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][CGCUDANV] Unify PointerType members of CGNVCUDARuntime (NFC) #75668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Unify the 3 `PointerType *` members, which all refer to the same LLVM type. Part of the opaque pointer clean-up effort.
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Youngsuk Kim (JOE1994) Changes: Unify 3 `PointerType *` members which all refer to the same LLVM type. Opaque pointer clean-up effort. Full diff: https://github.com/llvm/llvm-project/pull/75668.diff 1 Files Affected:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 520b0c4f117673..353370f1d761b9 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
private:
llvm::IntegerType *IntTy, *SizeTy;
llvm::Type *VoidTy;
- llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
+ llvm::PointerType *PtrTy;
/// Convenience reference to LLVM Context
llvm::LLVMContext &Context;
@@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
VoidTy = CGM.VoidTy;
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
Zeros[1] = Zeros[0];
-
- CharPtrTy = CGM.UnqualPtrTy;
- VoidPtrTy = CGM.UnqualPtrTy;
- VoidPtrPtrTy = CGM.UnqualPtrTy;
+ PtrTy = CGM.UnqualPtrTy;
}
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
- llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
+ llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, Params, false),
addPrefixToName("SetupArgument"));
@@ -250,24 +247,24 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
if (CGM.getLangOpts().HIP) {
// hipError_t hipLaunchByPtr(char *);
return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
+ llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
}
// cudaError_t cudaLaunch(char *);
- return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+ return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, PtrTy, false),
+ "cudaLaunch");
}
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
- return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
+ return llvm::FunctionType::get(VoidTy, PtrTy, false);
}
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
- return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
+ return llvm::FunctionType::get(VoidTy, PtrTy, false);
}
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
- llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), VoidPtrTy,
- VoidPtrTy, llvm::PointerType::getUnqual(Context)};
+ llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
+ llvm::PointerType::getUnqual(Context)};
return llvm::FunctionType::get(VoidTy, Params, false);
}
@@ -330,15 +327,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
// args, allocate a single pointer so we still have a valid pointer to the
// argument array that we can pass to runtime, even if it will be unused.
Address KernelArgs = CGF.CreateTempAlloca(
- VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
+ PtrTy, CharUnits::fromQuantity(16), "kernel_args",
llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
// Store pointers to the arguments in a locally allocated launch_args.
for (unsigned i = 0; i < Args.size(); ++i) {
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
- llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
+ llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
CGF.Builder.CreateDefaultAlignedStore(
VoidVarPtr,
- CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
+ CGF.Builder.CreateConstGEP1_32(PtrTy, KernelArgs.getPointer(), i));
}
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
@@ -386,8 +383,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
Address ShmemSize =
CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
- Address Stream =
- CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
+ Address Stream = CGF.CreateTempAlloca(PtrTy, CGM.getPointerAlign(), "stream");
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy,
{/*gridDim=*/GridDim.getType(),
@@ -402,8 +398,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
ShmemSize.getPointer(), Stream.getPointer()});
// Emit the call to cudaLaunch
- llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
- KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
+ llvm::Value *Kernel =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -443,7 +439,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
Offset = Offset.alignTo(TInfo.Align);
llvm::Value *Args[] = {
CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
- VoidPtrTy),
+ PtrTy),
llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
};
@@ -458,8 +454,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
- llvm::Value *Arg = CGF.Builder.CreatePointerCast(
- KernelHandles[CGF.CurFn->getName()], CharPtrTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
@@ -537,11 +533,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
// int, uint3*, uint3*, dim3*, dim3*, int*)
llvm::Type *RegisterFuncParams[] = {
- VoidPtrPtrTy, CharPtrTy,
- CharPtrTy, CharPtrTy,
- IntTy, VoidPtrTy,
- VoidPtrTy, VoidPtrTy,
- VoidPtrTy, llvm::PointerType::getUnqual(Context)};
+ PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
+ PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
addUnderscoredPrefixToName("RegisterFunction"));
@@ -553,7 +546,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
for (auto &&I : EmittedKernels) {
llvm::Constant *KernelName =
makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
- llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
+ llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
KernelHandles[I.Kernel->getName()],
@@ -576,16 +569,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)
- llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
- CharPtrTy, IntTy, VarSizeTy,
- IntTy, IntTy};
+ llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
+ IntTy, VarSizeTy, IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
// size_t, unsigned)
- llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
- CharPtrTy, VarSizeTy, IntTy};
+ llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
+ PtrTy, VarSizeTy, IntTy};
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
addUnderscoredPrefixToName("RegisterManagedVar"));
@@ -593,16 +585,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// const void **, const char *, int, int);
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
- VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
- false),
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
addUnderscoredPrefixToName("RegisterSurface"));
// void __cudaRegisterTexture(void **, const struct textureReference *,
// const void **, const char *, int, int, int)
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
- VoidTy,
- {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
- false),
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
addUnderscoredPrefixToName("RegisterTexture"));
for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;
@@ -713,11 +702,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// void ** __{cuda|hip}RegisterFatBinary(void *);
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
+ llvm::FunctionType::get(PtrTy, PtrTy, false),
addUnderscoredPrefixToName("RegisterFatBinary"));
// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =
- llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
+ llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
// Register GPU binary with the CUDA runtime, store returned handle in a
// global variable and save a reference in GpuBinaryHandle to be cleaned up
@@ -813,7 +802,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Data.
Values.add(FatBinStr);
// Unused in fatbin v1.
- Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+ Values.add(llvm::ConstantPointerNull::get(PtrTy));
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
/*constant*/ true);
@@ -836,9 +825,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// The name, size, and initialization pattern of this variable is part
// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(
- TheModule, VoidPtrPtrTy, /*isConstant=*/false,
- Linkage,
- /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+ TheModule, PtrTy, /*isConstant=*/false, Linkage,
+ /*Initializer=*/llvm::ConstantPointerNull::get(PtrTy),
"__hip_gpubin_handle");
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
GpuBinaryHandle->setComdat(
@@ -848,7 +836,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
if (Linkage != llvm::GlobalValue::InternalLinkage)
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(
- GpuBinaryHandle, VoidPtrPtrTy,
+ GpuBinaryHandle, PtrTy,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
{
auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
@@ -880,8 +868,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::CallInst *RegisterFatbinCall =
CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
GpuBinaryHandle = new llvm::GlobalVariable(
- TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
- llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+ TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
@@ -895,7 +883,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
// void __cudaRegisterFatBinaryEnd(void **);
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
+ llvm::FunctionType::get(VoidTy, PtrTy, false),
"__cudaRegisterFatBinaryEnd");
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
}
@@ -967,7 +955,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
// void __cudaUnregisterFatBinary(void ** handle);
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
+ llvm::FunctionType::get(VoidTy, PtrTy, false),
addUnderscoredPrefixToName("UnregisterFatBinary"));
llvm::Function *ModuleDtorFunc = llvm::Function::Create(
|
I don't know much about this code, so I can't really judge whether this is good and wanted, etc. When I did opaque pointer cleanups myself earlier (removing some no-op bitcasts, using PointerType::get, etc.), I left lots of things like this around on purpose: even if the types are opaque internally in LLVM, it might be nice to see which kinds of types are actually used and dereferenced in the APIs. So the differently named variables help identify which arguments point to what kind of data. Anyway, the type cache in CGNVCUDARuntime probably shouldn't store three copies of the same unqualified pointer type. One idea, if one still wants to keep the names, is to use a union, as is done in CodeGenTypeCache.h. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There are already C-style function signatures in the comments, so I don't see value in keeping separate variable names.
Unify 3
`PointerType *`
members which all refer to the same LLVM type. Opaque pointer clean-up effort.