Skip to content

Commit f49e2b0

Browse files
authored
[clang][CGCUDANV] Unify PointerType members of CGNVCUDARuntime (NFC) (#75668)
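Unify the three `llvm::PointerType *` members (`CharPtrTy`, `VoidPtrTy`, `VoidPtrPtrTy`), which all refer to the same LLVM type, into a single `PtrTy` member. Part of the opaque pointer clean-up effort.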
1 parent 82ab0f7 commit f49e2b0

File tree

1 file changed

+38
-50
lines changed

1 file changed

+38
-50
lines changed

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 38 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
3939
private:
4040
llvm::IntegerType *IntTy, *SizeTy;
4141
llvm::Type *VoidTy;
42-
llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
42+
llvm::PointerType *PtrTy;
4343

4444
/// Convenience reference to LLVM Context
4545
llvm::LLVMContext &Context;
@@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
232232
VoidTy = CGM.VoidTy;
233233
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
234234
Zeros[1] = Zeros[0];
235-
236-
CharPtrTy = CGM.UnqualPtrTy;
237-
VoidPtrTy = CGM.UnqualPtrTy;
238-
VoidPtrPtrTy = CGM.UnqualPtrTy;
235+
PtrTy = CGM.UnqualPtrTy;
239236
}
240237

241238
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
242239
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
243-
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
240+
llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
244241
return CGM.CreateRuntimeFunction(
245242
llvm::FunctionType::get(IntTy, Params, false),
246243
addPrefixToName("SetupArgument"));
@@ -250,24 +247,24 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
250247
if (CGM.getLangOpts().HIP) {
251248
// hipError_t hipLaunchByPtr(char *);
252249
return CGM.CreateRuntimeFunction(
253-
llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
250+
llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
254251
}
255252
// cudaError_t cudaLaunch(char *);
256-
return CGM.CreateRuntimeFunction(
257-
llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
253+
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, PtrTy, false),
254+
"cudaLaunch");
258255
}
259256

260257
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
261-
return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
258+
return llvm::FunctionType::get(VoidTy, PtrTy, false);
262259
}
263260

264261
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
265-
return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
262+
return llvm::FunctionType::get(VoidTy, PtrTy, false);
266263
}
267264

268265
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
269-
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), VoidPtrTy,
270-
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
266+
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
267+
llvm::PointerType::getUnqual(Context)};
271268
return llvm::FunctionType::get(VoidTy, Params, false);
272269
}
273270

@@ -330,15 +327,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
330327
// args, allocate a single pointer so we still have a valid pointer to the
331328
// argument array that we can pass to runtime, even if it will be unused.
332329
Address KernelArgs = CGF.CreateTempAlloca(
333-
VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
330+
PtrTy, CharUnits::fromQuantity(16), "kernel_args",
334331
llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
335332
// Store pointers to the arguments in a locally allocated launch_args.
336333
for (unsigned i = 0; i < Args.size(); ++i) {
337334
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
338-
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
335+
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
339336
CGF.Builder.CreateDefaultAlignedStore(
340337
VoidVarPtr,
341-
CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
338+
CGF.Builder.CreateConstGEP1_32(PtrTy, KernelArgs.getPointer(), i));
342339
}
343340

344341
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
@@ -386,8 +383,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
386383
CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
387384
Address ShmemSize =
388385
CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
389-
Address Stream =
390-
CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
386+
Address Stream = CGF.CreateTempAlloca(PtrTy, CGM.getPointerAlign(), "stream");
391387
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
392388
llvm::FunctionType::get(IntTy,
393389
{/*gridDim=*/GridDim.getType(),
@@ -402,8 +398,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
402398
ShmemSize.getPointer(), Stream.getPointer()});
403399

404400
// Emit the call to cudaLaunch
405-
llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
406-
KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
401+
llvm::Value *Kernel =
402+
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
407403
CallArgList LaunchKernelArgs;
408404
LaunchKernelArgs.add(RValue::get(Kernel),
409405
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -443,7 +439,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
443439
Offset = Offset.alignTo(TInfo.Align);
444440
llvm::Value *Args[] = {
445441
CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
446-
VoidPtrTy),
442+
PtrTy),
447443
llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
448444
llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
449445
};
@@ -458,8 +454,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
458454

459455
// Emit the call to cudaLaunch
460456
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
461-
llvm::Value *Arg = CGF.Builder.CreatePointerCast(
462-
KernelHandles[CGF.CurFn->getName()], CharPtrTy);
457+
llvm::Value *Arg =
458+
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
463459
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
464460
CGF.EmitBranch(EndBlock);
465461

@@ -537,11 +533,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
537533
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
538534
// int, uint3*, uint3*, dim3*, dim3*, int*)
539535
llvm::Type *RegisterFuncParams[] = {
540-
VoidPtrPtrTy, CharPtrTy,
541-
CharPtrTy, CharPtrTy,
542-
IntTy, VoidPtrTy,
543-
VoidPtrTy, VoidPtrTy,
544-
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
536+
PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
537+
PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
545538
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
546539
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
547540
addUnderscoredPrefixToName("RegisterFunction"));
@@ -553,7 +546,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
553546
for (auto &&I : EmittedKernels) {
554547
llvm::Constant *KernelName =
555548
makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
556-
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
549+
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
557550
llvm::Value *Args[] = {
558551
&GpuBinaryHandlePtr,
559552
KernelHandles[I.Kernel->getName()],
@@ -576,33 +569,29 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
576569

577570
// void __cudaRegisterVar(void **, char *, char *, const char *,
578571
// int, int, int, int)
579-
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
580-
CharPtrTy, IntTy, VarSizeTy,
581-
IntTy, IntTy};
572+
llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
573+
IntTy, VarSizeTy, IntTy, IntTy};
582574
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
583575
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
584576
addUnderscoredPrefixToName("RegisterVar"));
585577
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
586578
// size_t, unsigned)
587-
llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
588-
CharPtrTy, VarSizeTy, IntTy};
579+
llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
580+
PtrTy, VarSizeTy, IntTy};
589581
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
590582
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
591583
addUnderscoredPrefixToName("RegisterManagedVar"));
592584
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
593585
// const void **, const char *, int, int);
594586
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
595587
llvm::FunctionType::get(
596-
VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
597-
false),
588+
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
598589
addUnderscoredPrefixToName("RegisterSurface"));
599590
// void __cudaRegisterTexture(void **, const struct textureReference *,
600591
// const void **, const char *, int, int, int)
601592
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
602593
llvm::FunctionType::get(
603-
VoidTy,
604-
{VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
605-
false),
594+
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
606595
addUnderscoredPrefixToName("RegisterTexture"));
607596
for (auto &&Info : DeviceVars) {
608597
llvm::GlobalVariable *Var = Info.Var;
@@ -713,11 +702,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
713702

714703
// void ** __{cuda|hip}RegisterFatBinary(void *);
715704
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
716-
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
705+
llvm::FunctionType::get(PtrTy, PtrTy, false),
717706
addUnderscoredPrefixToName("RegisterFatBinary"));
718707
// struct { int magic, int version, void * gpu_binary, void * dont_care };
719708
llvm::StructType *FatbinWrapperTy =
720-
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
709+
llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
721710

722711
// Register GPU binary with the CUDA runtime, store returned handle in a
723712
// global variable and save a reference in GpuBinaryHandle to be cleaned up
@@ -813,7 +802,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
813802
// Data.
814803
Values.add(FatBinStr);
815804
// Unused in fatbin v1.
816-
Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
805+
Values.add(llvm::ConstantPointerNull::get(PtrTy));
817806
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
818807
addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
819808
/*constant*/ true);
@@ -836,9 +825,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
836825
// The name, size, and initialization pattern of this variable is part
837826
// of HIP ABI.
838827
GpuBinaryHandle = new llvm::GlobalVariable(
839-
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
840-
Linkage,
841-
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
828+
TheModule, PtrTy, /*isConstant=*/false, Linkage,
829+
/*Initializer=*/llvm::ConstantPointerNull::get(PtrTy),
842830
"__hip_gpubin_handle");
843831
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
844832
GpuBinaryHandle->setComdat(
@@ -848,7 +836,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
848836
if (Linkage != llvm::GlobalValue::InternalLinkage)
849837
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
850838
Address GpuBinaryAddr(
851-
GpuBinaryHandle, VoidPtrPtrTy,
839+
GpuBinaryHandle, PtrTy,
852840
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
853841
{
854842
auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
@@ -880,8 +868,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
880868
llvm::CallInst *RegisterFatbinCall =
881869
CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
882870
GpuBinaryHandle = new llvm::GlobalVariable(
883-
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
884-
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
871+
TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
872+
llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
885873
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
886874
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
887875
CGM.getPointerAlign());
@@ -895,7 +883,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
895883
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
896884
// void __cudaRegisterFatBinaryEnd(void **);
897885
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
898-
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
886+
llvm::FunctionType::get(VoidTy, PtrTy, false),
899887
"__cudaRegisterFatBinaryEnd");
900888
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
901889
}
@@ -967,7 +955,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
967955

968956
// void __cudaUnregisterFatBinary(void ** handle);
969957
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
970-
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
958+
llvm::FunctionType::get(VoidTy, PtrTy, false),
971959
addUnderscoredPrefixToName("UnregisterFatBinary"));
972960

973961
llvm::Function *ModuleDtorFunc = llvm::Function::Create(

0 commit comments

Comments
 (0)