@@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
39
39
private:
40
40
llvm::IntegerType *IntTy, *SizeTy;
41
41
llvm::Type *VoidTy;
42
- llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy ;
42
+ llvm::PointerType *PtrTy ;
43
43
44
44
/// Convenience reference to the LLVM context.
45
45
llvm::LLVMContext &Context;
@@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
232
232
VoidTy = CGM.VoidTy ;
233
233
Zeros[0 ] = llvm::ConstantInt::get (SizeTy, 0 );
234
234
Zeros[1 ] = Zeros[0 ];
235
-
236
- CharPtrTy = CGM.UnqualPtrTy ;
237
- VoidPtrTy = CGM.UnqualPtrTy ;
238
- VoidPtrPtrTy = CGM.UnqualPtrTy ;
235
+ PtrTy = CGM.UnqualPtrTy ;
239
236
}
240
237
241
238
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn () const {
242
239
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
243
- llvm::Type *Params[] = {VoidPtrTy , SizeTy, SizeTy};
240
+ llvm::Type *Params[] = {PtrTy , SizeTy, SizeTy};
244
241
return CGM.CreateRuntimeFunction (
245
242
llvm::FunctionType::get (IntTy, Params, false ),
246
243
addPrefixToName (" SetupArgument" ));
@@ -250,24 +247,24 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
250
247
if (CGM.getLangOpts ().HIP ) {
251
248
// hipError_t hipLaunchByPtr(char *);
252
249
return CGM.CreateRuntimeFunction (
253
- llvm::FunctionType::get (IntTy, CharPtrTy , false ), " hipLaunchByPtr" );
250
+ llvm::FunctionType::get (IntTy, PtrTy , false ), " hipLaunchByPtr" );
254
251
}
255
252
// cudaError_t cudaLaunch(char *);
256
- return CGM.CreateRuntimeFunction (
257
- llvm::FunctionType::get (IntTy, CharPtrTy, false ), " cudaLaunch" );
253
+ return CGM.CreateRuntimeFunction (llvm::FunctionType::get (IntTy, PtrTy, false ),
254
+ " cudaLaunch" );
258
255
}
259
256
260
257
// Returns the LLVM function type built from a VoidTy return and a single
// pointer parameter; the trailing `false` requests a non-variadic type.
// This diff hunk only swaps the parameter from VoidPtrPtrTy to PtrTy. Both
// members are assigned CGM.UnqualPtrTy in the constructor hunk of this diff,
// so the resulting function type is the same before and after.
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy () const {
261
- return llvm::FunctionType::get (VoidTy, VoidPtrPtrTy , false );
258
+ return llvm::FunctionType::get (VoidTy, PtrTy , false );
262
259
}
263
260
264
261
// Returns the LLVM function type built from a VoidTy return and a single
// pointer parameter; the trailing `false` requests a non-variadic type.
// This diff hunk only renames the parameter type from VoidPtrTy to PtrTy
// (both are assigned CGM.UnqualPtrTy in the constructor hunk of this diff).
// NOTE(review): as written this is the same type getRegisterGlobalsFnTy()
// builds; the two are presumably kept separate for readability at call
// sites -- confirm against callers.
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy () const {
265
- return llvm::FunctionType::get (VoidTy, VoidPtrTy , false );
262
+ return llvm::FunctionType::get (VoidTy, PtrTy , false );
266
263
}
267
264
268
265
// Returns the LLVM function type built from a VoidTy return and the four
// pointer parameters collected in Params; the trailing `false` requests a
// non-variadic type.
// The first and last parameters are spelled llvm::PointerType::getUnqual(Context)
// while the middle two use the PtrTy member (VoidPtrTy before this diff).
// NOTE(review): PtrTy is assigned CGM.UnqualPtrTy in the constructor hunk;
// presumably that is the same type getUnqual(Context) returns, in which case
// all four entries could use PtrTy -- confirm before unifying.
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy () const {
269
- llvm::Type *Params[] = {llvm::PointerType::getUnqual (Context), VoidPtrTy ,
270
- VoidPtrTy, llvm::PointerType::getUnqual (Context)};
266
+ llvm::Type *Params[] = {llvm::PointerType::getUnqual (Context), PtrTy, PtrTy ,
267
+ llvm::PointerType::getUnqual (Context)};
271
268
return llvm::FunctionType::get (VoidTy, Params, false );
272
269
}
273
270
@@ -330,15 +327,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
330
327
// args, allocate a single pointer so we still have a valid pointer to the
331
328
// argument array that we can pass to runtime, even if it will be unused.
332
329
Address KernelArgs = CGF.CreateTempAlloca (
333
- VoidPtrTy , CharUnits::fromQuantity (16 ), " kernel_args" ,
330
+ PtrTy , CharUnits::fromQuantity (16 ), " kernel_args" ,
334
331
llvm::ConstantInt::get (SizeTy, std::max<size_t >(1 , Args.size ())));
335
332
// Store pointers to the arguments in the locally allocated kernel_args array.
336
333
for (unsigned i = 0 ; i < Args.size (); ++i) {
337
334
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar (Args[i]).getPointer ();
338
- llvm::Value *VoidVarPtr = CGF.Builder .CreatePointerCast (VarPtr, VoidPtrTy );
335
+ llvm::Value *VoidVarPtr = CGF.Builder .CreatePointerCast (VarPtr, PtrTy );
339
336
CGF.Builder .CreateDefaultAlignedStore (
340
337
VoidVarPtr,
341
- CGF.Builder .CreateConstGEP1_32 (VoidPtrTy , KernelArgs.getPointer (), i));
338
+ CGF.Builder .CreateConstGEP1_32 (PtrTy , KernelArgs.getPointer (), i));
342
339
}
343
340
344
341
llvm::BasicBlock *EndBlock = CGF.createBasicBlock (" setup.end" );
@@ -386,8 +383,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
386
383
CGF.CreateMemTemp (Dim3Ty, CharUnits::fromQuantity (8 ), " block_dim" );
387
384
Address ShmemSize =
388
385
CGF.CreateTempAlloca (SizeTy, CGM.getSizeAlign (), " shmem_size" );
389
- Address Stream =
390
- CGF.CreateTempAlloca (VoidPtrTy, CGM.getPointerAlign (), " stream" );
386
+ Address Stream = CGF.CreateTempAlloca (PtrTy, CGM.getPointerAlign (), " stream" );
391
387
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction (
392
388
llvm::FunctionType::get (IntTy,
393
389
{/* gridDim=*/ GridDim.getType (),
@@ -402,8 +398,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
402
398
ShmemSize.getPointer (), Stream.getPointer ()});
403
399
404
400
// Emit the call to cudaLaunch
405
- llvm::Value *Kernel = CGF. Builder . CreatePointerCast (
406
- KernelHandles[CGF.CurFn ->getName ()], VoidPtrTy );
401
+ llvm::Value *Kernel =
402
+ CGF. Builder . CreatePointerCast ( KernelHandles[CGF.CurFn ->getName ()], PtrTy );
407
403
CallArgList LaunchKernelArgs;
408
404
LaunchKernelArgs.add (RValue::get (Kernel),
409
405
cudaLaunchKernelFD->getParamDecl (0 )->getType ());
@@ -443,7 +439,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
443
439
Offset = Offset.alignTo (TInfo.Align );
444
440
llvm::Value *Args[] = {
445
441
CGF.Builder .CreatePointerCast (CGF.GetAddrOfLocalVar (A).getPointer (),
446
- VoidPtrTy ),
442
+ PtrTy ),
447
443
llvm::ConstantInt::get (SizeTy, TInfo.Width .getQuantity ()),
448
444
llvm::ConstantInt::get (SizeTy, Offset.getQuantity ()),
449
445
};
@@ -458,8 +454,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
458
454
459
455
// Emit the call to cudaLaunch
460
456
llvm::FunctionCallee cudaLaunchFn = getLaunchFn ();
461
- llvm::Value *Arg = CGF. Builder . CreatePointerCast (
462
- KernelHandles[CGF.CurFn ->getName ()], CharPtrTy );
457
+ llvm::Value *Arg =
458
+ CGF. Builder . CreatePointerCast ( KernelHandles[CGF.CurFn ->getName ()], PtrTy );
463
459
CGF.EmitRuntimeCallOrInvoke (cudaLaunchFn, Arg);
464
460
CGF.EmitBranch (EndBlock);
465
461
@@ -537,11 +533,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
537
533
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
538
534
// int, uint3*, uint3*, dim3*, dim3*, int*)
539
535
llvm::Type *RegisterFuncParams[] = {
540
- VoidPtrPtrTy, CharPtrTy,
541
- CharPtrTy, CharPtrTy,
542
- IntTy, VoidPtrTy,
543
- VoidPtrTy, VoidPtrTy,
544
- VoidPtrTy, llvm::PointerType::getUnqual (Context)};
536
+ PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
537
+ PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual (Context)};
545
538
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction (
546
539
llvm::FunctionType::get (IntTy, RegisterFuncParams, false ),
547
540
addUnderscoredPrefixToName (" RegisterFunction" ));
@@ -553,7 +546,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
553
546
for (auto &&I : EmittedKernels) {
554
547
llvm::Constant *KernelName =
555
548
makeConstantString (getDeviceSideName (cast<NamedDecl>(I.D )));
556
- llvm::Constant *NullPtr = llvm::ConstantPointerNull::get (VoidPtrTy );
549
+ llvm::Constant *NullPtr = llvm::ConstantPointerNull::get (PtrTy );
557
550
llvm::Value *Args[] = {
558
551
&GpuBinaryHandlePtr,
559
552
KernelHandles[I.Kernel ->getName ()],
@@ -576,33 +569,29 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
576
569
577
570
// void __cudaRegisterVar(void **, char *, char *, const char *,
578
571
// int, int, int, int)
579
- llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
580
- CharPtrTy, IntTy, VarSizeTy,
581
- IntTy, IntTy};
572
+ llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
573
+ IntTy, VarSizeTy, IntTy, IntTy};
582
574
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction (
583
575
llvm::FunctionType::get (VoidTy, RegisterVarParams, false ),
584
576
addUnderscoredPrefixToName (" RegisterVar" ));
585
577
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
586
578
// size_t, unsigned)
587
- llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy ,
588
- CharPtrTy, VarSizeTy, IntTy};
579
+ llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy ,
580
+ PtrTy, VarSizeTy, IntTy};
589
581
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction (
590
582
llvm::FunctionType::get (VoidTy, RegisterManagedVarParams, false ),
591
583
addUnderscoredPrefixToName (" RegisterManagedVar" ));
592
584
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
593
585
// const void **, const char *, int, int);
594
586
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction (
595
587
llvm::FunctionType::get (
596
- VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
597
- false ),
588
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false ),
598
589
addUnderscoredPrefixToName (" RegisterSurface" ));
599
590
// void __cudaRegisterTexture(void **, const struct textureReference *,
600
591
// const void **, const char *, int, int, int)
601
592
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction (
602
593
llvm::FunctionType::get (
603
- VoidTy,
604
- {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
605
- false ),
594
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false ),
606
595
addUnderscoredPrefixToName (" RegisterTexture" ));
607
596
for (auto &&Info : DeviceVars) {
608
597
llvm::GlobalVariable *Var = Info.Var ;
@@ -713,11 +702,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
713
702
714
703
// void ** __{cuda|hip}RegisterFatBinary(void *);
715
704
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction (
716
- llvm::FunctionType::get (VoidPtrPtrTy, VoidPtrTy , false ),
705
+ llvm::FunctionType::get (PtrTy, PtrTy , false ),
717
706
addUnderscoredPrefixToName (" RegisterFatBinary" ));
718
707
// struct { int magic, int version, void * gpu_binary, void * dont_care };
719
708
llvm::StructType *FatbinWrapperTy =
720
- llvm::StructType::get (IntTy, IntTy, VoidPtrTy, VoidPtrTy );
709
+ llvm::StructType::get (IntTy, IntTy, PtrTy, PtrTy );
721
710
722
711
// Register GPU binary with the CUDA runtime, store returned handle in a
723
712
// global variable and save a reference in GpuBinaryHandle to be cleaned up
@@ -813,7 +802,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
813
802
// Data.
814
803
Values.add (FatBinStr);
815
804
// Unused in fatbin v1.
816
- Values.add (llvm::ConstantPointerNull::get (VoidPtrTy ));
805
+ Values.add (llvm::ConstantPointerNull::get (PtrTy ));
817
806
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal (
818
807
addUnderscoredPrefixToName (" _fatbin_wrapper" ), CGM.getPointerAlign (),
819
808
/* constant*/ true );
@@ -836,9 +825,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
836
825
// The name, size, and initialization pattern of this variable is part
837
826
// of HIP ABI.
838
827
GpuBinaryHandle = new llvm::GlobalVariable (
839
- TheModule, VoidPtrPtrTy, /* isConstant=*/ false ,
840
- Linkage,
841
- /* Initializer=*/ llvm::ConstantPointerNull::get (VoidPtrPtrTy),
828
+ TheModule, PtrTy, /* isConstant=*/ false , Linkage,
829
+ /* Initializer=*/ llvm::ConstantPointerNull::get (PtrTy),
842
830
" __hip_gpubin_handle" );
843
831
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
844
832
GpuBinaryHandle->setComdat (
@@ -848,7 +836,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
848
836
if (Linkage != llvm::GlobalValue::InternalLinkage)
849
837
GpuBinaryHandle->setVisibility (llvm::GlobalValue::HiddenVisibility);
850
838
Address GpuBinaryAddr (
851
- GpuBinaryHandle, VoidPtrPtrTy ,
839
+ GpuBinaryHandle, PtrTy ,
852
840
CharUnits::fromQuantity (GpuBinaryHandle->getAlignment ()));
853
841
{
854
842
auto *HandleValue = CtorBuilder.CreateLoad (GpuBinaryAddr);
@@ -880,8 +868,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
880
868
llvm::CallInst *RegisterFatbinCall =
881
869
CtorBuilder.CreateCall (RegisterFatbinFunc, FatbinWrapper);
882
870
GpuBinaryHandle = new llvm::GlobalVariable (
883
- TheModule, VoidPtrPtrTy , false , llvm::GlobalValue::InternalLinkage,
884
- llvm::ConstantPointerNull::get (VoidPtrPtrTy ), " __cuda_gpubin_handle" );
871
+ TheModule, PtrTy , false , llvm::GlobalValue::InternalLinkage,
872
+ llvm::ConstantPointerNull::get (PtrTy ), " __cuda_gpubin_handle" );
885
873
GpuBinaryHandle->setAlignment (CGM.getPointerAlign ().getAsAlign ());
886
874
CtorBuilder.CreateAlignedStore (RegisterFatbinCall, GpuBinaryHandle,
887
875
CGM.getPointerAlign ());
@@ -895,7 +883,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
895
883
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
896
884
// void __cudaRegisterFatBinaryEnd(void **);
897
885
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction (
898
- llvm::FunctionType::get (VoidTy, VoidPtrPtrTy , false ),
886
+ llvm::FunctionType::get (VoidTy, PtrTy , false ),
899
887
" __cudaRegisterFatBinaryEnd" );
900
888
CtorBuilder.CreateCall (RegisterFatbinEndFunc, RegisterFatbinCall);
901
889
}
@@ -967,7 +955,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
967
955
968
956
// void __cudaUnregisterFatBinary(void ** handle);
969
957
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction (
970
- llvm::FunctionType::get (VoidTy, VoidPtrPtrTy , false ),
958
+ llvm::FunctionType::get (VoidTy, PtrTy , false ),
971
959
addUnderscoredPrefixToName (" UnregisterFatBinary" ));
972
960
973
961
llvm::Function *ModuleDtorFunc = llvm::Function::Create (
0 commit comments