@@ -556,7 +556,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
556
556
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get (VoidPtrTy);
557
557
llvm::Value *Args[] = {
558
558
&GpuBinaryHandlePtr,
559
- Builder. CreateBitCast ( KernelHandles[I.Kernel ->getName ()], VoidPtrTy) ,
559
+ KernelHandles[I.Kernel ->getName ()],
560
560
KernelName,
561
561
KernelName,
562
562
llvm::ConstantInt::get (IntTy, -1 ),
@@ -631,8 +631,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
631
631
replaceManagedVar (Var, ManagedVar);
632
632
llvm::Value *Args[] = {
633
633
&GpuBinaryHandlePtr,
634
- Builder. CreateBitCast ( ManagedVar, VoidPtrTy) ,
635
- Builder. CreateBitCast ( Var, VoidPtrTy) ,
634
+ ManagedVar,
635
+ Var,
636
636
VarName,
637
637
llvm::ConstantInt::get (VarSizeTy, VarSize),
638
638
llvm::ConstantInt::get (IntTy, Var->getAlignment ())};
@@ -641,7 +641,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
641
641
} else {
642
642
llvm::Value *Args[] = {
643
643
&GpuBinaryHandlePtr,
644
- Builder. CreateBitCast ( Var, VoidPtrTy) ,
644
+ Var,
645
645
VarName,
646
646
VarName,
647
647
llvm::ConstantInt::get (IntTy, Info.Flags .isExtern ()),
@@ -655,15 +655,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
655
655
case DeviceVarFlags::Surface:
656
656
Builder.CreateCall (
657
657
RegisterSurf,
658
- {&GpuBinaryHandlePtr, Builder. CreateBitCast ( Var, VoidPtrTy) , VarName,
659
- VarName, llvm::ConstantInt::get (IntTy, Info.Flags .getSurfTexType ()),
658
+ {&GpuBinaryHandlePtr, Var, VarName , VarName,
659
+ llvm::ConstantInt::get (IntTy, Info.Flags .getSurfTexType ()),
660
660
llvm::ConstantInt::get (IntTy, Info.Flags .isExtern ())});
661
661
break ;
662
662
case DeviceVarFlags::Texture:
663
663
Builder.CreateCall (
664
664
RegisterTex,
665
- {&GpuBinaryHandlePtr, Builder. CreateBitCast ( Var, VoidPtrTy) , VarName,
666
- VarName, llvm::ConstantInt::get (IntTy, Info.Flags .getSurfTexType ()),
665
+ {&GpuBinaryHandlePtr, Var, VarName , VarName,
666
+ llvm::ConstantInt::get (IntTy, Info.Flags .getSurfTexType ()),
667
667
llvm::ConstantInt::get (IntTy, Info.Flags .isNormalized ()),
668
668
llvm::ConstantInt::get (IntTy, Info.Flags .isExtern ())});
669
669
break ;
@@ -860,9 +860,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
860
860
{
861
861
CtorBuilder.SetInsertPoint (IfBlock);
862
862
// GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
863
- llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall (
864
- RegisterFatbinFunc,
865
- CtorBuilder.CreateBitCast (FatbinWrapper, VoidPtrTy));
863
+ llvm::CallInst *RegisterFatbinCall =
864
+ CtorBuilder.CreateCall (RegisterFatbinFunc, FatbinWrapper);
866
865
CtorBuilder.CreateStore (RegisterFatbinCall, GpuBinaryAddr);
867
866
CtorBuilder.CreateBr (ExitBlock);
868
867
}
@@ -878,9 +877,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
878
877
// Register binary with CUDA runtime. This is substantially different in
879
878
// default mode vs. separate compilation!
880
879
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
881
- llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall (
882
- RegisterFatbinFunc,
883
- CtorBuilder.CreateBitCast (FatbinWrapper, VoidPtrTy));
880
+ llvm::CallInst *RegisterFatbinCall =
881
+ CtorBuilder.CreateCall (RegisterFatbinFunc, FatbinWrapper);
884
882
GpuBinaryHandle = new llvm::GlobalVariable (
885
883
TheModule, VoidPtrPtrTy, false , llvm::GlobalValue::InternalLinkage,
886
884
llvm::ConstantPointerNull::get (VoidPtrPtrTy), " __cuda_gpubin_handle" );
@@ -921,9 +919,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
921
919
getRegisterLinkedBinaryFnTy (), RegisterLinkedBinaryName);
922
920
923
921
assert (RegisterGlobalsFunc && " Expecting at least dummy function!" );
924
- llvm::Value *Args[] = {RegisterGlobalsFunc,
925
- CtorBuilder.CreateBitCast (FatbinWrapper, VoidPtrTy),
926
- ModuleIDConstant,
922
+ llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
927
923
makeDummyFunction (getCallbackFnTy ())};
928
924
CtorBuilder.CreateCall (RegisterLinkedBinaryFunc, Args);
929
925
}
0 commit comments