@@ -375,20 +375,29 @@ using LocalsSet = SmallPtrSet<AllocaInst *, 4>;
375
375
static void copyBetweenPrivateAndShadow (Value *L, GlobalVariable *Shadow,
376
376
IRBuilder<> &Builder, bool Loc2Shadow) {
377
377
Type *T = nullptr ;
378
- int LocAlignN = 0 ;
378
+ MaybeAlign LocAlign ( 0 ) ;
379
379
380
380
if (const auto *AI = dyn_cast<AllocaInst>(L)) {
381
381
T = AI->getAllocatedType ();
382
- LocAlignN = AI->getAlignment ();
382
+ LocAlign = MaybeAlign ( AI->getAlignment () );
383
383
} else {
384
- T = cast<Argument>(L)->getParamByValType ();
385
- LocAlignN = cast<Argument>(L)->getParamAlignment ();
384
+ if (cast<Argument>(L)->hasByValAttr ()) {
385
+ T = cast<Argument>(L)->getParamByValType ();
386
+ LocAlign = MaybeAlign (cast<Argument>(L)->getParamAlignment ());
387
+ } else {
388
+ Type *Ty = cast<Argument>(L)->getType ();
389
+ Module &M = *Shadow->getParent ();
390
+ LocAlign = M.getDataLayout ().getValueOrABITypeAlignment (
391
+ MaybeAlign (cast<Argument>(L)->getParamAlignment ()), Ty);
392
+ auto PtrTy = dyn_cast<PointerType>(cast<Argument>(L)->getType ());
393
+ assert (PtrTy && " Expected pointer type" );
394
+ T = PtrTy->getElementType ();
395
+ }
386
396
}
387
397
388
398
if (T->isAggregateType ()) {
389
399
// TODO: we should use methods which directly return MaybeAlign once such
390
400
// are added to LLVM for AllocaInst and GlobalVariable
391
- auto LocAlign = MaybeAlign (LocAlignN);
392
401
auto ShdAlign = MaybeAlign (Shadow->getAlignment ());
393
402
Module &M = *Shadow->getParent ();
394
403
auto SizeVal = M.getDataLayout ().getTypeStoreSize (T);
@@ -679,10 +688,25 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
679
688
// Go through "byval" parameters which are passed as AS(0) pointers
680
689
// and: (1) create local shadows for them (2) and initialize them from the
681
690
// leader's copy and (3) replace usages with pointer to the shadow
682
- static void shareByValParams (Function &F, const Triple &TT) {
683
- // split
691
+ //
692
+ // Do the same for 'this' pointer which points to PFWG lambda object which is
693
+ // allocated in the caller. Caller is a kernel function which is generated by
694
+ // SYCL frontend. Kernel function allocates PFWG lambda object and initializes
695
+ // captured objects (like accessors) using arguments of the kernel. After
696
+ // initialization kernel calls PFWG function (which is the operator() of the PFWG
697
+ // object). PFWG object captures all objects by value and all uses (except
698
+ // initialization from kernel arguments) of these values can only be in scope of
699
+ // PFWG function, which is why copy back of PFWG object is not needed.
700
+ static void sharePFWGPrivateObjects (Function &F, const Triple &TT) {
701
+ // Skip alloca instructions and split. Alloca instructions must be in the
702
+ // beginning of the function otherwise they are considered as dynamic which
703
+ // can cause problems with inlining.
684
704
BasicBlock *EntryBB = &F.getEntryBlock ();
685
- BasicBlock *LeaderBB = EntryBB->splitBasicBlock (&EntryBB->front (), " leader" );
705
+ Instruction *SplitPoint = &*EntryBB->begin ();
706
+ for (; SplitPoint->getOpcode () == Instruction::Alloca;
707
+ SplitPoint = SplitPoint->getNextNode ())
708
+ ;
709
+ BasicBlock *LeaderBB = EntryBB->splitBasicBlock (SplitPoint, " leader" );
686
710
BasicBlock *MergeBB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " merge" );
687
711
688
712
// 1) rewire the above basic blocks so that LeaderBB is executed only for the
@@ -692,38 +716,48 @@ static void shareByValParams(Function &F, const Triple &TT) {
692
716
Instruction &At = LeaderBB->back ();
693
717
694
718
for (auto &Arg : F.args ()) {
695
- if (!Arg.hasByValAttr ())
696
- continue ;
697
- assert (Arg.getType ()->getPointerAddressSpace () ==
698
- asUInt (spirv::AddrSpace::Private));
699
- Type *T = Arg.getParamByValType ();
700
-
701
- // 2) create the shared copy - "shadow" - for current byval arg
702
- GlobalVariable *Shadow =
703
- spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
719
+ Type *T;
720
+ LLVMContext &Ctx = At.getContext ();
721
+ IRBuilder<> Builder (Ctx);
722
+ Builder.SetInsertPoint (&LeaderBB->front ());
704
723
705
- // 3) replace argument with shadow in all uses
706
- Value *RepVal = Shadow;
707
- if (TT.isNVPTX ()) {
708
- // For NVPTX target address space inference for kernel arguments and
709
- // allocas is happening in the backend (NVPTXLowerArgs and
710
- // NVPTXLowerAlloca passes). After the frontend these pointers are in LLVM
711
- // default address space 0 which is the generic address space for NVPTX
712
- // target.
713
- assert (Arg.getType ()->getPointerAddressSpace () == 0 );
714
-
715
- // Cast a pointer in the shared address space to the generic address
716
- // space.
724
+ // 2) create the shared copy - "shadow" - for current arg
725
+ GlobalVariable *Shadow;
726
+ Value *RepVal;
727
+ if (Arg.hasByValAttr ()) {
728
+ assert (Arg.getType ()->getPointerAddressSpace () ==
729
+ asUInt (spirv::AddrSpace::Private));
730
+ T = Arg.getParamByValType ();
731
+ Shadow = spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
732
+ RepVal = Shadow;
733
+ if (TT.isNVPTX ()) {
734
+ // For NVPTX target address space inference for kernel arguments and
735
+ // allocas is happening in the backend (NVPTXLowerArgs and
736
+ // NVPTXLowerAlloca passes). After the frontend these pointers are in
737
+ // LLVM default address space 0 which is the generic address space for
738
+ // NVPTX target.
739
+ assert (Arg.getType ()->getPointerAddressSpace () == 0 );
740
+
741
+ // Cast a pointer in the shared address space to the generic address
742
+ // space.
743
+ RepVal = ConstantExpr::getPointerBitCastOrAddrSpaceCast (Shadow,
744
+ Arg.getType ());
745
+ }
746
+ }
747
+ // Process 'this' pointer which points to PFWG lambda object
748
+ else if (Arg.getArgNo () == 0 ) {
749
+ PointerType *PtrT = dyn_cast<PointerType>(Arg.getType ());
750
+ assert (PtrT && " Expected this pointer as the first argument" );
751
+ T = PtrT->getElementType ();
752
+ Shadow = spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
717
753
RepVal =
718
- ConstantExpr::getPointerBitCastOrAddrSpaceCast (Shadow, Arg.getType ());
754
+ Builder. CreatePointerBitCastOrAddrSpaceCast (Shadow, Arg.getType ());
719
755
}
756
+
757
+ // 3) replace argument with shadow in all uses
720
758
for (auto *U : Arg.users ())
721
759
U->replaceUsesOfWith (&Arg, RepVal);
722
760
723
- // 4) fill the shadow from the argument for the leader WI only
724
- LLVMContext &Ctx = At.getContext ();
725
- IRBuilder<> Builder (Ctx);
726
- Builder.SetInsertPoint (&LeaderBB->front ());
727
761
copyBetweenPrivateAndShadow (&Arg, Shadow, Builder,
728
762
true /* private->shadow*/ );
729
763
}
@@ -832,8 +866,9 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, const llvm::Triple &TT,
832
866
for (auto *PFWICall : PFWICalls)
833
867
fixupPrivateMemoryPFWILambdaCaptures (PFWICall);
834
868
835
- // Finally, create shadows for and replace usages of byval pointer params
836
- shareByValParams (F, TT);
869
+ // Finally, create shadows for and replace usages of byval pointer params and
870
+ // PFWG lambda object ('this' pointer).
871
+ sharePFWGPrivateObjects (F, TT);
837
872
838
873
#ifndef NDEBUG
839
874
if (HaveChanges && Debug > 0 )
0 commit comments