@@ -190,7 +190,7 @@ enum class MemorySemantics : unsigned {
190
190
191
191
Instruction *genWGBarrier (Instruction &Before);
192
192
Value *genLinearLocalID (Instruction &Before);
193
- Value *createWGLocalVariable (Module &M, Type *T, const Twine &Name);
193
+ GlobalVariable *createWGLocalVariable (Module &M, Type *T, const Twine &Name);
194
194
} // namespace spirv
195
195
196
196
static bool isCallToAFuncMarkedWithMD (const Instruction *I, const char *MD) {
@@ -375,16 +375,25 @@ namespace {
375
375
using LocalsSet = SmallPtrSet<AllocaInst *, 4 >;
376
376
}
377
377
378
- static void copyBetweenLocalAndShadow (AllocaInst *L, GlobalVariable *Shadow,
379
- IRBuilder<> &Builder, bool Loc2Shadow) {
380
- Type *T = L->getAllocatedType ();
378
+ static void copyBetweenPrivateAndShadow (Value *L, GlobalVariable *Shadow,
379
+ IRBuilder<> &Builder, bool Loc2Shadow) {
380
+ Type *T = nullptr ;
381
+ int LocAlignN = 0 ;
382
+
383
+ if (const auto *AI = dyn_cast<AllocaInst>(L)) {
384
+ T = AI->getAllocatedType ();
385
+ LocAlignN = AI->getAlignment ();
386
+ } else {
387
+ T = cast<Argument>(L)->getParamByValType ();
388
+ LocAlignN = cast<Argument>(L)->getParamAlignment ();
389
+ }
381
390
382
391
if (T->isAggregateType ()) {
383
392
// TODO: we should use methods which directly return MaybeAlign once such
384
393
// are added to LLVM for AllocaInst and GlobalVariable
385
- auto LocAlign = MaybeAlign (L-> getAlignment () );
394
+ auto LocAlign = MaybeAlign (LocAlignN );
386
395
auto ShdAlign = MaybeAlign (Shadow->getAlignment ());
387
- Module &M = *L-> getModule ();
396
+ Module &M = *Shadow-> getParent ();
388
397
auto SizeVal = M.getDataLayout ().getTypeStoreSize (T);
389
398
auto Size = ConstantInt::get (getSizeTTy (M), SizeVal);
390
399
if (Loc2Shadow)
@@ -434,9 +443,9 @@ static void copyBetweenLocalAndShadow(AllocaInst *L, GlobalVariable *Shadow,
434
443
//
435
444
static void materializeLocalsInWIScopeBlocksImpl (
436
445
const DenseMap<BasicBlock *, std::unique_ptr<LocalsSet>> &BB2MatLocals,
437
- const DenseMap<AllocaInst *, Value *> &Local2Shadow) {
446
+ const DenseMap<AllocaInst *, GlobalVariable *> &Local2Shadow) {
438
447
for (auto &P : BB2MatLocals) {
439
- // generate LeaderBB and local <->shadow copies in proper BBs
448
+ // generate LeaderBB and private <->shadow copies in proper BBs
440
449
BasicBlock *LeaderBB = P.first ;
441
450
BasicBlock *BB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " LeaderMat" );
442
451
// Add a barrier to the original block:
@@ -445,18 +454,19 @@ static void materializeLocalsInWIScopeBlocksImpl(
445
454
for (AllocaInst *L : *P.second .get ()) {
446
455
auto MapEntry = Local2Shadow.find (L);
447
456
assert (MapEntry != Local2Shadow.end () && " local must have a shadow" );
448
- auto *Shadow = dyn_cast<GlobalVariable>( MapEntry->second ) ;
457
+ auto *Shadow = MapEntry->second ;
449
458
LLVMContext &Ctx = L->getContext ();
450
459
IRBuilder<> Builder (Ctx);
451
460
// fill the leader BB:
452
461
// fetch data from leader's private copy (which is always up to date) into
453
462
// the corresponding shadow variable
454
463
Builder.SetInsertPoint (&LeaderBB->front ());
455
- copyBetweenLocalAndShadow (L, Shadow, Builder, true /* local ->shadow*/ );
464
+ copyBetweenPrivateAndShadow (L, Shadow, Builder, true /* private ->shadow*/ );
456
465
// store data to the local variable - effectively "refresh" the value of
457
466
// the local in each work item in the work group
458
467
Builder.SetInsertPoint (At);
459
- copyBetweenLocalAndShadow (L, Shadow, Builder, false /* shadow->local*/ );
468
+ copyBetweenPrivateAndShadow (L, Shadow, Builder,
469
+ false /* shadow->private*/ );
460
470
}
461
471
// now generate the TestBB and the leader WI guard
462
472
BasicBlock *TestBB =
@@ -528,7 +538,7 @@ void materializeLocalsInWIScopeBlocks(
528
538
SmallPtrSetImpl<AllocaInst *> &Locals,
529
539
SmallPtrSetImpl<BasicBlock *> &WIScopeBBs) {
530
540
// maps local variable to its "shadow" workgroup-shared global:
531
- DenseMap<AllocaInst *, Value *> Local2Shadow;
541
+ DenseMap<AllocaInst *, GlobalVariable *> Local2Shadow;
532
542
// records which locals must be materialized at the beginning of a block:
533
543
DenseMap<BasicBlock *, std::unique_ptr<LocalsSet>> BB2MatLocals;
534
544
@@ -543,7 +553,7 @@ void materializeLocalsInWIScopeBlocks(
543
553
continue ;
544
554
if (Local2Shadow.find (L) == Local2Shadow.end ()) {
545
555
// lazily create a "shadow" for current local:
546
- Value *Shadow = spirv::createWGLocalVariable (
556
+ GlobalVariable *Shadow = spirv::createWGLocalVariable (
547
557
*BB->getModule (), L->getAllocatedType (), " WGCopy" );
548
558
Local2Shadow.insert (std::make_pair (L, Shadow));
549
559
}
@@ -667,6 +677,47 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
667
677
}
668
678
}
669
679
680
+ // Go through "byval" parameters which are passed as AS(0) pointers
681
+ // and: (1) create local shadows for them (2) and initialize them from the
682
+ // leader's copy and (3) replace usages with pointer to the shadow
683
+ static void shareByValParams (Function &F) {
684
+ // split
685
+ BasicBlock *EntryBB = &F.getEntryBlock ();
686
+ BasicBlock *LeaderBB = EntryBB->splitBasicBlock (&EntryBB->front (), " leader" );
687
+ BasicBlock *MergeBB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " merge" );
688
+
689
+ // 1) rewire the above basic blocks so that LeaderBB is executed only for the
690
+ // leader workitem
691
+ guardBlockWithIsLeaderCheck (EntryBB, LeaderBB, MergeBB,
692
+ EntryBB->back ().getDebugLoc ());
693
+ Instruction &At = LeaderBB->back ();
694
+
695
+ for (auto &Arg : F.args ()) {
696
+ if (!Arg.hasByValAttr ())
697
+ continue ;
698
+ assert (Arg.getType ()->getPointerAddressSpace () ==
699
+ asUInt (spirv::AddrSpace::Private));
700
+ Type *T = Arg.getParamByValType ();
701
+
702
+ // 2) create the shared copy - "shadow" - for current byval arg
703
+ GlobalVariable *Shadow =
704
+ spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
705
+
706
+ // 3) replace argument with shadow in all uses
707
+ for (auto *U : Arg.users ())
708
+ U->replaceUsesOfWith (&Arg, Shadow);
709
+
710
+ // 4) fill the shadow from the argument for the leader WI only
711
+ LLVMContext &Ctx = At.getContext ();
712
+ IRBuilder<> Builder (Ctx);
713
+ Builder.SetInsertPoint (&LeaderBB->front ());
714
+ copyBetweenPrivateAndShadow (&Arg, Shadow, Builder,
715
+ true /* private->shadow*/ );
716
+ }
717
+ // 5) make sure workers use up-to-date shared values written by the leader
718
+ spirv::genWGBarrier (MergeBB->front ());
719
+ }
720
+
670
721
PreservedAnalyses SYCLLowerWGScopePass::run (Function &F,
671
722
FunctionAnalysisManager &FAM) {
672
723
if (!F.getMetadata (WG_SCOPE_MD))
@@ -729,7 +780,13 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
729
780
}
730
781
}
731
782
#ifndef NDEBUG
732
- bool HaveChanges = (Ranges.size () > 0 ) || (Allocas.size () > 0 );
783
+ int NByval = 0 ;
784
+ for (const auto &Arg : F.args ()) {
785
+ if (Arg.hasByValAttr ())
786
+ NByval++;
787
+ }
788
+
789
+ bool HaveChanges = (Ranges.size () > 0 ) || (Allocas.size () > 0 ) || NByval > 0 ;
733
790
734
791
if (HaveChanges && Debug > 1 ) {
735
792
dumpIR (F, " before" );
@@ -762,6 +819,9 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
762
819
for (auto *PFWICall : PFWICalls)
763
820
fixupPrivateMemoryPFWILambdaCaptures (PFWICall);
764
821
822
+ // Finally, create shadows for and replace usages of byval pointer params
823
+ shareByValParams (F);
824
+
765
825
#ifndef NDEBUG
766
826
if (HaveChanges && Debug > 0 )
767
827
verifyModule (*F.getParent (), &llvm::errs ());
@@ -773,7 +833,8 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
773
833
return Changed ? PreservedAnalyses::none () : PreservedAnalyses::all ();
774
834
}
775
835
776
- Value *spirv::createWGLocalVariable (Module &M, Type *T, const Twine &Name) {
836
+ GlobalVariable *spirv::createWGLocalVariable (Module &M, Type *T,
837
+ const Twine &Name) {
777
838
GlobalVariable *G =
778
839
new GlobalVariable (M, // module
779
840
T, // type
0 commit comments