@@ -124,7 +124,8 @@ class SYCLLowerWGScopeLegacyPass : public FunctionPass {
124
124
return false ;
125
125
126
126
FunctionAnalysisManager FAM;
127
- auto PA = Impl.run (F, FAM);
127
+ auto TT = llvm::Triple (F.getParent ()->getTargetTriple ());
128
+ auto PA = Impl.run (F, TT, FAM);
128
129
return !PA.areAllPreserved ();
129
130
}
130
131
@@ -188,8 +189,8 @@ enum class MemorySemantics : unsigned {
188
189
ImageMemory = 0x800 ,
189
190
};
190
191
191
- Instruction *genWGBarrier (Instruction &Before);
192
- Value *genLinearLocalID (Instruction &Before);
192
+ Instruction *genWGBarrier (Instruction &Before, const Triple &TT );
193
+ Value *genLinearLocalID (Instruction &Before, const Triple &TT );
193
194
GlobalVariable *createWGLocalVariable (Module &M, Type *T, const Twine &Name);
194
195
} // namespace spirv
195
196
@@ -263,8 +264,9 @@ static bool mayHaveSideEffects(const Instruction *I) {
263
264
//
264
265
static void guardBlockWithIsLeaderCheck (BasicBlock *IfBB, BasicBlock *TrueBB,
265
266
BasicBlock *MergeBB,
266
- const DebugLoc &DbgLoc) {
267
- Value *LinearLocalID = spirv::genLinearLocalID (*IfBB->getTerminator ());
267
+ const DebugLoc &DbgLoc,
268
+ const Triple &TT) {
269
+ Value *LinearLocalID = spirv::genLinearLocalID (*IfBB->getTerminator (), TT);
268
270
auto *Ty = LinearLocalID->getType ();
269
271
Value *Zero = Constant::getNullValue (Ty);
270
272
IRBuilder<> Builder (IfBB->getContext ());
@@ -341,7 +343,7 @@ using InstrRange = std::pair<Instruction *, Instruction *>;
341
343
// ...
342
344
// B
343
345
// ... USE2(%I1_new) ...
344
- static void tformRange (const InstrRange &R) {
346
+ static void tformRange (const InstrRange &R, const Triple &TT ) {
345
347
// Instructions seen between the first and the last
346
348
SmallPtrSet<Instruction *, 16 > Seen;
347
349
Instruction *FirstSE = R.first ;
@@ -360,15 +362,15 @@ static void tformRange(const InstrRange &R) {
360
362
361
363
// 1) insert the first "is work group leader" test (at the first split) for
362
364
// the worker WIs to detour the side effects instructions
363
- guardBlockWithIsLeaderCheck (BBa, LeaderBB, BBb, FirstSE->getDebugLoc ());
365
+ guardBlockWithIsLeaderCheck (BBa, LeaderBB, BBb, FirstSE->getDebugLoc (), TT );
364
366
365
367
// 2) "Share" the output values of the instructions in the range
366
368
for (auto *I : Seen)
367
369
shareOutputViaLocalMem (*I, *BBa, *BBb, Seen);
368
370
369
371
// 3) Insert work group barrier so that workers further read valid data
370
372
// (before the materialization reads inserted at step 2)
371
- spirv::genWGBarrier (BBb->front ());
373
+ spirv::genWGBarrier (BBb->front (), TT );
372
374
}
373
375
374
376
namespace {
@@ -443,13 +445,13 @@ static void copyBetweenPrivateAndShadow(Value *L, GlobalVariable *Shadow,
443
445
//
444
446
static void materializeLocalsInWIScopeBlocksImpl (
445
447
const DenseMap<BasicBlock *, std::unique_ptr<LocalsSet>> &BB2MatLocals,
446
- const DenseMap<AllocaInst *, GlobalVariable *> &Local2Shadow) {
448
+ const DenseMap<AllocaInst *, GlobalVariable *> &Local2Shadow, const Triple &TT ) {
447
449
for (auto &P : BB2MatLocals) {
448
450
// generate LeaderBB and private<->shadow copies in proper BBs
449
451
BasicBlock *LeaderBB = P.first ;
450
452
BasicBlock *BB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " LeaderMat" );
451
453
// Add a barrier to the original block:
452
- Instruction *At = spirv::genWGBarrier (*BB->getFirstNonPHI ())->getNextNode ();
454
+ Instruction *At = spirv::genWGBarrier (*BB->getFirstNonPHI (), TT )->getNextNode ();
453
455
454
456
for (AllocaInst *L : *P.second .get ()) {
455
457
auto MapEntry = Local2Shadow.find (L);
@@ -472,7 +474,7 @@ static void materializeLocalsInWIScopeBlocksImpl(
472
474
BasicBlock *TestBB =
473
475
LeaderBB->splitBasicBlock (&LeaderBB->front (), " TestMat" );
474
476
std::swap (TestBB, LeaderBB);
475
- guardBlockWithIsLeaderCheck (TestBB, LeaderBB, BB, At->getDebugLoc ());
477
+ guardBlockWithIsLeaderCheck (TestBB, LeaderBB, BB, At->getDebugLoc (), TT );
476
478
}
477
479
}
478
480
@@ -536,7 +538,8 @@ static bool localMustBeMaterialized(const AllocaInst *L, const BasicBlock &BB) {
536
538
//
537
539
void materializeLocalsInWIScopeBlocks (
538
540
SmallPtrSetImpl<AllocaInst *> &Locals,
539
- SmallPtrSetImpl<BasicBlock *> &WIScopeBBs) {
541
+ SmallPtrSetImpl<BasicBlock *> &WIScopeBBs,
542
+ const Triple &TT) {
540
543
// maps local variable to its "shadow" workgroup-shared global:
541
544
DenseMap<AllocaInst *, GlobalVariable *> Local2Shadow;
542
545
// records which locals must be materialized at the beginning of a block:
@@ -567,7 +570,7 @@ void materializeLocalsInWIScopeBlocks(
567
570
}
568
571
}
569
572
// perform the materialization
570
- materializeLocalsInWIScopeBlocksImpl (BB2MatLocals, Local2Shadow);
573
+ materializeLocalsInWIScopeBlocksImpl (BB2MatLocals, Local2Shadow, TT );
571
574
}
572
575
573
576
#ifndef NDEBUG
@@ -680,7 +683,7 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
680
683
// Go through "byval" parameters which are passed as AS(0) pointers
681
684
// and: (1) create local shadows for them (2) and initialize them from the
682
685
// leader's copy and (3) replace usages with pointer to the shadow
683
- static void shareByValParams (Function &F) {
686
+ static void shareByValParams (Function &F, const Triple &TT ) {
684
687
// split
685
688
BasicBlock *EntryBB = &F.getEntryBlock ();
686
689
BasicBlock *LeaderBB = EntryBB->splitBasicBlock (&EntryBB->front (), " leader" );
@@ -689,7 +692,7 @@ static void shareByValParams(Function &F) {
689
692
// 1) rewire the above basic blocks so that LeaderBB is executed only for the
690
693
// leader workitem
691
694
guardBlockWithIsLeaderCheck (EntryBB, LeaderBB, MergeBB,
692
- EntryBB->back ().getDebugLoc ());
695
+ EntryBB->back ().getDebugLoc (), TT );
693
696
Instruction &At = LeaderBB->back ();
694
697
695
698
for (auto &Arg : F.args ()) {
@@ -715,10 +718,11 @@ static void shareByValParams(Function &F) {
715
718
true /* private->shadow*/ );
716
719
}
717
720
// 5) make sure workers use up-to-date shared values written by the leader
718
- spirv::genWGBarrier (MergeBB->front ());
721
+ spirv::genWGBarrier (MergeBB->front (), TT );
719
722
}
720
723
721
724
PreservedAnalyses SYCLLowerWGScopePass::run (Function &F,
725
+ const llvm::Triple &TT,
722
726
FunctionAnalysisManager &FAM) {
723
727
if (!F.getMetadata (WG_SCOPE_MD))
724
728
return PreservedAnalyses::all ();
@@ -796,7 +800,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
796
800
797
801
// Perform the transformation
798
802
for (auto &R : Ranges) {
799
- tformRange (R);
803
+ tformRange (R, TT );
800
804
Changed = true ;
801
805
}
802
806
// There can be allocas not corresponding to any variable declared in user
@@ -813,14 +817,14 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
813
817
WIScopeBBs.insert (I->getParent ());
814
818
815
819
// Now materialize the locals:
816
- materializeLocalsInWIScopeBlocks (Allocas, WIScopeBBs);
820
+ materializeLocalsInWIScopeBlocks (Allocas, WIScopeBBs, TT );
817
821
818
822
// Fixup captured addresses of private_memory instances in current WI
819
823
for (auto *PFWICall : PFWICalls)
820
824
fixupPrivateMemoryPFWILambdaCaptures (PFWICall);
821
825
822
826
// Finally, create shadows for and replace usages of byval pointer params
823
- shareByValParams (F);
827
+ shareByValParams (F, TT );
824
828
825
829
#ifndef NDEBUG
826
830
if (HaveChanges && Debug > 0 )
@@ -866,37 +870,74 @@ GlobalVariable *spirv::createWGLocalVariable(Module &M, Type *T,
866
870
// Must correspond to the code in
867
871
// llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp
868
872
// OCL20ToSPIRV::transWorkItemBuiltinsToVariables()
869
- Value *spirv::genLinearLocalID (Instruction &Before) {
873
+ Value *spirv::genLinearLocalID (Instruction &Before, const Triple &TT ) {
870
874
Module &M = *Before.getModule ();
871
- StringRef Name = " __spirv_BuiltInLocalInvocationIndex" ;
872
- GlobalVariable *G = M.getGlobalVariable (Name);
873
-
874
- if (!G) {
875
- Type *T = getSizeTTy (M);
876
- G = new GlobalVariable (M, // module
877
- T, // type
878
- true , // isConstant
879
- GlobalValue::ExternalLinkage, // Linkage
880
- nullptr , // Initializer
881
- Name, // Name
882
- nullptr , // InsertBefore
883
- GlobalVariable::NotThreadLocal, // ThreadLocalMode
884
- // TODO 'Input' crashes CPU Back-End
885
- // asUInt(spirv::AddrSpace::Input) // AddressSpace
886
- asUInt (spirv::AddrSpace::Global) // AddressSpace
887
- );
888
- unsigned Align = M.getDataLayout ().getPreferredAlignment (G);
889
- G->setAlignment (MaybeAlign (Align));
875
+ if (TT.isNVPTX ()) {
876
+ LLVMContext &Ctx = Before.getContext ();
877
+ Type *RetTy = getSizeTTy (M);
878
+
879
+ IRBuilder<> Bld (Ctx);
880
+ Bld.SetInsertPoint (&Before);
881
+
882
+ #define CREATE_CALLEE (NAME, FN_NAME ) \
883
+ FunctionCallee FnCallee##NAME = M.getOrInsertFunction (FN_NAME, RetTy); \
884
+ assert (FnCallee##NAME && " spirv intrinsic creation failed" ); \
885
+ auto NAME = Bld.CreateCall (FnCallee##NAME, {});
886
+
887
+ CREATE_CALLEE (LocalInvocationId_X, " _Z27__spirv_LocalInvocationId_xv" );
888
+ CREATE_CALLEE (LocalInvocationId_Y, " _Z27__spirv_LocalInvocationId_yv" );
889
+ CREATE_CALLEE (LocalInvocationId_Z, " _Z27__spirv_LocalInvocationId_zv" );
890
+ CREATE_CALLEE (WorkgroupSize_Y, " _Z23__spirv_WorkgroupSize_yv" );
891
+ CREATE_CALLEE (WorkgroupSize_Z, " _Z23__spirv_WorkgroupSize_zv" );
892
+
893
+ #undef CREATE_CALLEE
894
+
895
+ // 1: ((__spirv_WorkgroupSize_y() * __spirv_WorkgroupSize_z())
896
+ // 2: * __spirv_LocalInvocationId_x())
897
+ // 3: + (__spirv_WorkgroupSize_z() * __spirv_LocalInvocationId_y())
898
+ // 4: + (__spirv_LocalInvocationId_z())
899
+ return Bld.CreateAdd (
900
+ Bld.CreateAdd (
901
+ Bld.CreateMul (
902
+ Bld.CreateMul (WorkgroupSize_Y, WorkgroupSize_Z), // 1
903
+ LocalInvocationId_X), // 2
904
+ Bld.CreateMul (WorkgroupSize_Z, LocalInvocationId_Y)), // 3
905
+ LocalInvocationId_Z); // 4
906
+ } else {
907
+ StringRef Name = " __spirv_BuiltInLocalInvocationIndex" ;
908
+ GlobalVariable *G = M.getGlobalVariable (Name);
909
+
910
+ if (!G) {
911
+ Type *T = getSizeTTy (M);
912
+ G = new GlobalVariable (M, // module
913
+ T, // type
914
+ true , // isConstant
915
+ GlobalValue::ExternalLinkage, // Linkage
916
+ nullptr , // Initializer
917
+ Name, // Name
918
+ nullptr , // InsertBefore
919
+ GlobalVariable::NotThreadLocal, // ThreadLocalMode
920
+ // TODO 'Input' crashes CPU Back-End
921
+ // asUInt(spirv::AddrSpace::Input) // AddressSpace
922
+ asUInt (spirv::AddrSpace::Global) // AddressSpace
923
+ );
924
+ unsigned Align = M.getDataLayout ().getPreferredAlignment (G);
925
+ G->setAlignment (Align);
926
+ }
927
+ Value *Res = new LoadInst (G, " " , &Before);
928
+ return Res;
890
929
}
891
- Value *Res = new LoadInst (G, " " , &Before);
892
- return Res;
893
930
}
894
931
895
932
// extern void __spirv_ControlBarrier(Scope Execution, Scope Memory,
896
933
// uint32_t Semantics) noexcept;
897
- Instruction *spirv::genWGBarrier (Instruction &Before) {
934
+ Instruction *spirv::genWGBarrier (Instruction &Before, const Triple &TT ) {
898
935
Module &M = *Before.getModule ();
899
- StringRef Name = " __spirv_ControlBarrier" ;
936
+ StringRef Name;
937
+ if (TT.isNVPTX ())
938
+ Name = " _Z22__spirv_ControlBarrierN5__spv5ScopeES0_j" ;
939
+ else
940
+ Name = " __spirv_ControlBarrier" ;
900
941
LLVMContext &Ctx = Before.getContext ();
901
942
Type *ScopeTy = Type::getInt32Ty (Ctx);
902
943
Type *SemanticsTy = Type::getInt32Ty (Ctx);
0 commit comments