65
65
// (1) - materialization of a PFWI object
66
66
// (2) - "fixup" of the private variable address.
67
67
//
68
- // TODO: add support for the case when there are other functions between
69
- // parallel_for_work_group and parallel_for_work_item in the call stack.
70
- // For example:
71
- //
72
- // void foo(sycl::group<1> group, ...) {
73
- // group.parallel_for_work_item(range<1>(), [&](h_item<1> i) { ... });
74
- // }
75
- // ...
76
- // cgh.parallel_for_work_group<class kernel>(
77
- // range<1>(...), range<1>(...), [=](group<1> g) {
78
- // foo(g, ...);
79
- // });
80
- //
81
68
// TODO The approach employed by this pass generates lots of barriers and data
82
69
// copying between private and local memory, which might not be efficient. There
83
70
// are optimization opportunities listed below. Also other approaches can be
@@ -209,11 +196,36 @@ static bool isCallToAFuncMarkedWithMD(const Instruction *I, const char *MD) {
209
196
return F && F->getMetadata (MD);
210
197
}
211
198
212
- // Checks is this is a call to parallel_for_work_item.
199
+ // Recursively searches for a call to a function with work_group
200
+ // metadata inside F.
201
+ static bool hasCallToAFuncWithWGMetadata (Function &F) {
202
+ for (auto &BB : F)
203
+ for (auto &I : BB) {
204
+ if (isCallToAFuncMarkedWithMD (&I, WG_SCOPE_MD))
205
+ return true ;
206
+ const CallInst *Call = dyn_cast<CallInst>(&I);
207
+ Function *F = dyn_cast_or_null<Function>(Call ? Call->getCalledFunction ()
208
+ : nullptr );
209
+ if (F && hasCallToAFuncWithWGMetadata (*F))
210
+ return true ;
211
+ }
212
+ return false ;
213
+ }
214
+
215
+ // Checks if this is a call to parallel_for_work_item.
213
216
static bool isPFWICall (const Instruction *I) {
214
217
return isCallToAFuncMarkedWithMD (I, PFWI_MD);
215
218
}
216
219
220
+ // Checks if F has any calls to function marked with PFWI_MD metadata.
221
+ static bool hasPFWICall (Function &F) {
222
+ for (auto &BB : F)
223
+ for (auto &I : BB)
224
+ if (isPFWICall (&I))
225
+ return true ;
226
+ return false ;
227
+ }
228
+
217
229
// Checks if given instruction must be executed by all work items.
218
230
static bool isWIScopeInst (const Instruction *I) {
219
231
if (I->isTerminator ())
@@ -425,6 +437,17 @@ static void copyBetweenPrivateAndShadow(Value *L, GlobalVariable *Shadow,
425
437
}
426
438
}
427
439
440
+ // Skip allocas, addrspacecasts associated with allocas and debug insts.
441
+ static Instruction *getFirstInstToProcess (BasicBlock *BB) {
442
+ Instruction *I = &BB->front ();
443
+ for (;
444
+ I->getOpcode () == Instruction::Alloca ||
445
+ I->getOpcode () == Instruction::AddrSpaceCast || I->isDebugOrPseudoInst ();
446
+ I = I->getNextNode ()) {
447
+ }
448
+ return I;
449
+ }
450
+
428
451
// Performs the following transformation for each basic block in the input map:
429
452
//
430
453
// BB:
@@ -462,7 +485,11 @@ static void materializeLocalsInWIScopeBlocksImpl(
462
485
for (auto &P : BB2MatLocals) {
463
486
// generate LeaderBB and private<->shadow copies in proper BBs
464
487
BasicBlock *LeaderBB = P.first ;
465
- BasicBlock *BB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " LeaderMat" );
488
+ // Skip allocas, addrspacecasts associated with allocas and debug insts.
489
+ // Alloca instructions and their associated instructions must be in the
490
+ // beginning of the function.
491
+ Instruction *LeaderBBFront = getFirstInstToProcess (LeaderBB);
492
+ BasicBlock *BB = LeaderBB->splitBasicBlock (LeaderBBFront, " LeaderMat" );
466
493
// Add a barrier to the original block:
467
494
Instruction *At =
468
495
spirv::genWGBarrier (*BB->getFirstNonPHI (), TT)->getNextNode ();
@@ -476,7 +503,8 @@ static void materializeLocalsInWIScopeBlocksImpl(
476
503
// fill the leader BB:
477
504
// fetch data from leader's private copy (which is always up to date) into
478
505
// the corresponding shadow variable
479
- Builder.SetInsertPoint (&LeaderBB->front ());
506
+ LeaderBBFront = getFirstInstToProcess (LeaderBB);
507
+ Builder.SetInsertPoint (LeaderBBFront);
480
508
copyBetweenPrivateAndShadow (L, Shadow, Builder, true /* private->shadow*/ );
481
509
// store data to the local variable - effectively "refresh" the value of
482
510
// the local in each work item in the work group
@@ -485,8 +513,8 @@ static void materializeLocalsInWIScopeBlocksImpl(
485
513
false /* shadow->private*/ );
486
514
}
487
515
// now generate the TestBB and the leader WI guard
488
- BasicBlock *TestBB =
489
- LeaderBB->splitBasicBlock (&LeaderBB-> front () , " TestMat" );
516
+ LeaderBBFront = getFirstInstToProcess (LeaderBB);
517
+ BasicBlock *TestBB = LeaderBB->splitBasicBlock (LeaderBBFront , " TestMat" );
490
518
std::swap (TestBB, LeaderBB);
491
519
guardBlockWithIsLeaderCheck (TestBB, LeaderBB, BB, At->getDebugLoc (), TT);
492
520
}
@@ -752,6 +780,10 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
752
780
FunctionAnalysisManager &FAM) {
753
781
if (!F.getMetadata (WG_SCOPE_MD))
754
782
return PreservedAnalyses::all ();
783
+ // If a function does not have any PFWI calls and it has calls to a function
784
+ // that has work_group metadata, then we do not need to lower such functions.
785
+ if (!hasPFWICall (F) && hasCallToAFuncWithWGMetadata (F))
786
+ return PreservedAnalyses::all ();
755
787
LLVM_DEBUG (llvm::dbgs () << " Function name: " << F.getName () << " \n " );
756
788
const auto &TT = llvm::Triple (F.getParent ()->getTargetTriple ());
757
789
// Ranges of "side effect" instructions
0 commit comments