@@ -12,11 +12,9 @@ SPDX-License-Identifier: MIT
12
12
#include " Compiler/IGCPassSupport.h"
13
13
#include " common/LLVMWarningsPush.hpp"
14
14
#include < llvm/IR/InstIterator.h>
15
- #include < llvm/Analysis/CFG.h>
16
- #include < llvm/Analysis/InstructionSimplify.h>
17
15
#include < llvm/Analysis/LoopInfo.h>
16
+ #include < llvm/Analysis/CFG.h>
18
17
#include < llvm/Transforms/Utils/BasicBlockUtils.h>
19
- #include < llvm/Transforms/Utils/SSAUpdater.h>
20
18
#include " common/LLVMWarningsPop.hpp"
21
19
22
20
using namespace IGC ;
@@ -58,15 +56,7 @@ class DynamicRayManagementPass : public FunctionPass
58
56
llvm::SmallVector< llvm::LoadInst*, 4 >& foundLoads);
59
57
60
58
bool AddDynamicRayManagement (Function& F);
61
- bool TryProceedBasedApproach (Function& F);
62
59
void HandleComplexControlFlow (Function& F);
63
- bool requiresSplittingCheckReleaseRegion (Instruction& I);
64
- void FindProceedsInOperands (
65
- Instruction* I,
66
- SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds,
67
- SmallPtrSetImpl<Instruction*>& cache
68
- );
69
-
70
60
71
61
void HoistBeforeMostInnerLoop (
72
62
BasicBlock*& dominatorBasicBlock,
@@ -122,9 +112,6 @@ bool DynamicRayManagementPass::runOnFunction(Function& F)
122
112
return false ;
123
113
}
124
114
125
- if (TryProceedBasedApproach (F))
126
- return true ;
127
-
128
115
changed = AddDynamicRayManagement (F);
129
116
130
117
if (changed)
@@ -225,249 +212,6 @@ void DynamicRayManagementPass::FindLoadsFromAlloca(
225
212
}
226
213
}
227
214
228
- bool DynamicRayManagementPass::requiresSplittingCheckReleaseRegion (Instruction& I)
229
- {
230
- return
231
- isa<ContinuationHLIntrinsic>(I) ||
232
- isBarrierIntrinsic (&I) ||
233
- isUserFunctionCall (&I);
234
- }
235
-
236
- void DynamicRayManagementPass::FindProceedsInOperands (Instruction* I, SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds, SmallPtrSetImpl<Instruction*>& cache)
237
- {
238
- if (!I)
239
- return ;
240
-
241
- if (!cache.insert (I).second )
242
- return ;
243
-
244
- if (auto * proceedI = dyn_cast<TraceRaySyncProceedHLIntrinsic>(I))
245
- {
246
- proceeds.insert (proceedI);
247
- return ;
248
- }
249
-
250
- for (auto & op : I->operands ())
251
- {
252
- if (auto * opI = dyn_cast<Instruction>(op))
253
- {
254
- FindProceedsInOperands (opI, proceeds, cache);
255
- }
256
- }
257
- }
258
-
259
- bool DynamicRayManagementPass::TryProceedBasedApproach (Function& F)
260
- {
261
-
262
- #if LLVM_VERSION_MAJOR < 10
263
- // LLVM 9 doesn't have the necessary API for testing if the loop is guarded
264
- // none of the titles that use LLVM 9 use rayquery, so we just return instead of providing our own implementation
265
- return false ;
266
- #else
267
-
268
- // this approach assumes all traffic between private memory and RTStack happens on Proceed calls
269
- // will be removed once RayQuery will be overhauled to minimize shadowstack usage
270
-
271
- if (IGC_IS_FLAG_ENABLED (DisableProceedBasedApproachForRayQueryDynamicRayManagementMechanism))
272
- return false ;
273
-
274
- SmallVector<TraceRaySyncProceedHLIntrinsic*> allProceeds;
275
-
276
- for (auto & I : instructions (F))
277
- {
278
- // we don't want to use this approach in complex control flow situations
279
- if (requiresSplittingCheckReleaseRegion (I))
280
- return false ;
281
-
282
- // collect all Proceed calls, because some of them might be not in any loop
283
- if (auto * proceed = dyn_cast<TraceRaySyncProceedHLIntrinsic>(&I))
284
- allProceeds.push_back (proceed);
285
- }
286
-
287
- if (allProceeds.empty ())
288
- return false ;
289
-
290
- auto * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo ();
291
-
292
- if (LI->empty ())
293
- return false ;
294
-
295
- // we don't want to do the insertions on the fly, because changing control flow will invalidate the domtrees
296
- SetVector<BasicBlock*> checkBBs;
297
- SetVector<BasicBlock*> releaseBBs;
298
-
299
- // we iterate over all loops from outermost to innermost
300
- // if we find a loop, we skip all loops that are nested in it
301
- SmallPtrSet<Loop*, 4 > loopsToIgnore;
302
- for (auto & loop : LI->getLoopsInPreorder ())
303
- {
304
- if (loopsToIgnore.contains (loop))
305
- continue ;
306
-
307
- if (!loop->isLoopSimplifyForm ())
308
- return false ;
309
-
310
- SetVector<TraceRaySyncProceedHLIntrinsic*> proceeds;
311
- SmallPtrSet<Instruction*, 4 > cache;
312
- FindProceedsInOperands (loop->getLoopGuardBranch (), proceeds, cache);
313
-
314
- SmallVector<BasicBlock*> exitingBlocks;
315
- loop->getExitingBlocks (exitingBlocks);
316
-
317
- for (auto * exitingBB : exitingBlocks)
318
- FindProceedsInOperands (exitingBB->getTerminator (), proceeds, cache);
319
-
320
- if (proceeds.empty ())
321
- continue ;
322
-
323
- loopsToIgnore.insert (loop->getSubLoops ().begin (), loop->getSubLoops ().end ());
324
-
325
- bool allProceedsInLoop = llvm::all_of (
326
- proceeds,
327
- [&](auto * proceed)
328
- {
329
- return loop->contains (proceed->getParent ());
330
- }
331
- );
332
-
333
- SmallVector<BasicBlock*> exitBlocks;
334
- loop->getExitBlocks (exitBlocks);
335
-
336
- if (allProceedsInLoop)
337
- {
338
- // if all proceed calls are inside the loop, we just check/release the loop itself
339
- checkBBs.insert (loop->getLoopPreheader ());
340
-
341
- for (auto * exitBB : exitBlocks)
342
- releaseBBs.insert (exitBB);
343
- }
344
- else
345
- {
346
- // in other cases, we need to expand to make sure all proceed calls are inside the check/release scope
347
- auto * start = loop->getLoopPreheader ();
348
- auto * end = loop->getLoopPreheader ();
349
-
350
- for (auto * proceed : proceeds)
351
- {
352
- start = m_DT->findNearestCommonDominator (start, proceed->getParent ());
353
- end = m_PDT->findNearestCommonDominator (end, proceed->getParent ());
354
- }
355
-
356
- // following single entry multiple exits loop model, we insert one check and multiple releases
357
- checkBBs.insert (start);
358
-
359
- for (auto * exitBB : exitBlocks)
360
- releaseBBs.insert (m_PDT->findNearestCommonDominator (end, exitBB));
361
- }
362
-
363
- llvm::erase_if (
364
- allProceeds,
365
- [&](auto * proceed) {
366
- return loop->contains (proceed) || proceeds.contains (proceed);
367
- }
368
- );
369
- }
370
-
371
- // abort if we have any proceeds that don't contribute to loop exit conditions
372
- if (!allProceeds.empty ())
373
- return false ;
374
-
375
- // at this point we commit to the approach
376
- RTBuilder IRB (&*F.getEntryBlock ().begin (), *m_CGCtx);
377
-
378
- SmallVector<Instruction*> guardStoresAndLoads;
379
-
380
- // create a guard boolean to prevent double checking/double releasing
381
- // later, we will try to optimize it out with LoadAndStorePromoter
382
- auto * guard = IRB.CreateAlloca (IRB.getInt1Ty (), nullptr , VALUE_NAME (" RayQueryCheckReleaseGuard" ));
383
- auto * init_guard = IRB.CreateStore (IRB.getFalse (), guard);
384
- guardStoresAndLoads.push_back (init_guard);
385
-
386
- SmallVector<Instruction*> CheckReleaseIntrinsics;
387
-
388
- for (auto * checkBB : checkBBs)
389
- {
390
- auto * IP = checkBB->getFirstNonPHI ();
391
- IRB.SetInsertPoint (IP);
392
-
393
- auto * load = IRB.CreateLoad (guard, VALUE_NAME (" RQGuardValue" ));
394
-
395
- guardStoresAndLoads.push_back (load);
396
-
397
- auto * cond = IRB.CreateNot (
398
- load,
399
- VALUE_NAME (" NegatedRQGuardValue" )
400
- );
401
-
402
- CheckReleaseIntrinsics.push_back (IRB.CreateRayQueryCheckIntrinsic (cond));
403
- guardStoresAndLoads.push_back (IRB.CreateStore (IRB.getTrue (), guard));
404
- };
405
-
406
- for (auto * insertBB : releaseBBs)
407
- {
408
- auto * IP = insertBB->getTerminator ();
409
- IRB.SetInsertPoint (IP);
410
-
411
- auto * cond = IRB.CreateLoad (guard, VALUE_NAME (" RQGuardValue" ));
412
-
413
- guardStoresAndLoads.push_back (cond);
414
-
415
- CheckReleaseIntrinsics.push_back (IRB.CreateRayQueryReleaseIntrinsic (cond));
416
- guardStoresAndLoads.push_back (IRB.CreateStore (IRB.getFalse (), guard));
417
- };
418
-
419
- // make sure guard dominates all uses
420
- init_guard->moveBefore (&*F.getEntryBlock ().getFirstInsertionPt ());
421
- guard->moveBefore (&*F.getEntryBlock ().getFirstInsertionPt ());
422
-
423
- SmallVector<PHINode*> phis;
424
-
425
- SSAUpdater Updater (&phis);
426
- LoadAndStorePromoter LSP (guardStoresAndLoads, Updater, " RayQueryCheckReleaseGuardPromotion" );
427
- LSP.run (guardStoresAndLoads);
428
-
429
- for (auto * phi : phis)
430
- {
431
- if (auto * V = phi->hasConstantValue ())
432
- {
433
- phi->replaceAllUsesWith (V);
434
- phi->eraseFromParent ();
435
- }
436
- }
437
-
438
- SimplifyQuery SQ (F.getParent ()->getDataLayout ());
439
-
440
- for (auto * I : CheckReleaseIntrinsics)
441
- {
442
- Value* flag = I->getOperand (0 );
443
- if (auto * flagAsBinOp = dyn_cast<BinaryOperator>(flag))
444
- flag =
445
- #if LLVM_VERSION_MAJOR >= 15
446
- simplifyBinOp (
447
- #else
448
- SimplifyBinOp (
449
- #endif
450
- flagAsBinOp->getOpcode (),
451
- flagAsBinOp->getOperand (0 ),
452
- flagAsBinOp->getOperand (1 ),
453
- SQ
454
- );
455
-
456
- if (auto * CI = dyn_cast_or_null<ConstantInt>(flag))
457
- {
458
- if (CI->isZero ())
459
- I->eraseFromParent ();
460
-
461
- if (CI->isOne ())
462
- I->setOperand (0 , IRB.getTrue ());
463
- }
464
- }
465
-
466
- return true ;
467
-
468
- #endif // LLVM_VERSION_MAJOR >= 10
469
- }
470
-
471
215
bool DynamicRayManagementPass::AddDynamicRayManagement (Function& F)
472
216
{
473
217
vector<AllocateRayQueryIntrinsic*> allocateRayQueries;
@@ -779,7 +523,9 @@ void DynamicRayManagementPass::HandleComplexControlFlow(Function& F)
779
523
// and GenISA_RayQueryCheck after to avoid deadlocks.
780
524
for (Instruction& I : instructions (F))
781
525
{
782
- if (requiresSplittingCheckReleaseRegion (I))
526
+ if (isa<ContinuationHLIntrinsic>(&I) ||
527
+ isBarrierIntrinsic (&I) ||
528
+ isUserFunctionCall (&I))
783
529
{
784
530
// Look through all RayQueryCheck-Release pairs, and check if the barrier/call
785
531
// instruction is within any of the pairs.
0 commit comments