@@ -12,9 +12,11 @@ SPDX-License-Identifier: MIT
12
12
#include " Compiler/IGCPassSupport.h"
13
13
#include " common/LLVMWarningsPush.hpp"
14
14
#include < llvm/IR/InstIterator.h>
15
- #include < llvm/Analysis/LoopInfo.h>
16
15
#include < llvm/Analysis/CFG.h>
16
+ #include < llvm/Analysis/InstructionSimplify.h>
17
+ #include < llvm/Analysis/LoopInfo.h>
17
18
#include < llvm/Transforms/Utils/BasicBlockUtils.h>
19
+ #include < llvm/Transforms/Utils/SSAUpdater.h>
18
20
#include " common/LLVMWarningsPop.hpp"
19
21
20
22
using namespace IGC ;
@@ -56,7 +58,15 @@ class DynamicRayManagementPass : public FunctionPass
56
58
llvm::SmallVector< llvm::LoadInst*, 4 >& foundLoads);
57
59
58
60
bool AddDynamicRayManagement (Function& F);
61
+ bool TryProceedBasedApproach (Function& F);
59
62
void HandleComplexControlFlow (Function& F);
63
+ bool requiresSplittingCheckReleaseRegion (Instruction& I);
64
+ void FindProceedsInOperands (
65
+ Instruction* I,
66
+ SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds,
67
+ SmallPtrSetImpl<Instruction*>& cache
68
+ );
69
+
60
70
61
71
void HoistBeforeMostInnerLoop (
62
72
BasicBlock*& dominatorBasicBlock,
@@ -112,6 +122,9 @@ bool DynamicRayManagementPass::runOnFunction(Function& F)
112
122
return false ;
113
123
}
114
124
125
+ if (TryProceedBasedApproach (F))
126
+ return true ;
127
+
115
128
changed = AddDynamicRayManagement (F);
116
129
117
130
if (changed)
@@ -212,6 +225,249 @@ void DynamicRayManagementPass::FindLoadsFromAlloca(
212
225
}
213
226
}
214
227
228
+ bool DynamicRayManagementPass::requiresSplittingCheckReleaseRegion (Instruction& I)
229
+ {
230
+ return
231
+ isa<ContinuationHLIntrinsic>(I) ||
232
+ isBarrierIntrinsic (&I) ||
233
+ isUserFunctionCall (&I);
234
+ }
235
+
236
+ void DynamicRayManagementPass::FindProceedsInOperands (Instruction* I, SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds, SmallPtrSetImpl<Instruction*>& cache)
237
+ {
238
+ if (!I)
239
+ return ;
240
+
241
+ if (!cache.insert (I).second )
242
+ return ;
243
+
244
+ if (auto * proceedI = dyn_cast<TraceRaySyncProceedHLIntrinsic>(I))
245
+ {
246
+ proceeds.insert (proceedI);
247
+ return ;
248
+ }
249
+
250
+ for (auto & op : I->operands ())
251
+ {
252
+ if (auto * opI = dyn_cast<Instruction>(op))
253
+ {
254
+ FindProceedsInOperands (opI, proceeds, cache);
255
+ }
256
+ }
257
+ }
258
+
259
+ bool DynamicRayManagementPass::TryProceedBasedApproach (Function& F)
260
+ {
261
+
262
+ #if LLVM_VERSION_MAJOR < 10
263
+ // LLVM 9 doesn't have the necessary API for testing if the loop is guarded
264
+ // none of the titles that use LLVM 9 use rayquery, so we just return instead of providing our own implementation
265
+ return false ;
266
+ #else
267
+
268
+ // this approach assumes all traffic between private memory and RTStack happens on Proceed calls
269
+ // will be removed once RayQuery will be overhauled to minimize shadowstack usage
270
+
271
+ if (IGC_IS_FLAG_ENABLED (DisableProceedBasedApproachForRayQueryDynamicRayManagementMechanism))
272
+ return false ;
273
+
274
+ SmallVector<TraceRaySyncProceedHLIntrinsic*> allProceeds;
275
+
276
+ for (auto & I : instructions (F))
277
+ {
278
+ // we don't want to use this approach in complex control flow situations
279
+ if (requiresSplittingCheckReleaseRegion (I))
280
+ return false ;
281
+
282
+ // collect all Proceed calls, because some of them might be not in any loop
283
+ if (auto * proceed = dyn_cast<TraceRaySyncProceedHLIntrinsic>(&I))
284
+ allProceeds.push_back (proceed);
285
+ }
286
+
287
+ if (allProceeds.empty ())
288
+ return false ;
289
+
290
+ auto * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo ();
291
+
292
+ if (LI->empty ())
293
+ return false ;
294
+
295
+ // we don't want to do the insertions on the fly, because changing control flow will invalidate the domtrees
296
+ SetVector<BasicBlock*> checkBBs;
297
+ SetVector<BasicBlock*> releaseBBs;
298
+
299
+ // we iterate over all loops from outermost to innermost
300
+ // if we find a loop, we skip all loops that are nested in it
301
+ SmallPtrSet<Loop*, 4 > loopsToIgnore;
302
+ for (auto & loop : LI->getLoopsInPreorder ())
303
+ {
304
+ if (loopsToIgnore.contains (loop))
305
+ continue ;
306
+
307
+ if (!loop->isLoopSimplifyForm ())
308
+ return false ;
309
+
310
+ SetVector<TraceRaySyncProceedHLIntrinsic*> proceeds;
311
+ SmallPtrSet<Instruction*, 4 > cache;
312
+ FindProceedsInOperands (loop->getLoopGuardBranch (), proceeds, cache);
313
+
314
+ SmallVector<BasicBlock*> exitingBlocks;
315
+ loop->getExitingBlocks (exitingBlocks);
316
+
317
+ for (auto * exitingBB : exitingBlocks)
318
+ FindProceedsInOperands (exitingBB->getTerminator (), proceeds, cache);
319
+
320
+ if (proceeds.empty ())
321
+ continue ;
322
+
323
+ loopsToIgnore.insert (loop->getSubLoops ().begin (), loop->getSubLoops ().end ());
324
+
325
+ bool allProceedsInLoop = llvm::all_of (
326
+ proceeds,
327
+ [&](auto * proceed)
328
+ {
329
+ return loop->contains (proceed->getParent ());
330
+ }
331
+ );
332
+
333
+ SmallVector<BasicBlock*> exitBlocks;
334
+ loop->getExitBlocks (exitBlocks);
335
+
336
+ if (allProceedsInLoop)
337
+ {
338
+ // if all proceed calls are inside the loop, we just check/release the loop itself
339
+ checkBBs.insert (loop->getLoopPreheader ());
340
+
341
+ for (auto * exitBB : exitBlocks)
342
+ releaseBBs.insert (exitBB);
343
+ }
344
+ else
345
+ {
346
+ // in other cases, we need to expand to make sure all proceed calls are inside the check/release scope
347
+ auto * start = loop->getLoopPreheader ();
348
+ auto * end = loop->getLoopPreheader ();
349
+
350
+ for (auto * proceed : proceeds)
351
+ {
352
+ start = m_DT->findNearestCommonDominator (start, proceed->getParent ());
353
+ end = m_PDT->findNearestCommonDominator (end, proceed->getParent ());
354
+ }
355
+
356
+ // following single entry multiple exits loop model, we insert one check and multiple releases
357
+ checkBBs.insert (start);
358
+
359
+ for (auto * exitBB : exitBlocks)
360
+ releaseBBs.insert (m_PDT->findNearestCommonDominator (end, exitBB));
361
+ }
362
+
363
+ llvm::erase_if (
364
+ allProceeds,
365
+ [&](auto * proceed) {
366
+ return loop->contains (proceed) || proceeds.contains (proceed);
367
+ }
368
+ );
369
+ }
370
+
371
+ // abort if we have any proceeds that don't contribute to loop exit conditions
372
+ if (!allProceeds.empty ())
373
+ return false ;
374
+
375
+ // at this point we commit to the approach
376
+ RTBuilder IRB (&*F.getEntryBlock ().begin (), *m_CGCtx);
377
+
378
+ SmallVector<Instruction*> guardStoresAndLoads;
379
+
380
+ // create a guard boolean to prevent double checking/double releasing
381
+ // later, we will try to optimize it out with LoadAndStorePromoter
382
+ auto * guard = IRB.CreateAlloca (IRB.getInt1Ty (), nullptr , VALUE_NAME (" RayQueryCheckReleaseGuard" ));
383
+ auto * init_guard = IRB.CreateStore (IRB.getFalse (), guard);
384
+ guardStoresAndLoads.push_back (init_guard);
385
+
386
+ SmallVector<Instruction*> CheckReleaseIntrinsics;
387
+
388
+ for (auto * checkBB : checkBBs)
389
+ {
390
+ auto * IP = checkBB->getFirstNonPHI ();
391
+ IRB.SetInsertPoint (IP);
392
+
393
+ auto * load = IRB.CreateLoad (guard, VALUE_NAME (" RQGuardValue" ));
394
+
395
+ guardStoresAndLoads.push_back (load);
396
+
397
+ auto * cond = IRB.CreateNot (
398
+ load,
399
+ VALUE_NAME (" NegatedRQGuardValue" )
400
+ );
401
+
402
+ CheckReleaseIntrinsics.push_back (IRB.CreateRayQueryCheckIntrinsic (cond));
403
+ guardStoresAndLoads.push_back (IRB.CreateStore (IRB.getTrue (), guard));
404
+ };
405
+
406
+ for (auto * insertBB : releaseBBs)
407
+ {
408
+ auto * IP = insertBB->getTerminator ();
409
+ IRB.SetInsertPoint (IP);
410
+
411
+ auto * cond = IRB.CreateLoad (guard, VALUE_NAME (" RQGuardValue" ));
412
+
413
+ guardStoresAndLoads.push_back (cond);
414
+
415
+ CheckReleaseIntrinsics.push_back (IRB.CreateRayQueryReleaseIntrinsic (cond));
416
+ guardStoresAndLoads.push_back (IRB.CreateStore (IRB.getFalse (), guard));
417
+ };
418
+
419
+ // make sure guard dominates all uses
420
+ init_guard->moveBefore (&*F.getEntryBlock ().getFirstInsertionPt ());
421
+ guard->moveBefore (&*F.getEntryBlock ().getFirstInsertionPt ());
422
+
423
+ SmallVector<PHINode*> phis;
424
+
425
+ SSAUpdater Updater (&phis);
426
+ LoadAndStorePromoter LSP (guardStoresAndLoads, Updater, " RayQueryCheckReleaseGuardPromotion" );
427
+ LSP.run (guardStoresAndLoads);
428
+
429
+ for (auto * phi : phis)
430
+ {
431
+ if (auto * V = phi->hasConstantValue ())
432
+ {
433
+ phi->replaceAllUsesWith (V);
434
+ phi->eraseFromParent ();
435
+ }
436
+ }
437
+
438
+ SimplifyQuery SQ (F.getParent ()->getDataLayout ());
439
+
440
+ for (auto * I : CheckReleaseIntrinsics)
441
+ {
442
+ Value* flag = I->getOperand (0 );
443
+ if (auto * flagAsBinOp = dyn_cast<BinaryOperator>(flag))
444
+ flag =
445
+ #if LLVM_VERSION_MAJOR >= 15
446
+ simplifyBinOp (
447
+ #else
448
+ SimplifyBinOp (
449
+ #endif
450
+ flagAsBinOp->getOpcode (),
451
+ flagAsBinOp->getOperand (0 ),
452
+ flagAsBinOp->getOperand (1 ),
453
+ SQ
454
+ );
455
+
456
+ if (auto * CI = dyn_cast_or_null<ConstantInt>(flag))
457
+ {
458
+ if (CI->isZero ())
459
+ I->eraseFromParent ();
460
+
461
+ if (CI->isOne ())
462
+ I->setOperand (0 , IRB.getTrue ());
463
+ }
464
+ }
465
+
466
+ return true ;
467
+
468
+ #endif // LLVM_VERSION_MAJOR >= 10
469
+ }
470
+
215
471
bool DynamicRayManagementPass::AddDynamicRayManagement (Function& F)
216
472
{
217
473
vector<AllocateRayQueryIntrinsic*> allocateRayQueries;
@@ -523,9 +779,7 @@ void DynamicRayManagementPass::HandleComplexControlFlow(Function& F)
523
779
// and GenISA_RayQueryCheck after to avoid deadlocks.
524
780
for (Instruction& I : instructions (F))
525
781
{
526
- if (isa<ContinuationHLIntrinsic>(&I) ||
527
- isBarrierIntrinsic (&I) ||
528
- isUserFunctionCall (&I))
782
+ if (requiresSplittingCheckReleaseRegion (I))
529
783
{
530
784
// Look through all RayQueryCheck-Release pairs, and check if the barrier/call
531
785
// instruction is within any of pairs.
0 commit comments