Skip to content

Commit f1bd245

Browse files
jaladreipsigcbot
authored and committed
Changes in code.
1 parent 11f2f86 commit f1bd245

File tree

9 files changed

+314
-22
lines changed

9 files changed

+314
-22
lines changed

IGC/AdaptorCommon/RayTracing/DynamicRayManagementPass.cpp

Lines changed: 258 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ SPDX-License-Identifier: MIT
1212
#include "Compiler/IGCPassSupport.h"
1313
#include "common/LLVMWarningsPush.hpp"
1414
#include <llvm/IR/InstIterator.h>
15-
#include <llvm/Analysis/LoopInfo.h>
1615
#include <llvm/Analysis/CFG.h>
16+
#include <llvm/Analysis/InstructionSimplify.h>
17+
#include <llvm/Analysis/LoopInfo.h>
1718
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
19+
#include <llvm/Transforms/Utils/SSAUpdater.h>
1820
#include "common/LLVMWarningsPop.hpp"
1921

2022
using namespace IGC;
@@ -56,7 +58,15 @@ class DynamicRayManagementPass : public FunctionPass
5658
llvm::SmallVector< llvm::LoadInst*, 4>& foundLoads);
5759

5860
bool AddDynamicRayManagement(Function& F);
61+
bool TryProceedBasedApproach(Function& F);
5962
void HandleComplexControlFlow(Function& F);
63+
bool requiresSplittingCheckReleaseRegion(Instruction& I);
64+
void FindProceedsInOperands(
65+
Instruction* I,
66+
SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds,
67+
SmallPtrSetImpl<Instruction*>& cache
68+
);
69+
6070

6171
void HoistBeforeMostInnerLoop(
6272
BasicBlock*& dominatorBasicBlock,
@@ -112,6 +122,9 @@ bool DynamicRayManagementPass::runOnFunction(Function& F)
112122
return false;
113123
}
114124

125+
if (TryProceedBasedApproach(F))
126+
return true;
127+
115128
changed = AddDynamicRayManagement(F);
116129

117130
if (changed)
@@ -212,6 +225,249 @@ void DynamicRayManagementPass::FindLoadsFromAlloca(
212225
}
213226
}
214227

228+
// Tells whether instruction I is one of the kinds that force a RayQueryCheck/RayQueryRelease
// region to be split around it: a continuation intrinsic, a barrier intrinsic,
// or a call to a user function.
// Used both by HandleComplexControlFlow and by TryProceedBasedApproach.
bool DynamicRayManagementPass::requiresSplittingCheckReleaseRegion(Instruction& I)
{
    // The tests run in the same order as a short-circuited "||" chain would evaluate them.
    if (isa<ContinuationHLIntrinsic>(I))
        return true;

    if (isBarrierIntrinsic(&I))
        return true;

    return isUserFunctionCall(&I);
}
235+
236+
// Walks the operand graph of I (depth first) and gathers every TraceRaySyncProceed
// intrinsic that I transitively depends on.
//
// I        - instruction to start from; a null pointer is accepted and ignored
// proceeds - output set, receives the Proceed intrinsics that were reached
// cache    - instructions already visited; prevents revisiting shared operands
//            and guarantees termination on cyclic operand graphs (e.g. through PHIs)
void DynamicRayManagementPass::FindProceedsInOperands(Instruction* I, SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds, SmallPtrSetImpl<Instruction*>& cache)
{
    // bail out on a missing instruction or on one that has been seen before
    const bool firstVisit = I && cache.insert(I).second;
    if (!firstVisit)
        return;

    // a Proceed call is a leaf of the search, we don't look through its operands
    auto* asProceed = dyn_cast<TraceRaySyncProceedHLIntrinsic>(I);
    if (asProceed)
    {
        proceeds.insert(asProceed);
        return;
    }

    for (auto& operand : I->operands())
    {
        auto* definingInst = dyn_cast<Instruction>(operand);
        if (!definingInst)
            continue; // constants, arguments, basic blocks etc. can't lead to a Proceed

        FindProceedsInOperands(definingInst, proceeds, cache);
    }
}
258+
259+
// Tries to insert RayQueryCheck/RayQueryRelease intrinsics around the loops whose guard
// or exit conditions depend on TraceRaySyncProceed results.
//
// The function first only analyzes F and collects the blocks that should receive a check
// or a release. It gives up (returns false, leaving F untouched) when:
//   - the approach is disabled by the regkey,
//   - F contains an instruction that requires splitting the check/release region,
//   - F has no Proceed calls or no loops,
//   - a visited loop is not in loop-simplify form,
//   - a common (post)dominator needed for the insertion can't be found,
//   - some Proceed call isn't covered by any of the selected loops.
// Otherwise it inserts the intrinsics, predicated by a boolean guard that prevents
// double checking/releasing, then promotes the guard to SSA form and folds the
// predicates that became constant.
//
// Returns true if F was modified.
bool DynamicRayManagementPass::TryProceedBasedApproach(Function& F)
{

#if LLVM_VERSION_MAJOR < 10
    // LLVM 9 doesn't have the necessary API for testing if the loop is guarded
    // none of the titles that use LLVM 9 use rayquery, so we just return instead of providing our own implementation
    return false;
#else

    // this approach assumes all traffic between private memory and RTStack happens on Proceed calls
    // will be removed once RayQuery will be overhauled to minimize shadowstack usage

    if (IGC_IS_FLAG_ENABLED(DisableProceedBasedApproachForRayQueryDynamicRayManagementMechanism))
        return false;

    SmallVector<TraceRaySyncProceedHLIntrinsic*> allProceeds;

    for (auto& I : instructions(F))
    {
        // we don't want to use this approach in complex control flow situations
        if (requiresSplittingCheckReleaseRegion(I))
            return false;

        // collect all Proceed calls, because some of them might be not in any loop
        if (auto* proceed = dyn_cast<TraceRaySyncProceedHLIntrinsic>(&I))
            allProceeds.push_back(proceed);
    }

    if (allProceeds.empty())
        return false;

    auto* LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    if (LI->empty())
        return false;

    // we don't want to do the insertions on the fly, because changing control flow will invalidate the domtrees
    SetVector<BasicBlock*> checkBBs;
    SetVector<BasicBlock*> releaseBBs;

    // we iterate over all loops from outermost to innermost
    // if we find a loop, we skip all loops that are nested in it
    SmallPtrSet<Loop*, 4> loopsToIgnore;
    for (auto& loop : LI->getLoopsInPreorder())
    {
        if (loopsToIgnore.contains(loop))
        {
            // preorder visits a parent before its children, so forwarding the "ignore" mark
            // here makes sure loops nested at any depth are skipped, not only direct subloops
            loopsToIgnore.insert(loop->getSubLoops().begin(), loop->getSubLoops().end());
            continue;
        }

        if (!loop->isLoopSimplifyForm())
            return false;

        // find the Proceed calls that the loop guard and the loop exit conditions depend on
        SetVector<TraceRaySyncProceedHLIntrinsic*> proceeds;
        SmallPtrSet<Instruction*, 4> cache;
        FindProceedsInOperands(loop->getLoopGuardBranch(), proceeds, cache);

        SmallVector<BasicBlock*> exitingBlocks;
        loop->getExitingBlocks(exitingBlocks);

        for (auto* exitingBB : exitingBlocks)
            FindProceedsInOperands(exitingBB->getTerminator(), proceeds, cache);

        if (proceeds.empty())
            continue;

        loopsToIgnore.insert(loop->getSubLoops().begin(), loop->getSubLoops().end());

        bool allProceedsInLoop = llvm::all_of(
            proceeds,
            [&](auto* proceed)
            {
                return loop->contains(proceed->getParent());
            }
        );

        SmallVector<BasicBlock*> exitBlocks;
        loop->getExitBlocks(exitBlocks);

        if (allProceedsInLoop)
        {
            // if all proceed calls are inside the loop, we just check/release the loop itself
            checkBBs.insert(loop->getLoopPreheader());

            for (auto* exitBB : exitBlocks)
                releaseBBs.insert(exitBB);
        }
        else
        {
            // in other cases, we need to expand to make sure all proceed calls are inside the check/release scope
            auto* start = loop->getLoopPreheader();
            auto* end = loop->getLoopPreheader();

            for (auto* proceed : proceeds)
            {
                start = m_DT->findNearestCommonDominator(start, proceed->getParent());
                end = m_PDT->findNearestCommonDominator(end, proceed->getParent());

                // findNearestCommonDominator returns null when no real block (post)dominates both inputs
                // (for the post-dominator tree: only the virtual root is common).
                // Nothing has been modified yet, so it is safe to give up on this approach.
                if (!start || !end)
                    return false;
            }

            // following single entry multiple exits loop model, we insert one check and multiple releases
            checkBBs.insert(start);

            for (auto* exitBB : exitBlocks)
            {
                auto* releaseBB = m_PDT->findNearestCommonDominator(end, exitBB);

                // same as above: without a real common post-dominator we have no place for the release
                if (!releaseBB)
                    return false;

                releaseBBs.insert(releaseBB);
            }
        }

        // everything inside the loop and everything the loop conditions depend on is now covered
        llvm::erase_if(
            allProceeds,
            [&](auto* proceed) {
                return loop->contains(proceed) || proceeds.contains(proceed);
            }
        );
    }

    // abort if we have any proceeds that don't contribute to loop exit conditions
    if (!allProceeds.empty())
        return false;

    // at this point we commit to the approach
    RTBuilder IRB(&*F.getEntryBlock().begin(), *m_CGCtx);

    SmallVector<Instruction*> guardStoresAndLoads;

    // create a guard boolean to prevent double checking/double releasing
    // later, we will try to optimize it out with LoadAndStorePromoter
    auto* guard = IRB.CreateAlloca(IRB.getInt1Ty(), nullptr, VALUE_NAME("RayQueryCheckReleaseGuard"));
    auto* init_guard = IRB.CreateStore(IRB.getFalse(), guard);
    guardStoresAndLoads.push_back(init_guard);

    SmallVector<Instruction*> CheckReleaseIntrinsics;

    for (auto* checkBB : checkBBs)
    {
        auto* IP = checkBB->getFirstNonPHI();
        IRB.SetInsertPoint(IP);

        auto* load = IRB.CreateLoad(guard, VALUE_NAME("RQGuardValue"));

        guardStoresAndLoads.push_back(load);

        // check only if the guard is not set yet, then set it
        auto* cond = IRB.CreateNot(
            load,
            VALUE_NAME("NegatedRQGuardValue")
        );

        CheckReleaseIntrinsics.push_back(IRB.CreateRayQueryCheckIntrinsic(cond));
        guardStoresAndLoads.push_back(IRB.CreateStore(IRB.getTrue(), guard));
    }

    for (auto* insertBB : releaseBBs)
    {
        auto* IP = insertBB->getTerminator();
        IRB.SetInsertPoint(IP);

        // release only if the guard is set, then clear it
        auto* cond = IRB.CreateLoad(guard, VALUE_NAME("RQGuardValue"));

        guardStoresAndLoads.push_back(cond);

        CheckReleaseIntrinsics.push_back(IRB.CreateRayQueryReleaseIntrinsic(cond));
        guardStoresAndLoads.push_back(IRB.CreateStore(IRB.getFalse(), guard));
    }

    // make sure guard dominates all uses
    init_guard->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
    guard->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());

    SmallVector<PHINode*> phis;

    // rewrite the guard loads/stores into SSA values
    SSAUpdater Updater(&phis);
    LoadAndStorePromoter LSP(guardStoresAndLoads, Updater, "RayQueryCheckReleaseGuardPromotion");
    LSP.run(guardStoresAndLoads);

    // get rid of the inserted PHIs that have the same value on all incoming edges
    for (auto* phi : phis)
    {
        if (auto* V = phi->hasConstantValue())
        {
            phi->replaceAllUsesWith(V);
            phi->eraseFromParent();
        }
    }

    SimplifyQuery SQ(F.getParent()->getDataLayout());

    for (auto* I : CheckReleaseIntrinsics)
    {
        // try to fold the predicate of the intrinsic to a constant
        Value* flag = I->getOperand(0);
        if (auto* flagAsBinOp = dyn_cast<BinaryOperator>(flag))
            flag =
#if LLVM_VERSION_MAJOR >= 15
            simplifyBinOp(
#else
            SimplifyBinOp(
#endif
                flagAsBinOp->getOpcode(),
                flagAsBinOp->getOperand(0),
                flagAsBinOp->getOperand(1),
                SQ
            );

        if (auto* CI = dyn_cast_or_null<ConstantInt>(flag))
        {
            // the two cases are mutually exclusive; I must not be touched after it was erased
            if (CI->isZero())
                I->eraseFromParent(); // predicate is always false, the intrinsic is not needed
            else if (CI->isOne())
                I->setOperand(0, IRB.getTrue()); // predicate is always true
        }
    }

    return true;

#endif // LLVM_VERSION_MAJOR >= 10
}
470+
215471
bool DynamicRayManagementPass::AddDynamicRayManagement(Function& F)
216472
{
217473
vector<AllocateRayQueryIntrinsic*> allocateRayQueries;
@@ -523,9 +779,7 @@ void DynamicRayManagementPass::HandleComplexControlFlow(Function& F)
523779
// and GenISA_RayQueryCheck after to avoid deadlocks.
524780
for (Instruction& I : instructions(F))
525781
{
526-
if (isa<ContinuationHLIntrinsic>(&I) ||
527-
isBarrierIntrinsic(&I) ||
528-
isUserFunctionCall(&I))
782+
if (requiresSplittingCheckReleaseRegion(I))
529783
{
530784
// Look through all RayQueryCheck-Release pairs, and check if the barrier/call
531785
// instruction is within any of the pairs.

IGC/AdaptorCommon/RayTracing/RTBuilder.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,37 +161,44 @@ Value* RTBuilder::CreateSyncStackPtrIntrinsic(
161161
return StackPtr;
162162
}
163163

164-
// Emits a call to GenISA_RayQueryCheck at the current insertion point.
//
// predicate - value passed as the only argument of the intrinsic;
//             when null, a constant true is passed instead
// Returns the created call.
RayQueryCheckIntrinsic* RTBuilder::CreateRayQueryCheckIntrinsic(Value* predicate)
{
    Module* parentModule = this->GetInsertBlock()->getModule();

    Value* effectivePredicate = predicate ? predicate : getTrue();

    auto* checkDecl = GenISAIntrinsic::getDeclaration(parentModule, GenISAIntrinsic::GenISA_RayQueryCheck);

    return cast<RayQueryCheckIntrinsic>(CreateCall(checkDecl, effectivePredicate));
}
172175

173-
// Emits a call to GenISA_RayQueryRelease at the current insertion point.
//
// predicate - value passed as the only argument of the intrinsic;
//             when null, a constant true is passed instead
// Returns the created call.
RayQueryReleaseIntrinsic* RTBuilder::CreateRayQueryReleaseIntrinsic(Value* predicate)
{
    Module* parentModule = this->GetInsertBlock()->getModule();

    Value* effectivePredicate = predicate ? predicate : getTrue();

    auto* releaseDecl = GenISAIntrinsic::getDeclaration(parentModule, GenISAIntrinsic::GenISA_RayQueryRelease);

    return cast<RayQueryReleaseIntrinsic>(CreateCall(releaseDecl, effectivePredicate));
}
181187

182-
// Emits a call to GenISA_PreemptionDisable at the current insertion point.
//
// Flag - value passed as the only argument of the intrinsic;
//        when null, a constant true is passed instead
// Returns the created call.
PreemptionDisableIntrinsic* RTBuilder::CreatePreemptionDisableIntrinsic(Value* Flag)
{
    Module* parentModule = this->GetInsertBlock()->getModule();

    auto* disableDecl = GenISAIntrinsic::getDeclaration(
        parentModule,
        GenISAIntrinsic::GenISA_PreemptionDisable);

    Value* effectiveFlag = Flag;
    if (!effectiveFlag)
        effectiveFlag = getTrue();

    auto* disableCall = CreateCall(disableDecl, effectiveFlag);

    return cast<PreemptionDisableIntrinsic>(disableCall);
}
193200

194-
PreemptionEnableIntrinsic* RTBuilder::CreatePreemptionEnableIntrinsic(Value *Flag)
201+
PreemptionEnableIntrinsic* RTBuilder::CreatePreemptionEnableIntrinsic(Value* Flag)
195202
{
196203
Module* M = this->GetInsertBlock()->getModule();
197204

IGC/AdaptorCommon/RayTracing/RTBuilder.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,11 @@ class RTBuilder : public IGCIRBuilder<>
256256

257257
Value* CreateSyncStackPtrIntrinsic(Value* Addr, Type* PtrTy, bool AddDecoration);
258258

259-
RayQueryCheckIntrinsic* CreateRayQueryCheckIntrinsic();
260-
RayQueryReleaseIntrinsic* CreateRayQueryReleaseIntrinsic();
259+
RayQueryCheckIntrinsic* CreateRayQueryCheckIntrinsic(Value* predicate = nullptr);
260+
RayQueryReleaseIntrinsic* CreateRayQueryReleaseIntrinsic(Value* predicate = nullptr);
261261

262-
PreemptionDisableIntrinsic* CreatePreemptionDisableIntrinsic();
263-
PreemptionEnableIntrinsic* CreatePreemptionEnableIntrinsic(Value *Flag = nullptr);
262+
PreemptionDisableIntrinsic* CreatePreemptionDisableIntrinsic(Value* Flag = nullptr);
263+
PreemptionEnableIntrinsic* CreatePreemptionEnableIntrinsic(Value* Flag = nullptr);
264264

265265
SyncStackPointerVal* getSyncStackPointer();
266266

0 commit comments

Comments
 (0)