Skip to content

Commit 81a27a2

Browse files
wkuczyns and igcbot
authored and committed
Changes in code.
1 parent ca8d616 commit 81a27a2

File tree

9 files changed

+22
-314
lines changed

9 files changed

+22
-314
lines changed

IGC/AdaptorCommon/RayTracing/DynamicRayManagementPass.cpp

Lines changed: 4 additions & 258 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,9 @@ SPDX-License-Identifier: MIT
1212
#include "Compiler/IGCPassSupport.h"
1313
#include "common/LLVMWarningsPush.hpp"
1414
#include <llvm/IR/InstIterator.h>
15-
#include <llvm/Analysis/CFG.h>
16-
#include <llvm/Analysis/InstructionSimplify.h>
1715
#include <llvm/Analysis/LoopInfo.h>
16+
#include <llvm/Analysis/CFG.h>
1817
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
19-
#include <llvm/Transforms/Utils/SSAUpdater.h>
2018
#include "common/LLVMWarningsPop.hpp"
2119

2220
using namespace IGC;
@@ -58,15 +56,7 @@ class DynamicRayManagementPass : public FunctionPass
5856
llvm::SmallVector< llvm::LoadInst*, 4>& foundLoads);
5957

6058
bool AddDynamicRayManagement(Function& F);
61-
bool TryProceedBasedApproach(Function& F);
6259
void HandleComplexControlFlow(Function& F);
63-
bool requiresSplittingCheckReleaseRegion(Instruction& I);
64-
void FindProceedsInOperands(
65-
Instruction* I,
66-
SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds,
67-
SmallPtrSetImpl<Instruction*>& cache
68-
);
69-
7060

7161
void HoistBeforeMostInnerLoop(
7262
BasicBlock*& dominatorBasicBlock,
@@ -122,9 +112,6 @@ bool DynamicRayManagementPass::runOnFunction(Function& F)
122112
return false;
123113
}
124114

125-
if (TryProceedBasedApproach(F))
126-
return true;
127-
128115
changed = AddDynamicRayManagement(F);
129116

130117
if (changed)
@@ -225,249 +212,6 @@ void DynamicRayManagementPass::FindLoadsFromAlloca(
225212
}
226213
}
227214

228-
bool DynamicRayManagementPass::requiresSplittingCheckReleaseRegion(Instruction& I)
229-
{
230-
return
231-
isa<ContinuationHLIntrinsic>(I) ||
232-
isBarrierIntrinsic(&I) ||
233-
isUserFunctionCall(&I);
234-
}
235-
236-
void DynamicRayManagementPass::FindProceedsInOperands(Instruction* I, SetVector<TraceRaySyncProceedHLIntrinsic*>& proceeds, SmallPtrSetImpl<Instruction*>& cache)
237-
{
238-
if (!I)
239-
return;
240-
241-
if (!cache.insert(I).second)
242-
return;
243-
244-
if (auto* proceedI = dyn_cast<TraceRaySyncProceedHLIntrinsic>(I))
245-
{
246-
proceeds.insert(proceedI);
247-
return;
248-
}
249-
250-
for (auto& op : I->operands())
251-
{
252-
if (auto* opI = dyn_cast<Instruction>(op))
253-
{
254-
FindProceedsInOperands(opI, proceeds, cache);
255-
}
256-
}
257-
}
258-
259-
bool DynamicRayManagementPass::TryProceedBasedApproach(Function& F)
260-
{
261-
262-
#if LLVM_VERSION_MAJOR < 10
263-
// LLVM 9 doesn't have the necessary API for testing if the loop is guarded
264-
// none of the titles that use LLVM 9 use rayquery, so we just return instead of providing our own implementation
265-
return false;
266-
#else
267-
268-
// this approach assumes all traffic between private memory and RTStack happens on Proceed calls
269-
// will be removed once RayQuery will be overhauled to minimize shadowstack usage
270-
271-
if (IGC_IS_FLAG_ENABLED(DisableProceedBasedApproachForRayQueryDynamicRayManagementMechanism))
272-
return false;
273-
274-
SmallVector<TraceRaySyncProceedHLIntrinsic*> allProceeds;
275-
276-
for (auto& I : instructions(F))
277-
{
278-
// we don't want to use this approach in complex control flow situations
279-
if (requiresSplittingCheckReleaseRegion(I))
280-
return false;
281-
282-
// collect all Proceed calls, because some of them might be not in any loop
283-
if (auto* proceed = dyn_cast<TraceRaySyncProceedHLIntrinsic>(&I))
284-
allProceeds.push_back(proceed);
285-
}
286-
287-
if (allProceeds.empty())
288-
return false;
289-
290-
auto* LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
291-
292-
if (LI->empty())
293-
return false;
294-
295-
// we don't want to do the insertions on the fly, because changing control flow will invalidate the domtrees
296-
SetVector<BasicBlock*> checkBBs;
297-
SetVector<BasicBlock*> releaseBBs;
298-
299-
// we iterate over all loops from outermost to innermost
300-
// if we find a loop, we skip all loops that are nested in it
301-
SmallPtrSet<Loop*, 4> loopsToIgnore;
302-
for (auto& loop : LI->getLoopsInPreorder())
303-
{
304-
if (loopsToIgnore.contains(loop))
305-
continue;
306-
307-
if (!loop->isLoopSimplifyForm())
308-
return false;
309-
310-
SetVector<TraceRaySyncProceedHLIntrinsic*> proceeds;
311-
SmallPtrSet<Instruction*, 4> cache;
312-
FindProceedsInOperands(loop->getLoopGuardBranch(), proceeds, cache);
313-
314-
SmallVector<BasicBlock*> exitingBlocks;
315-
loop->getExitingBlocks(exitingBlocks);
316-
317-
for (auto* exitingBB : exitingBlocks)
318-
FindProceedsInOperands(exitingBB->getTerminator(), proceeds, cache);
319-
320-
if (proceeds.empty())
321-
continue;
322-
323-
loopsToIgnore.insert(loop->getSubLoops().begin(), loop->getSubLoops().end());
324-
325-
bool allProceedsInLoop = llvm::all_of(
326-
proceeds,
327-
[&](auto* proceed)
328-
{
329-
return loop->contains(proceed->getParent());
330-
}
331-
);
332-
333-
SmallVector<BasicBlock*> exitBlocks;
334-
loop->getExitBlocks(exitBlocks);
335-
336-
if (allProceedsInLoop)
337-
{
338-
// if all proceed calls are inside the loop, we just check/release the loop itself
339-
checkBBs.insert(loop->getLoopPreheader());
340-
341-
for (auto* exitBB : exitBlocks)
342-
releaseBBs.insert(exitBB);
343-
}
344-
else
345-
{
346-
// in other cases, we need to expand to make sure all proceed calls are inside the check/release scope
347-
auto* start = loop->getLoopPreheader();
348-
auto* end = loop->getLoopPreheader();
349-
350-
for (auto* proceed : proceeds)
351-
{
352-
start = m_DT->findNearestCommonDominator(start, proceed->getParent());
353-
end = m_PDT->findNearestCommonDominator(end, proceed->getParent());
354-
}
355-
356-
// following single entry multiple exits loop model, we insert one check and multiple releases
357-
checkBBs.insert(start);
358-
359-
for (auto* exitBB : exitBlocks)
360-
releaseBBs.insert(m_PDT->findNearestCommonDominator(end, exitBB));
361-
}
362-
363-
llvm::erase_if(
364-
allProceeds,
365-
[&](auto* proceed) {
366-
return loop->contains(proceed) || proceeds.contains(proceed);
367-
}
368-
);
369-
}
370-
371-
// abort if we have any proceeds that don't contribute to loop exit conditions
372-
if (!allProceeds.empty())
373-
return false;
374-
375-
// at this point we commit to the approach
376-
RTBuilder IRB(&*F.getEntryBlock().begin(), *m_CGCtx);
377-
378-
SmallVector<Instruction*> guardStoresAndLoads;
379-
380-
// create a guard boolean to prevent double checking/double releasing
381-
// later, we will try to optimize it out with LoadAndStorePromoter
382-
auto* guard = IRB.CreateAlloca(IRB.getInt1Ty(), nullptr, VALUE_NAME("RayQueryCheckReleaseGuard"));
383-
auto* init_guard = IRB.CreateStore(IRB.getFalse(), guard);
384-
guardStoresAndLoads.push_back(init_guard);
385-
386-
SmallVector<Instruction*> CheckReleaseIntrinsics;
387-
388-
for (auto* checkBB : checkBBs)
389-
{
390-
auto* IP = checkBB->getFirstNonPHI();
391-
IRB.SetInsertPoint(IP);
392-
393-
auto* load = IRB.CreateLoad(guard, VALUE_NAME("RQGuardValue"));
394-
395-
guardStoresAndLoads.push_back(load);
396-
397-
auto* cond = IRB.CreateNot(
398-
load,
399-
VALUE_NAME("NegatedRQGuardValue")
400-
);
401-
402-
CheckReleaseIntrinsics.push_back(IRB.CreateRayQueryCheckIntrinsic(cond));
403-
guardStoresAndLoads.push_back(IRB.CreateStore(IRB.getTrue(), guard));
404-
};
405-
406-
for (auto* insertBB : releaseBBs)
407-
{
408-
auto* IP = insertBB->getTerminator();
409-
IRB.SetInsertPoint(IP);
410-
411-
auto* cond = IRB.CreateLoad(guard, VALUE_NAME("RQGuardValue"));
412-
413-
guardStoresAndLoads.push_back(cond);
414-
415-
CheckReleaseIntrinsics.push_back(IRB.CreateRayQueryReleaseIntrinsic(cond));
416-
guardStoresAndLoads.push_back(IRB.CreateStore(IRB.getFalse(), guard));
417-
};
418-
419-
// make sure guard dominates all uses
420-
init_guard->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
421-
guard->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
422-
423-
SmallVector<PHINode*> phis;
424-
425-
SSAUpdater Updater(&phis);
426-
LoadAndStorePromoter LSP(guardStoresAndLoads, Updater, "RayQueryCheckReleaseGuardPromotion");
427-
LSP.run(guardStoresAndLoads);
428-
429-
for (auto* phi : phis)
430-
{
431-
if (auto* V = phi->hasConstantValue())
432-
{
433-
phi->replaceAllUsesWith(V);
434-
phi->eraseFromParent();
435-
}
436-
}
437-
438-
SimplifyQuery SQ(F.getParent()->getDataLayout());
439-
440-
for (auto* I : CheckReleaseIntrinsics)
441-
{
442-
Value* flag = I->getOperand(0);
443-
if (auto* flagAsBinOp = dyn_cast<BinaryOperator>(flag))
444-
flag =
445-
#if LLVM_VERSION_MAJOR >= 15
446-
simplifyBinOp(
447-
#else
448-
SimplifyBinOp(
449-
#endif
450-
flagAsBinOp->getOpcode(),
451-
flagAsBinOp->getOperand(0),
452-
flagAsBinOp->getOperand(1),
453-
SQ
454-
);
455-
456-
if (auto* CI = dyn_cast_or_null<ConstantInt>(flag))
457-
{
458-
if (CI->isZero())
459-
I->eraseFromParent();
460-
461-
if (CI->isOne())
462-
I->setOperand(0, IRB.getTrue());
463-
}
464-
}
465-
466-
return true;
467-
468-
#endif // LLVM_VERSION_MAJOR >= 10
469-
}
470-
471215
bool DynamicRayManagementPass::AddDynamicRayManagement(Function& F)
472216
{
473217
vector<AllocateRayQueryIntrinsic*> allocateRayQueries;
@@ -779,7 +523,9 @@ void DynamicRayManagementPass::HandleComplexControlFlow(Function& F)
779523
// and GenISA_RayQueryCheck after to avoid deadlocks.
780524
for (Instruction& I : instructions(F))
781525
{
782-
if (requiresSplittingCheckReleaseRegion(I))
526+
if (isa<ContinuationHLIntrinsic>(&I) ||
527+
isBarrierIntrinsic(&I) ||
528+
isUserFunctionCall(&I))
783529
{
784530
// Look through all RayQueryCheck-Release pairs, and check if the barrier/call
785531
// instruction is within any of pairs.

IGC/AdaptorCommon/RayTracing/RTBuilder.cpp

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -161,44 +161,37 @@ Value* RTBuilder::CreateSyncStackPtrIntrinsic(
161161
return StackPtr;
162162
}
163163

164-
RayQueryCheckIntrinsic* RTBuilder::CreateRayQueryCheckIntrinsic(Value* predicate)
164+
RayQueryCheckIntrinsic* RTBuilder::CreateRayQueryCheckIntrinsic()
165165
{
166166
Module* M = this->GetInsertBlock()->getModule();
167167

168-
if (!predicate)
169-
predicate = getTrue();
170-
171-
Value* rayQueryCheck = CreateCall(GenISAIntrinsic::getDeclaration(M, GenISAIntrinsic::GenISA_RayQueryCheck), predicate);
168+
Value* rayQueryCheck = CreateCall(GenISAIntrinsic::getDeclaration(M, GenISAIntrinsic::GenISA_RayQueryCheck));
172169

173170
return cast<RayQueryCheckIntrinsic>(rayQueryCheck);
174171
}
175172

176-
RayQueryReleaseIntrinsic* RTBuilder::CreateRayQueryReleaseIntrinsic(Value* predicate)
173+
RayQueryReleaseIntrinsic* RTBuilder::CreateRayQueryReleaseIntrinsic()
177174
{
178175
Module* M = this->GetInsertBlock()->getModule();
179176

180-
if (!predicate)
181-
predicate = getTrue();
182-
183-
Value* rayQueryRelease = CreateCall(GenISAIntrinsic::getDeclaration(M, GenISAIntrinsic::GenISA_RayQueryRelease), predicate);
177+
Value* rayQueryRelease = CreateCall(GenISAIntrinsic::getDeclaration(M, GenISAIntrinsic::GenISA_RayQueryRelease));
184178

185179
return cast<RayQueryReleaseIntrinsic>(rayQueryRelease);
186180
}
187181

188-
PreemptionDisableIntrinsic* RTBuilder::CreatePreemptionDisableIntrinsic(Value* Flag)
182+
PreemptionDisableIntrinsic* RTBuilder::CreatePreemptionDisableIntrinsic()
189183
{
190184
Module* M = this->GetInsertBlock()->getModule();
191185

192186
auto* GII = CreateCall(
193187
GenISAIntrinsic::getDeclaration(
194188
M,
195-
GenISAIntrinsic::GenISA_PreemptionDisable),
196-
Flag ? Flag : getTrue());
189+
GenISAIntrinsic::GenISA_PreemptionDisable));
197190

198191
return cast<PreemptionDisableIntrinsic>(GII);
199192
}
200193

201-
PreemptionEnableIntrinsic* RTBuilder::CreatePreemptionEnableIntrinsic(Value* Flag)
194+
PreemptionEnableIntrinsic* RTBuilder::CreatePreemptionEnableIntrinsic(Value *Flag)
202195
{
203196
Module* M = this->GetInsertBlock()->getModule();
204197

IGC/AdaptorCommon/RayTracing/RTBuilder.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -256,11 +256,11 @@ class RTBuilder : public IGCIRBuilder<>
256256

257257
Value* CreateSyncStackPtrIntrinsic(Value* Addr, Type* PtrTy, bool AddDecoration);
258258

259-
RayQueryCheckIntrinsic* CreateRayQueryCheckIntrinsic(Value* predicate = nullptr);
260-
RayQueryReleaseIntrinsic* CreateRayQueryReleaseIntrinsic(Value* predicate = nullptr);
259+
RayQueryCheckIntrinsic* CreateRayQueryCheckIntrinsic();
260+
RayQueryReleaseIntrinsic* CreateRayQueryReleaseIntrinsic();
261261

262-
PreemptionDisableIntrinsic* CreatePreemptionDisableIntrinsic(Value* Flag = nullptr);
263-
PreemptionEnableIntrinsic* CreatePreemptionEnableIntrinsic(Value* Flag = nullptr);
262+
PreemptionDisableIntrinsic* CreatePreemptionDisableIntrinsic();
263+
PreemptionEnableIntrinsic* CreatePreemptionEnableIntrinsic(Value *Flag = nullptr);
264264

265265
SyncStackPointerVal* getSyncStackPointer();
266266

0 commit comments

Comments
 (0)