Skip to content

Commit 1a4bc3a

Browse files
committed
[AMDGPU] Avoid unnecessary graph visits during WQM marking
Avoid revisiting nodes with the same set of defined lanes by using a unified visited set which integrates lanes into the key. This retains the intent of the original code by still revisiting a subgraph if a different set of lanes is defined, since marking might then progress differently. Note: the default size of the visited set has been confirmed to cover >99% of invocations in a large array of test shaders. Reviewed By: piotr Differential Revision: https://reviews.llvm.org/D98772
1 parent 5a8d5a2 commit 1a4bc3a

File tree

1 file changed

+11
-15
lines changed

1 file changed

+11
-15
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -335,23 +335,22 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
335335
struct PhiEntry {
336336
const VNInfo *Phi;
337337
unsigned PredIdx;
338-
unsigned VisitIdx;
339338
LaneBitmask DefinedLanes;
340339

341-
PhiEntry(const VNInfo *Phi, unsigned PredIdx, unsigned VisitIdx,
342-
LaneBitmask DefinedLanes)
343-
: Phi(Phi), PredIdx(PredIdx), VisitIdx(VisitIdx),
344-
DefinedLanes(DefinedLanes) {}
340+
PhiEntry(const VNInfo *Phi, unsigned PredIdx, LaneBitmask DefinedLanes)
341+
: Phi(Phi), PredIdx(PredIdx), DefinedLanes(DefinedLanes) {}
345342
};
346-
SmallSetVector<const VNInfo *, 4> Visited;
343+
using VisitKey = std::pair<const VNInfo *, LaneBitmask>;
347344
SmallVector<PhiEntry, 2> PhiStack;
345+
SmallSet<VisitKey, 4> Visited;
348346
LaneBitmask DefinedLanes;
349-
unsigned NextPredIdx; // Only used for processing phi nodes
347+
unsigned NextPredIdx = 0; // Only used for processing phi nodes
350348
do {
351349
const VNInfo *NextValue = nullptr;
350+
const VisitKey Key(Value, DefinedLanes);
352351

353-
if (!Visited.count(Value)) {
354-
Visited.insert(Value);
352+
if (!Visited.count(Key)) {
353+
Visited.insert(Key);
355354
// On first visit to a phi then start processing first predecessor
356355
NextPredIdx = 0;
357356
}
@@ -367,14 +366,14 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
367366
auto PE = MBB->pred_end();
368367
for (; PI != PE && !NextValue; ++PI, ++Idx) {
369368
if (const VNInfo *VN = LR.getVNInfoBefore(LIS->getMBBEndIdx(*PI))) {
370-
if (!Visited.count(VN))
369+
if (!Visited.count(VisitKey(VN, DefinedLanes)))
371370
NextValue = VN;
372371
}
373372
}
374373

375374
// If there are more predecessors to process; add phi to stack
376375
if (PI != PE)
377-
PhiStack.emplace_back(Value, Idx, Visited.size(), DefinedLanes);
376+
PhiStack.emplace_back(Value, Idx, DefinedLanes);
378377
} else {
379378
MachineInstr *MI = LIS->getInstructionFromIndex(Value->def);
380379
assert(MI && "Def has no defining instruction");
@@ -404,7 +403,7 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
404403
// Definition not complete; need to process input value
405404
LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
406405
if (const VNInfo *VN = LRQ.valueIn()) {
407-
if (!Visited.count(VN))
406+
if (!Visited.count(VisitKey(VN, DefinedLanes)))
408407
NextValue = VN;
409408
}
410409
}
@@ -424,9 +423,6 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
424423
NextValue = Entry.Phi;
425424
NextPredIdx = Entry.PredIdx;
426425
DefinedLanes = Entry.DefinedLanes;
427-
// Rewind visited set to correct state
428-
while (Visited.size() > Entry.VisitIdx)
429-
Visited.pop_back();
430426
PhiStack.pop_back();
431427
}
432428

0 commit comments

Comments
 (0)