Skip to content

Commit 8d28a41

Browse files
authored
[AMDGPU] Remove SIWholeQuadMode pass early exit (#98450)
Merge the bypass logic from the early exit into the main pass execution flow.
1 parent 0b71d80 commit 8d28a41

File tree

1 file changed

+38
-25
lines changed

1 file changed

+38
-25
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 38 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -219,11 +219,12 @@ class SIWholeQuadMode : public MachineFunctionPass {
219219
void lowerBlock(MachineBasicBlock &MBB);
220220
void processBlock(MachineBasicBlock &MBB, bool IsEntry);
221221

222-
void lowerLiveMaskQueries();
223-
void lowerCopyInstrs();
224-
void lowerKillInstrs(bool IsWQM);
222+
bool lowerLiveMaskQueries();
223+
bool lowerCopyInstrs();
224+
bool lowerKillInstrs(bool IsWQM);
225225
void lowerInitExec(MachineInstr &MI);
226-
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry);
226+
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
227+
bool &Changed);
227228

228229
public:
229230
static char ID;
@@ -796,6 +797,8 @@ MachineBasicBlock *SIWholeQuadMode::splitBlock(MachineBasicBlock *BB,
796797

797798
MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
798799
MachineInstr &MI) {
800+
assert(LiveMaskReg.isVirtual());
801+
799802
const DebugLoc &DL = MI.getDebugLoc();
800803
unsigned Opcode = 0;
801804

@@ -913,6 +916,8 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
913916

914917
MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
915918
MachineInstr &MI, bool IsWQM) {
919+
assert(LiveMaskReg.isVirtual());
920+
916921
const DebugLoc &DL = MI.getDebugLoc();
917922
MachineInstr *MaskUpdateMI = nullptr;
918923

@@ -1144,6 +1149,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
11441149
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
11451150
MachineBasicBlock::iterator Before,
11461151
Register SaveWQM) {
1152+
assert(LiveMaskReg.isVirtual());
1153+
11471154
bool IsTerminator = Before == MBB.end();
11481155
if (!IsTerminator) {
11491156
auto FirstTerm = MBB.getFirstTerminator();
@@ -1423,7 +1430,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
14231430
assert(!SavedNonStrictReg);
14241431
}
14251432

1426-
void SIWholeQuadMode::lowerLiveMaskQueries() {
1433+
bool SIWholeQuadMode::lowerLiveMaskQueries() {
14271434
for (MachineInstr *MI : LiveMaskQueries) {
14281435
const DebugLoc &DL = MI->getDebugLoc();
14291436
Register Dest = MI->getOperand(0).getReg();
@@ -1435,9 +1442,10 @@ void SIWholeQuadMode::lowerLiveMaskQueries() {
14351442
LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
14361443
MI->eraseFromParent();
14371444
}
1445+
return !LiveMaskQueries.empty();
14381446
}
14391447

1440-
void SIWholeQuadMode::lowerCopyInstrs() {
1448+
bool SIWholeQuadMode::lowerCopyInstrs() {
14411449
for (MachineInstr *MI : LowerToMovInstrs) {
14421450
assert(MI->getNumExplicitOperands() == 2);
14431451

@@ -1492,9 +1500,10 @@ void SIWholeQuadMode::lowerCopyInstrs() {
14921500
*MRI, MI->getOperand(0)));
14931501
MI->setDesc(TII->get(CopyOp));
14941502
}
1503+
return !LowerToCopyInstrs.empty() || !LowerToMovInstrs.empty();
14951504
}
14961505

1497-
void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
1506+
bool SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
14981507
for (MachineInstr *MI : KillInstrs) {
14991508
MachineBasicBlock *MBB = MI->getParent();
15001509
MachineInstr *SplitPoint = nullptr;
@@ -1510,6 +1519,7 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
15101519
if (SplitPoint)
15111520
splitBlock(MBB, SplitPoint);
15121521
}
1522+
return !KillInstrs.empty();
15131523
}
15141524

15151525
void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
@@ -1601,7 +1611,7 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
16011611
/// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
16021612
/// for instructions that depend on EXEC.
16031613
MachineBasicBlock::iterator
1604-
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
1614+
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
16051615
MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI();
16061616

16071617
for (MachineInstr *MI : InitExecInstrs) {
@@ -1612,6 +1622,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
16121622
InsertPt = std::next(MI->getIterator());
16131623

16141624
lowerInitExec(*MI);
1625+
Changed = true;
16151626
}
16161627

16171628
return InsertPt;
@@ -1664,48 +1675,50 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
16641675
}
16651676

16661677
const char GlobalFlags = analyzeFunction(MF);
1667-
const bool NeedsLiveMask = !(KillInstrs.empty() && LiveMaskQueries.empty());
1678+
bool Changed = false;
16681679

16691680
LiveMaskReg = Exec;
16701681

16711682
MachineBasicBlock &Entry = MF.front();
1672-
MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry);
1673-
1674-
// Shader is simple does not need any state changes or any complex lowering
1675-
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
1676-
LowerToMovInstrs.empty() && KillInstrs.empty()) {
1677-
lowerLiveMaskQueries();
1678-
if (!InitExecInstrs.empty())
1679-
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
1680-
return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
1681-
}
1683+
MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry, Changed);
16821684

16831685
// Store a copy of the original live mask when required
1684-
if (NeedsLiveMask || (GlobalFlags & StateWQM)) {
1686+
const bool HasLiveMaskQueries = !LiveMaskQueries.empty();
1687+
const bool HasWaveModes = GlobalFlags & ~StateExact;
1688+
const bool HasKills = !KillInstrs.empty();
1689+
const bool UsesWQM = GlobalFlags & StateWQM;
1690+
if (HasKills || UsesWQM || (HasWaveModes && HasLiveMaskQueries)) {
16851691
LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
16861692
MachineInstr *MI =
16871693
BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
16881694
.addReg(Exec);
16891695
LIS->InsertMachineInstrInMaps(*MI);
1696+
Changed = true;
16901697
}
16911698

16921699
LLVM_DEBUG(printInfo());
16931700

1694-
lowerLiveMaskQueries();
1695-
lowerCopyInstrs();
1701+
Changed |= lowerLiveMaskQueries();
1702+
Changed |= lowerCopyInstrs();
16961703

1697-
// Shader only needs WQM
1698-
if (GlobalFlags == StateWQM) {
1704+
if (!HasWaveModes) {
1705+
// No wave mode execution
1706+
Changed |= lowerKillInstrs(false);
1707+
} else if (GlobalFlags == StateWQM) {
1708+
// Shader only needs WQM
16991709
auto MI = BuildMI(Entry, EntryMI, DebugLoc(), TII->get(WQMOpc), Exec)
17001710
.addReg(Exec);
17011711
LIS->InsertMachineInstrInMaps(*MI);
17021712
lowerKillInstrs(true);
1713+
Changed = true;
17031714
} else {
1715+
// Wave mode switching requires full lowering pass.
17041716
for (auto BII : Blocks)
17051717
processBlock(*BII.first, BII.first == &Entry);
17061718
// Lowering blocks causes block splitting so perform as a second pass.
17071719
for (auto BII : Blocks)
17081720
lowerBlock(*BII.first);
1721+
Changed = true;
17091722
}
17101723

17111724
// Compute live range for live mask
@@ -1721,5 +1734,5 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
17211734
if (!KillInstrs.empty() || !InitExecInstrs.empty())
17221735
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
17231736

1724-
return true;
1737+
return Changed;
17251738
}

0 commit comments

Comments
 (0)