@@ -219,11 +219,12 @@ class SIWholeQuadMode : public MachineFunctionPass {
219
219
void lowerBlock (MachineBasicBlock &MBB);
220
220
void processBlock (MachineBasicBlock &MBB, bool IsEntry);
221
221
222
- void lowerLiveMaskQueries ();
223
- void lowerCopyInstrs ();
224
- void lowerKillInstrs (bool IsWQM);
222
+ bool lowerLiveMaskQueries ();
223
+ bool lowerCopyInstrs ();
224
+ bool lowerKillInstrs (bool IsWQM);
225
225
void lowerInitExec (MachineInstr &MI);
226
- MachineBasicBlock::iterator lowerInitExecInstrs (MachineBasicBlock &Entry);
226
+ MachineBasicBlock::iterator lowerInitExecInstrs (MachineBasicBlock &Entry,
227
+ bool &Changed);
227
228
228
229
public:
229
230
static char ID;
@@ -796,6 +797,8 @@ MachineBasicBlock *SIWholeQuadMode::splitBlock(MachineBasicBlock *BB,
796
797
797
798
MachineInstr *SIWholeQuadMode::lowerKillF32 (MachineBasicBlock &MBB,
798
799
MachineInstr &MI) {
800
+ assert (LiveMaskReg.isVirtual ());
801
+
799
802
const DebugLoc &DL = MI.getDebugLoc ();
800
803
unsigned Opcode = 0 ;
801
804
@@ -913,6 +916,8 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
913
916
914
917
MachineInstr *SIWholeQuadMode::lowerKillI1 (MachineBasicBlock &MBB,
915
918
MachineInstr &MI, bool IsWQM) {
919
+ assert (LiveMaskReg.isVirtual ());
920
+
916
921
const DebugLoc &DL = MI.getDebugLoc ();
917
922
MachineInstr *MaskUpdateMI = nullptr ;
918
923
@@ -1144,6 +1149,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
1144
1149
void SIWholeQuadMode::toExact (MachineBasicBlock &MBB,
1145
1150
MachineBasicBlock::iterator Before,
1146
1151
Register SaveWQM) {
1152
+ assert (LiveMaskReg.isVirtual ());
1153
+
1147
1154
bool IsTerminator = Before == MBB.end ();
1148
1155
if (!IsTerminator) {
1149
1156
auto FirstTerm = MBB.getFirstTerminator ();
@@ -1423,7 +1430,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
1423
1430
assert (!SavedNonStrictReg);
1424
1431
}
1425
1432
1426
- void SIWholeQuadMode::lowerLiveMaskQueries () {
1433
+ bool SIWholeQuadMode::lowerLiveMaskQueries () {
1427
1434
for (MachineInstr *MI : LiveMaskQueries) {
1428
1435
const DebugLoc &DL = MI->getDebugLoc ();
1429
1436
Register Dest = MI->getOperand (0 ).getReg ();
@@ -1435,9 +1442,10 @@ void SIWholeQuadMode::lowerLiveMaskQueries() {
1435
1442
LIS->ReplaceMachineInstrInMaps (*MI, *Copy);
1436
1443
MI->eraseFromParent ();
1437
1444
}
1445
+ return !LiveMaskQueries.empty ();
1438
1446
}
1439
1447
1440
- void SIWholeQuadMode::lowerCopyInstrs () {
1448
+ bool SIWholeQuadMode::lowerCopyInstrs () {
1441
1449
for (MachineInstr *MI : LowerToMovInstrs) {
1442
1450
assert (MI->getNumExplicitOperands () == 2 );
1443
1451
@@ -1492,9 +1500,10 @@ void SIWholeQuadMode::lowerCopyInstrs() {
1492
1500
*MRI, MI->getOperand (0 )));
1493
1501
MI->setDesc (TII->get (CopyOp));
1494
1502
}
1503
+ return !LowerToCopyInstrs.empty () || !LowerToMovInstrs.empty ();
1495
1504
}
1496
1505
1497
- void SIWholeQuadMode::lowerKillInstrs (bool IsWQM) {
1506
+ bool SIWholeQuadMode::lowerKillInstrs (bool IsWQM) {
1498
1507
for (MachineInstr *MI : KillInstrs) {
1499
1508
MachineBasicBlock *MBB = MI->getParent ();
1500
1509
MachineInstr *SplitPoint = nullptr ;
@@ -1510,6 +1519,7 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
1510
1519
if (SplitPoint)
1511
1520
splitBlock (MBB, SplitPoint);
1512
1521
}
1522
+ return !KillInstrs.empty ();
1513
1523
}
1514
1524
1515
1525
void SIWholeQuadMode::lowerInitExec (MachineInstr &MI) {
@@ -1601,7 +1611,7 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
1601
1611
/// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
1602
1612
/// for instructions that depend on EXEC.
1603
1613
MachineBasicBlock::iterator
1604
- SIWholeQuadMode::lowerInitExecInstrs (MachineBasicBlock &Entry) {
1614
+ SIWholeQuadMode::lowerInitExecInstrs (MachineBasicBlock &Entry, bool &Changed ) {
1605
1615
MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI ();
1606
1616
1607
1617
for (MachineInstr *MI : InitExecInstrs) {
@@ -1612,6 +1622,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
1612
1622
InsertPt = std::next (MI->getIterator ());
1613
1623
1614
1624
lowerInitExec (*MI);
1625
+ Changed = true ;
1615
1626
}
1616
1627
1617
1628
return InsertPt;
@@ -1664,48 +1675,50 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
1664
1675
}
1665
1676
1666
1677
const char GlobalFlags = analyzeFunction (MF);
1667
- const bool NeedsLiveMask = !(KillInstrs. empty () && LiveMaskQueries. empty ()) ;
1678
+ bool Changed = false ;
1668
1679
1669
1680
LiveMaskReg = Exec;
1670
1681
1671
1682
MachineBasicBlock &Entry = MF.front ();
1672
- MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs (Entry);
1673
-
1674
- // Shader is simple does not need any state changes or any complex lowering
1675
- if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty () &&
1676
- LowerToMovInstrs.empty () && KillInstrs.empty ()) {
1677
- lowerLiveMaskQueries ();
1678
- if (!InitExecInstrs.empty ())
1679
- LIS->removeAllRegUnitsForPhysReg (AMDGPU::EXEC);
1680
- return !InitExecInstrs.empty () || !LiveMaskQueries.empty ();
1681
- }
1683
+ MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs (Entry, Changed);
1682
1684
1683
1685
// Store a copy of the original live mask when required
1684
- if (NeedsLiveMask || (GlobalFlags & StateWQM)) {
1686
+ const bool HasLiveMaskQueries = !LiveMaskQueries.empty ();
1687
+ const bool HasWaveModes = GlobalFlags & ~StateExact;
1688
+ const bool HasKills = !KillInstrs.empty ();
1689
+ const bool UsesWQM = GlobalFlags & StateWQM;
1690
+ if (HasKills || UsesWQM || (HasWaveModes && HasLiveMaskQueries)) {
1685
1691
LiveMaskReg = MRI->createVirtualRegister (TRI->getBoolRC ());
1686
1692
MachineInstr *MI =
1687
1693
BuildMI (Entry, EntryMI, DebugLoc (), TII->get (AMDGPU::COPY), LiveMaskReg)
1688
1694
.addReg (Exec);
1689
1695
LIS->InsertMachineInstrInMaps (*MI);
1696
+ Changed = true ;
1690
1697
}
1691
1698
1692
1699
LLVM_DEBUG (printInfo ());
1693
1700
1694
- lowerLiveMaskQueries ();
1695
- lowerCopyInstrs ();
1701
+ Changed |= lowerLiveMaskQueries ();
1702
+ Changed |= lowerCopyInstrs ();
1696
1703
1697
- // Shader only needs WQM
1698
- if (GlobalFlags == StateWQM) {
1704
+ if (!HasWaveModes) {
1705
+ // No wave mode execution
1706
+ Changed |= lowerKillInstrs (false );
1707
+ } else if (GlobalFlags == StateWQM) {
1708
+ // Shader only needs WQM
1699
1709
auto MI = BuildMI (Entry, EntryMI, DebugLoc (), TII->get (WQMOpc), Exec)
1700
1710
.addReg (Exec);
1701
1711
LIS->InsertMachineInstrInMaps (*MI);
1702
1712
lowerKillInstrs (true );
1713
+ Changed = true ;
1703
1714
} else {
1715
+ // Wave mode switching requires full lowering pass.
1704
1716
for (auto BII : Blocks)
1705
1717
processBlock (*BII.first , BII.first == &Entry);
1706
1718
// Lowering blocks causes block splitting so perform as a second pass.
1707
1719
for (auto BII : Blocks)
1708
1720
lowerBlock (*BII.first );
1721
+ Changed = true ;
1709
1722
}
1710
1723
1711
1724
// Compute live range for live mask
@@ -1721,5 +1734,5 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
1721
1734
if (!KillInstrs.empty () || !InitExecInstrs.empty ())
1722
1735
LIS->removeAllRegUnitsForPhysReg (AMDGPU::EXEC);
1723
1736
1724
- return true ;
1737
+ return Changed ;
1725
1738
}
0 commit comments