@@ -367,7 +367,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
367
367
DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
368
368
BlockWaitcntBracketsMap;
369
369
370
- DenseSet <MachineBasicBlock *> BlockWaitcntProcessedSet;
370
+ std::vector <MachineBasicBlock *> BlockWaitcntProcessedSet;
371
371
372
372
DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
373
373
@@ -403,7 +403,8 @@ class SIInsertWaitcnts : public MachineFunctionPass {
403
403
void updateEventWaitCntAfter (MachineInstr &Inst,
404
404
BlockWaitcntBrackets *ScoreBrackets);
405
405
void mergeInputScoreBrackets (MachineBasicBlock &Block);
406
- MachineBasicBlock *loopBottom (const MachineLoop *Loop);
406
+ bool isLoopBottom (const MachineLoop *Loop, const MachineBasicBlock *Block);
407
+ unsigned countNumBottomBlocks (const MachineLoop *Loop);
407
408
void insertWaitcntInBlock (MachineFunction &MF, MachineBasicBlock &Block);
408
409
void insertWaitcntBeforeCF (MachineBasicBlock &Block, MachineInstr *Inst);
409
410
bool isWaitcntStronger (unsigned LHS, unsigned RHS);
@@ -1568,15 +1569,29 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
1568
1569
}
1569
1570
}
1570
1571
1571
- // / Return the "bottom" block of a loop. This differs from
1572
- // / MachineLoop::getBottomBlock in that it works even if the loop is
1573
- // / discontiguous.
1574
- MachineBasicBlock *SIInsertWaitcnts::loopBottom (const MachineLoop *Loop) {
1575
- MachineBasicBlock *Bottom = Loop->getHeader ();
1576
- for (MachineBasicBlock *MBB : Loop->blocks ())
1577
- if (MBB->getNumber () > Bottom->getNumber ())
1578
- Bottom = MBB;
1579
- return Bottom;
1572
+ // / Return true if the given basic block is a "bottom" block of a loop. This
1573
+ // / differs from MachineLoop::getBottomBlock in that it works even if the loop
1574
+ // / is discontiguous. This also handles multiple back-edges for the same
1575
+ // / "header" block of a loop.
1576
+ bool SIInsertWaitcnts::isLoopBottom (const MachineLoop *Loop,
1577
+ const MachineBasicBlock *Block) {
1578
+ for (MachineBasicBlock *MBB : Loop->blocks ()) {
1579
+ if (MBB == Block && MBB->isSuccessor (Loop->getHeader ())) {
1580
+ return true ;
1581
+ }
1582
+ }
1583
+ return false ;
1584
+ }
1585
+
1586
+ // / Count the number of "bottom" basic blocks of a loop.
1587
+ unsigned SIInsertWaitcnts::countNumBottomBlocks (const MachineLoop *Loop) {
1588
+ unsigned Count = 0 ;
1589
+ for (MachineBasicBlock *MBB : Loop->blocks ()) {
1590
+ if (MBB->isSuccessor (Loop->getHeader ())) {
1591
+ Count++;
1592
+ }
1593
+ }
1594
+ return Count;
1580
1595
}
1581
1596
1582
1597
// Generate s_waitcnt instructions where needed.
@@ -1685,7 +1700,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
1685
1700
1686
1701
// Check if we need to force convergence at loop footer.
1687
1702
MachineLoop *ContainingLoop = MLI->getLoopFor (&Block);
1688
- if (ContainingLoop && loopBottom (ContainingLoop) == &Block) {
1703
+ if (ContainingLoop && isLoopBottom (ContainingLoop, &Block) ) {
1689
1704
LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get ();
1690
1705
WaitcntData->print ();
1691
1706
DEBUG (dbgs () << ' \n ' ;);
@@ -1773,6 +1788,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1773
1788
TrackedWaitcntSet.clear ();
1774
1789
BlockVisitedSet.clear ();
1775
1790
VCCZBugHandledSet.clear ();
1791
+ LoopWaitcntDataMap.clear ();
1776
1792
1777
1793
// Walk over the blocks in reverse post-dominator order, inserting
1778
1794
// s_waitcnt where needed.
@@ -1799,21 +1815,30 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1799
1815
// If we are walking into the block from before the loop, then guarantee
1800
1816
// at least 1 re-walk over the loop to propagate the information, even if
1801
1817
// no S_WAITCNT instructions were generated.
1802
- if (ContainingLoop && ContainingLoop->getHeader () == &MBB && J < I &&
1803
- (!BlockWaitcntProcessedSet.count (&MBB))) {
1804
- BlockWaitcntBracketsMap[&MBB]->setRevisitLoop (true );
1805
- DEBUG (dbgs () << " set-revisit: Block"
1806
- << ContainingLoop->getHeader ()->getNumber () << ' \n ' ;);
1818
+ if (ContainingLoop && ContainingLoop->getHeader () == &MBB) {
1819
+ unsigned Count = countNumBottomBlocks (ContainingLoop);
1820
+
1821
+ // If the loop has multiple back-edges, and so more than one "bottom"
1822
+ // basic block, we have to guarantee a re-walk over every block.
1823
+ if ((std::count (BlockWaitcntProcessedSet.begin (),
1824
+ BlockWaitcntProcessedSet.end (), &MBB) < Count)) {
1825
+ BlockWaitcntBracketsMap[&MBB]->setRevisitLoop (true );
1826
+ DEBUG (dbgs () << " set-revisit: Block"
1827
+ << ContainingLoop->getHeader ()->getNumber () << ' \n ' ;);
1828
+ }
1807
1829
}
1808
1830
1809
1831
// Walk over the instructions.
1810
1832
insertWaitcntInBlock (MF, MBB);
1811
1833
1812
1834
// Flag that waitcnts have been processed at least once.
1813
- BlockWaitcntProcessedSet.insert (&MBB);
1835
+ BlockWaitcntProcessedSet.push_back (&MBB);
1814
1836
1815
- // See if we want to revisit the loop.
1816
- if (ContainingLoop && loopBottom (ContainingLoop) == &MBB) {
1837
+ // See if we want to revisit the loop. If a loop has multiple back-edges,
1838
+ // we shouldn't revisit the same "bottom" basic block.
1839
+ if (ContainingLoop && isLoopBottom (ContainingLoop, &MBB) &&
1840
+ std::count (BlockWaitcntProcessedSet.begin (),
1841
+ BlockWaitcntProcessedSet.end (), &MBB) == 1 ) {
1817
1842
MachineBasicBlock *EntryBB = ContainingLoop->getHeader ();
1818
1843
BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get ();
1819
1844
if (EntrySB && EntrySB->getRevisitLoop ()) {
0 commit comments