@@ -1600,14 +1600,35 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1600
1600
1601
1601
TrackedWaitcntSet.clear ();
1602
1602
BlockInfos.clear ();
1603
+ bool Modified = false ;
1604
+
1605
+ if (!MFI->isEntryFunction ()) {
1606
+ // Wait for any outstanding memory operations that the input registers may
1607
+ // depend on. We can't track them and it's better to do the wait after the
1608
+ // costly call sequence.
1609
+
1610
+ // TODO: Could insert earlier and schedule more liberally with operations
1611
+ // that only use caller preserved registers.
1612
+ MachineBasicBlock &EntryBB = MF.front ();
1613
+ MachineBasicBlock::iterator I = EntryBB.begin ();
1614
+ for (MachineBasicBlock::iterator E = EntryBB.end ();
1615
+ I != E && (I->isPHI () || I->isMetaInstruction ()); ++I)
1616
+ ;
1617
+ BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT)).addImm (0 );
1618
+ if (ST->hasVscnt ())
1619
+ BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT_VSCNT))
1620
+ .addReg (AMDGPU::SGPR_NULL, RegState::Undef)
1621
+ .addImm (0 );
1622
+
1623
+ Modified = true ;
1624
+ }
1603
1625
1604
1626
// Keep iterating over the blocks in reverse post order, inserting and
1605
1627
// updating s_waitcnt where needed, until a fix point is reached.
1606
1628
for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF))
1607
1629
BlockInfos.insert ({MBB, BlockInfo (MBB)});
1608
1630
1609
1631
std::unique_ptr<WaitcntBrackets> Brackets;
1610
- bool Modified = false ;
1611
1632
bool Repeat;
1612
1633
do {
1613
1634
Repeat = false ;
@@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1707
1728
}
1708
1729
}
1709
1730
1710
- if (!MFI->isEntryFunction ()) {
1711
- // Wait for any outstanding memory operations that the input registers may
1712
- // depend on. We can't track them and it's better to the wait after the
1713
- // costly call sequence.
1714
-
1715
- // TODO: Could insert earlier and schedule more liberally with operations
1716
- // that only use caller preserved registers.
1717
- MachineBasicBlock &EntryBB = MF.front ();
1718
- MachineBasicBlock::iterator I = EntryBB.begin ();
1719
- for (MachineBasicBlock::iterator E = EntryBB.end ();
1720
- I != E && (I->isPHI () || I->isMetaInstruction ()); ++I)
1721
- ;
1722
- BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT)).addImm (0 );
1723
- if (ST->hasVscnt ())
1724
- BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT_VSCNT))
1725
- .addReg (AMDGPU::SGPR_NULL, RegState::Undef)
1726
- .addImm (0 );
1727
-
1728
- Modified = true ;
1729
- }
1730
-
1731
1731
return Modified;
1732
1732
}
0 commit comments