Skip to content

Commit 6617a5a

Browse files
committed
[AMDGPU] Move insertion of function entry waitcnt later
This allows tracking these as preexisting waitcnt.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D101380
1 parent f6d7fc8 commit 6617a5a

File tree

2 files changed

+35
-22
lines changed

2 files changed

+35
-22
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,14 +1600,35 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
16001600

16011601
TrackedWaitcntSet.clear();
16021602
BlockInfos.clear();
1603+
bool Modified = false;
1604+
1605+
if (!MFI->isEntryFunction()) {
1606+
// Wait for any outstanding memory operations that the input registers may
1607+
// depend on. We can't track them and it's better to do the wait after the
1608+
// costly call sequence.
1609+
1610+
// TODO: Could insert earlier and schedule more liberally with operations
1611+
// that only use caller preserved registers.
1612+
MachineBasicBlock &EntryBB = MF.front();
1613+
MachineBasicBlock::iterator I = EntryBB.begin();
1614+
for (MachineBasicBlock::iterator E = EntryBB.end();
1615+
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
1616+
;
1617+
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
1618+
if (ST->hasVscnt())
1619+
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
1620+
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1621+
.addImm(0);
1622+
1623+
Modified = true;
1624+
}
16031625

16041626
// Keep iterating over the blocks in reverse post order, inserting and
16051627
// updating s_waitcnt where needed, until a fix point is reached.
16061628
for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF))
16071629
BlockInfos.insert({MBB, BlockInfo(MBB)});
16081630

16091631
std::unique_ptr<WaitcntBrackets> Brackets;
1610-
bool Modified = false;
16111632
bool Repeat;
16121633
do {
16131634
Repeat = false;
@@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
17071728
}
17081729
}
17091730

1710-
if (!MFI->isEntryFunction()) {
1711-
// Wait for any outstanding memory operations that the input registers may
1712-
// depend on. We can't track them and it's better to the wait after the
1713-
// costly call sequence.
1714-
1715-
// TODO: Could insert earlier and schedule more liberally with operations
1716-
// that only use caller preserved registers.
1717-
MachineBasicBlock &EntryBB = MF.front();
1718-
MachineBasicBlock::iterator I = EntryBB.begin();
1719-
for (MachineBasicBlock::iterator E = EntryBB.end();
1720-
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
1721-
;
1722-
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
1723-
if (ST->hasVscnt())
1724-
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
1725-
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1726-
.addImm(0);
1727-
1728-
Modified = true;
1729-
}
1730-
17311731
return Modified;
17321732
}

llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,17 @@ body: |
192192
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
193193
S_WAITCNT 3952
194194
KILL $vgpr0
195+
196+
# Combine preexisting waitcnt with wait added to the start of a non-entry function.
197+
198+
---
199+
name: test_waitcnt_preexisting_func_start
200+
body: |
201+
bb.0:
202+
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
203+
; GFX9: S_WAITCNT 0
204+
; GFX9-NOT: S_WAITCNT 0
205+
; GFX9: S_ENDPGM 0
206+
S_WAITCNT 0
207+
S_ENDPGM 0
195208
...

0 commit comments

Comments
 (0)