Skip to content

Commit 8aaa491

Browse files
vangthao95 authored and kzhuravl committed
[AMDGPU][NFC] Refactor SIInsertWaitcnts zero waitcnt generation (llvm#82575)
Move the allZero* waitcnt generation methods into the WaitcntGenerator class.

Change-Id: I9e72187a97d9301a7bc34f6adcc02d24859130ed
1 parent 15a9983 commit 8aaa491

File tree

2 files changed

+22
-15
lines changed

2 files changed

+22
-15
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,10 @@ class WaitcntGenerator {
480480
// WaitEventType to corresponding counter values in InstCounterType.
481481
virtual const unsigned *getWaitEventMask() const = 0;
482482

483+
// Returns a new waitcnt with all counters except VScnt set to 0. If
484+
// IncludeVSCnt is true, VScnt is set to 0, otherwise it is set to ~0u.
485+
virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;
486+
483487
virtual ~WaitcntGenerator() = default;
484488
};
485489

@@ -516,6 +520,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
516520

517521
return WaitEventMaskForInstPreGFX12;
518522
}
523+
524+
virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
519525
};
520526

521527
class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
@@ -549,6 +555,8 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
549555

550556
return WaitEventMaskForInstGFX12Plus;
551557
}
558+
559+
virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
552560
};
553561

554562
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -1304,6 +1312,16 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
13041312
return Modified;
13051313
}
13061314

1315+
AMDGPU::Waitcnt
1316+
WaitcntGeneratorPreGFX12::getAllZeroWaitcnt(bool IncludeVSCnt) const {
1317+
return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt && ST->hasVscnt() ? 0 : ~0u);
1318+
}
1319+
1320+
AMDGPU::Waitcnt
1321+
WaitcntGeneratorGFX12Plus::getAllZeroWaitcnt(bool IncludeVSCnt) const {
1322+
return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt ? 0 : ~0u, 0, 0, 0);
1323+
}
1324+
13071325
/// Combine consecutive S_WAIT_*CNT instructions that precede \p It and
13081326
/// follow \p OldWaitcntInstr and apply any extra waits from \p Wait that
13091327
/// were added by previous passes. Currently this pass conservatively
@@ -1613,8 +1631,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
16131631
MI.getOpcode() == AMDGPU::SI_RETURN ||
16141632
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
16151633
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
1616-
Wait = Wait.combined(
1617-
AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
1634+
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
16181635
}
16191636
// Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
16201637
// stores. In this case it can be useful to send a message to explicitly
@@ -1834,8 +1851,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
18341851
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
18351852
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
18361853
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
1837-
Wait = Wait.combined(
1838-
AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt()));
1854+
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
18391855
}
18401856

18411857
// TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1851,7 +1867,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
18511867
ScoreBrackets.simplifyWaitcnt(Wait);
18521868

18531869
if (ForceEmitZeroWaitcnts)
1854-
Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts());
1870+
Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
18551871

18561872
if (ForceEmitWaitcnt[LOAD_CNT])
18571873
Wait.LoadCnt = 0;
@@ -2089,7 +2105,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
20892105
if (callWaitsOnFunctionReturn(Inst)) {
20902106
// Act as a wait on everything
20912107
ScoreBrackets->applyWaitcnt(
2092-
AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
2108+
WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
20932109
ScoreBrackets->setStateOnFunctionEntryOrReturn();
20942110
} else {
20952111
// May need to wait for anything.

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -860,15 +860,6 @@ struct Waitcnt {
860860
: LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
861861
SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}
862862

863-
static Waitcnt allZero(bool Extended, bool HasStorecnt) {
864-
return Extended ? Waitcnt(0, 0, 0, 0, 0, 0, 0)
865-
: Waitcnt(0, 0, 0, HasStorecnt ? 0 : ~0u);
866-
}
867-
868-
static Waitcnt allZeroExceptVsCnt(bool Extended) {
869-
return Extended ? Waitcnt(0, 0, 0, ~0u, 0, 0, 0) : Waitcnt(0, 0, 0, ~0u);
870-
}
871-
872863
bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
873864

874865
bool hasWaitExceptStoreCnt() const {

0 commit comments

Comments
 (0)