Skip to content

[AMDGPU][NFC] Refactor SIInsertWaitcnts zero waitcnt generation #82575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,10 @@ class WaitcntGenerator {
// WaitEventType to corresponding counter values in InstCounterType.
virtual const unsigned *getWaitEventMask() const = 0;

// Returns a new waitcnt with all counters except VScnt set to 0. If
// IncludeVSCnt is true, VScnt is set to 0, otherwise it is set to ~0u.
virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;

virtual ~WaitcntGenerator() = default;
};

Expand Down Expand Up @@ -516,6 +520,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {

return WaitEventMaskForInstPreGFX12;
}

virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
};

class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
Expand Down Expand Up @@ -549,6 +555,8 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {

return WaitEventMaskForInstGFX12Plus;
}

virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
};

class SIInsertWaitcnts : public MachineFunctionPass {
Expand Down Expand Up @@ -1304,6 +1312,16 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
return Modified;
}

AMDGPU::Waitcnt
WaitcntGeneratorPreGFX12::getAllZeroWaitcnt(bool IncludeVSCnt) const {
return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt && ST->hasVscnt() ? 0 : ~0u);
}

AMDGPU::Waitcnt
WaitcntGeneratorGFX12Plus::getAllZeroWaitcnt(bool IncludeVSCnt) const {
return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt ? 0 : ~0u, 0, 0, 0);
}

/// Combine consecutive S_WAIT_*CNT instructions that precede \p It and
/// follow \p OldWaitcntInstr and apply any extra waits from \p Wait that
/// were added by previous passes. Currently this pass conservatively
Expand Down Expand Up @@ -1613,8 +1631,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(
AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
}
// Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
// stores. In this case it can be useful to send a message to explicitly
Expand Down Expand Up @@ -1834,8 +1851,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
Wait = Wait.combined(
AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt()));
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
}

// TODO: Remove this work-around, enable the assert for Bug 457939
Expand All @@ -1851,7 +1867,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
ScoreBrackets.simplifyWaitcnt(Wait);

if (ForceEmitZeroWaitcnts)
Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts());
Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);

if (ForceEmitWaitcnt[LOAD_CNT])
Wait.LoadCnt = 0;
Expand Down Expand Up @@ -2089,7 +2105,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
ScoreBrackets->applyWaitcnt(
AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
ScoreBrackets->setStateOnFunctionEntryOrReturn();
} else {
// May need to way wait for anything.
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -870,15 +870,6 @@ struct Waitcnt {
: LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

static Waitcnt allZero(bool Extended, bool HasStorecnt) {
return Extended ? Waitcnt(0, 0, 0, 0, 0, 0, 0)
: Waitcnt(0, 0, 0, HasStorecnt ? 0 : ~0u);
}

static Waitcnt allZeroExceptVsCnt(bool Extended) {
return Extended ? Waitcnt(0, 0, 0, ~0u, 0, 0, 0) : Waitcnt(0, 0, 0, ~0u);
}

bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

bool hasWaitExceptStoreCnt() const {
Expand Down