[AMDGPU] Refine operand iterators in the SIInsertWaitcnts. NFCI. #108884
Conversation
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/108884.diff
1 file affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b855b6b3107029..6906784cdafb24 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -304,7 +304,8 @@ class WaitcntBrackets {
RegInterval getRegInterval(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI, unsigned OpNo) const;
+ const SIRegisterInfo *TRI,
+ const MachineOperand &Op) const;
bool counterOutOfOrder(InstCounterType T) const;
void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
@@ -405,9 +406,9 @@ class WaitcntBrackets {
}
}
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, unsigned Val);
+ void setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, const MachineOperand &Op,
+ unsigned Val);
const GCNSubtarget *ST = nullptr;
InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
@@ -734,8 +735,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI,
- unsigned OpNo) const {
- const MachineOperand &Op = MI->getOperand(OpNo);
+ const MachineOperand &Op) const {
if (!TRI->isInAllocatableClass(Op.getReg()))
return {-1, -1};
@@ -773,12 +773,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
}
void WaitcntBrackets::setExpScore(const MachineInstr *MI,
- const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI, unsigned OpNo,
- unsigned Val) {
- RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
- assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
+ const MachineRegisterInfo *MRI,
+ const MachineOperand &Op, unsigned Val) {
+ RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+ assert(TRI->isVectorRegister(*MRI, Op.getReg()));
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
setRegScore(RegNo, EXP_CNT, Val);
}
@@ -804,79 +803,57 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
- int AddrOpIdx =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (AddrOpIdx != -1) {
- setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
- }
+ if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
+ setExpScore(&Inst, TRI, MRI, *AddrOp, CurrScore);
if (Inst.mayStore()) {
- if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
- CurrScore);
- }
- if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) {
- setExpScore(&Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
- AMDGPU::OpName::data1),
- CurrScore);
- }
+ if (const auto *Data0 =
+ TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
+ setExpScore(&Inst, TRI, MRI, *Data0, CurrScore);
+ if (const auto *Data1 =
+ TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
+ setExpScore(&Inst, TRI, MRI, *Data1, CurrScore);
} else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
Inst.getOpcode() != AMDGPU::DS_APPEND &&
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
- for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
- const MachineOperand &Op = Inst.getOperand(I);
- if (Op.isReg() && !Op.isDef() &&
- TRI->isVectorRegister(*MRI, Op.getReg())) {
- setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
- }
+ for (const MachineOperand &Op : Inst.uses()) {
+ if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
} else if (TII->isFLAT(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isMIMG(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isMTBUF(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
} else if (TII->isMUBUF(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isLDSDIR(Inst)) {
// LDSDIR instructions attach the score to the destination.
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
- CurrScore);
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::vdst), CurrScore);
} else {
if (TII->isEXP(Inst)) {
// For export the destination registers are really temps that
@@ -891,12 +868,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
}
- for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
- MachineOperand &MO = Inst.getOperand(I);
- if (MO.isReg() && !MO.isDef() &&
- TRI->isVectorRegister(*MRI, MO.getReg())) {
- setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
- }
+ for (const MachineOperand &Op : Inst.uses()) {
+ if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
} else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -907,14 +881,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// artificial dependency, while these are there only for register liveness
// accounting purposes.
//
- // Special cases where implicit register defs and uses exists, such as
- // M0, FLAT_SCR or VCC, but the wait will be generated earlier in the
- // generateWaitcntInstBefore() if that was loaded from memory.
- for (unsigned I = 0, E = Inst.getNumExplicitOperands(); I != E; ++I) {
- auto &Op = Inst.getOperand(I);
- if (!Op.isReg() || !Op.isDef())
- continue;
- RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
+ // Special cases where implicit register defs exists, such as M0 or VCC,
+ // but none with memory instructions.
+ for (const MachineOperand &Op : Inst.defs()) {
+ RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
if (Interval.first >= NUM_ALL_VGPRS)
continue;
@@ -1692,22 +1662,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
- int CallAddrOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-
- if (MI.getOperand(CallAddrOpIdx).isReg()) {
+ const auto *CallAddrOp = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (CallAddrOp->isReg()) {
RegInterval CallAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, *CallAddrOp);
for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
- int RtnAddrOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
- if (RtnAddrOpIdx != -1) {
+ if (const auto *RtnAddrOp =
+ TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
RegInterval RtnAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
for (int RegNo = RtnAddrOpInterval.first;
RegNo < RtnAddrOpInterval.second; ++RegNo)
@@ -1769,8 +1736,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
}
// Loop over use and def operands.
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
+ for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg())
continue;
@@ -1778,7 +1744,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
continue;
- RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
+ RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -2357,34 +2323,35 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
if (MI.mayStore())
HasVMemStore = true;
}
- for (unsigned I = 0; I < MI.getNumOperands(); I++) {
- MachineOperand &Op = MI.getOperand(I);
+ for (const MachineOperand &Op : MI.uses()) {
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
continue;
- RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
+ RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
// Vgpr use
- if (Op.isUse()) {
- for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- // If we find a register that is loaded inside the loop, 1. and 2.
- // are invalidated and we can exit.
- if (VgprDef.contains(RegNo))
- return false;
- VgprUse.insert(RegNo);
- // If at least one of Op's registers is in the score brackets, the
- // value is likely loaded outside of the loop.
- if (Brackets.getRegScore(RegNo, LOAD_CNT) >
- Brackets.getScoreLB(LOAD_CNT) ||
- Brackets.getRegScore(RegNo, SAMPLE_CNT) >
- Brackets.getScoreLB(SAMPLE_CNT) ||
- Brackets.getRegScore(RegNo, BVH_CNT) >
- Brackets.getScoreLB(BVH_CNT)) {
- UsesVgprLoadedOutside = true;
- break;
- }
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ // If we find a register that is loaded inside the loop, 1. and 2.
+ // are invalidated and we can exit.
+ if (VgprDef.contains(RegNo))
+ return false;
+ VgprUse.insert(RegNo);
+ // If at least one of Op's registers is in the score brackets, the
+ // value is likely loaded outside of the loop.
+ if (Brackets.getRegScore(RegNo, LOAD_CNT) >
+ Brackets.getScoreLB(LOAD_CNT) ||
+ Brackets.getRegScore(RegNo, SAMPLE_CNT) >
+ Brackets.getScoreLB(SAMPLE_CNT) ||
+ Brackets.getRegScore(RegNo, BVH_CNT) >
+ Brackets.getScoreLB(BVH_CNT)) {
+ UsesVgprLoadedOutside = true;
+ break;
}
}
- // VMem load vgpr def
- else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
+ }
+
+ // VMem load vgpr def
+ if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
+ for (const MachineOperand &Op : MI.defs()) {
+ RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
// If we find a register that is loaded inside the loop, 1. and 2.
// are invalidated and we can exit.
@@ -2392,6 +2359,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
return false;
VgprDef.insert(RegNo);
}
+ }
}
}
}
Overall I like it!
- Braces
- uses/defs -> all_uses/all_defs (sketch below)
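For context on that second point, here is a hedged sketch against the generic MachineInstr API (illustrative helpers, not code from this patch, and the function names are made up): defs() visits only the explicit register defs at the front of the operand list, while all_defs() also visits implicit ones such as $vcc or $scc, and both yield only register operands.

```cpp
// Sketch: defs() vs. all_defs() on the generic MachineInstr API.
// Not part of this patch; helper names are hypothetical.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>

using namespace llvm;

// defs() iterates only the *explicit* register defs at the front of the
// operand list.
static unsigned countExplicitRegDefs(const MachineInstr &MI) {
  unsigned N = 0;
  for (const MachineOperand &Op : MI.defs()) {
    assert(Op.isReg() && Op.isDef() && "defs() yields explicit register defs");
    ++N;
  }
  return N;
}

// all_defs() additionally visits implicit register defs (e.g. $vcc or $scc),
// so it can report a larger count for the same instruction.
static unsigned countAllRegDefs(const MachineInstr &MI) {
  unsigned N = 0;
  for (const MachineOperand &Op : MI.all_defs()) {
    assert(Op.isReg() && Op.isDef());
    ++N;
  }
  return N;
}
```

Both ranges only yield register operands, which is why loops over them need no isReg() guard.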
LGTM with a few more nits
setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
}
for (const MachineOperand &Op : Inst.all_uses()) {
if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
Nit: don't need the isReg check.
Unfortunately the documentation is lying and a use is not always a reg.
This is not a reg, a simple imm:
2329│ for (const MachineOperand &Op : MI.uses()) {
2330├> if (!TRI->isVectorRegister(*MRI, Op.getReg()))
2331│ continue;
(gdb) call Op.dump()
17408
(gdb) call MI.dump()
renamable $vgpr3 = nofpexcept V_MAX_F16_fake16_e32 17408, killed $vgpr3, implicit $mode, implicit $exec
Yeah but that's uses(). For all_uses() it is always a reg. I know, this situation is confusing and not ideal.
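To make the uses() vs. all_uses() distinction concrete, a small hedged sketch against the generic MachineInstr API (illustrative helpers, not code from this patch, and the function names are made up): uses() yields every operand after the explicit defs, immediates included (as in the V_MAX_F16 example above), so an isReg() guard is required; all_uses() filters down to register uses only.

```cpp
// Sketch: why uses() needs an isReg() guard but all_uses() does not.
// Not part of this patch; helper names are hypothetical.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>

using namespace llvm;

// uses() spans every operand after the explicit defs, so it can hand back
// immediates (e.g. the 17408 above) and other non-use operands.
static unsigned countRegUsesViaUses(const MachineInstr &MI) {
  unsigned N = 0;
  for (const MachineOperand &Op : MI.uses())
    if (Op.isReg() && Op.isUse()) // guard against immediates etc.
      ++N;
  return N;
}

// all_uses() already filters to (explicit or implicit) register uses.
static unsigned countRegUsesViaAllUses(const MachineInstr &MI) {
  unsigned N = 0;
  for (const MachineOperand &Op : MI.all_uses()) {
    assert(Op.isReg() && Op.isUse() && "all_uses() yields register uses only");
    ++N;
  }
  return N;
}
```

Both helpers should agree on the count; the difference is only in how much filtering the caller has to do.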
I do not want to change it, because the very next change will be to replace all_uses() with just uses(). Also NFCI, but of a different kind.
JBTW, any idea why that is so? Why does uses() include non-registers? And why is it documented as register uses then?
And even more to it: why does all_uses() not include all of the uses()?
I mean this:
iterator_range<const_mop_iterator> uses() const
Returns a range that includes all operands that are register uses.
One has to really dig deep to get to this statement, which ruins it completely:
This may include unrelated operands which are not register uses.
The high level description at the https://llvm.org/doxygen/classllvm_1_1MachineInstr.html does not include that last statement.
Because we were lazy and sloppy :)
Maybe part of the problem is a grammatical question of whether an immediate is a type of use, or something different. We don't have good words for "register uses" and "all uses including uses of immediate values".