Skip to content

Commit 731a683

Browse files
authored
[AMDGPU] Refine operand iterators in the SIInsertWaitcnts. NFCI. (#108884)
1 parent 83220e9 commit 731a683

File tree

1 file changed

+79
-108
lines changed

1 file changed

+79
-108
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 79 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,8 @@ class WaitcntBrackets {
304304

305305
RegInterval getRegInterval(const MachineInstr *MI,
306306
const MachineRegisterInfo *MRI,
307-
const SIRegisterInfo *TRI, unsigned OpNo) const;
307+
const SIRegisterInfo *TRI,
308+
const MachineOperand &Op) const;
308309

309310
bool counterOutOfOrder(InstCounterType T) const;
310311
void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
@@ -405,9 +406,9 @@ class WaitcntBrackets {
405406
}
406407
}
407408

408-
void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
409-
const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
410-
unsigned OpNo, unsigned Val);
409+
void setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI,
410+
const MachineRegisterInfo *MRI, const MachineOperand &Op,
411+
unsigned Val);
411412

412413
const GCNSubtarget *ST = nullptr;
413414
InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
@@ -734,8 +735,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
734735
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
735736
const MachineRegisterInfo *MRI,
736737
const SIRegisterInfo *TRI,
737-
unsigned OpNo) const {
738-
const MachineOperand &Op = MI->getOperand(OpNo);
738+
const MachineOperand &Op) const {
739739
if (!TRI->isInAllocatableClass(Op.getReg()))
740740
return {-1, -1};
741741

@@ -773,12 +773,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
773773
}
774774

775775
void WaitcntBrackets::setExpScore(const MachineInstr *MI,
776-
const SIInstrInfo *TII,
777776
const SIRegisterInfo *TRI,
778-
const MachineRegisterInfo *MRI, unsigned OpNo,
779-
unsigned Val) {
780-
RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
781-
assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
777+
const MachineRegisterInfo *MRI,
778+
const MachineOperand &Op, unsigned Val) {
779+
RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
780+
assert(TRI->isVectorRegister(*MRI, Op.getReg()));
782781
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
783782
setRegScore(RegNo, EXP_CNT, Val);
784783
}
@@ -804,79 +803,60 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
804803
// Put score on the source vgprs. If this is a store, just use those
805804
// specific register(s).
806805
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
807-
int AddrOpIdx =
808-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
809806
// All GDS operations must protect their address register (same as
810807
// export.)
811-
if (AddrOpIdx != -1) {
812-
setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
813-
}
808+
if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
809+
setExpScore(&Inst, TRI, MRI, *AddrOp, CurrScore);
814810

815811
if (Inst.mayStore()) {
816-
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) {
817-
setExpScore(
818-
&Inst, TII, TRI, MRI,
819-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
820-
CurrScore);
821-
}
822-
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) {
823-
setExpScore(&Inst, TII, TRI, MRI,
824-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
825-
AMDGPU::OpName::data1),
826-
CurrScore);
827-
}
812+
if (const auto *Data0 =
813+
TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
814+
setExpScore(&Inst, TRI, MRI, *Data0, CurrScore);
815+
if (const auto *Data1 =
816+
TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
817+
setExpScore(&Inst, TRI, MRI, *Data1, CurrScore);
828818
} else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
829819
Inst.getOpcode() != AMDGPU::DS_APPEND &&
830820
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
831821
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
832-
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
833-
const MachineOperand &Op = Inst.getOperand(I);
834-
if (Op.isReg() && !Op.isDef() &&
835-
TRI->isVectorRegister(*MRI, Op.getReg())) {
836-
setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
837-
}
822+
for (const MachineOperand &Op : Inst.all_uses()) {
823+
if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
824+
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
838825
}
839826
}
840827
} else if (TII->isFLAT(Inst)) {
841828
if (Inst.mayStore()) {
842-
setExpScore(
843-
&Inst, TII, TRI, MRI,
844-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
845-
CurrScore);
829+
setExpScore(&Inst, TRI, MRI,
830+
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
831+
CurrScore);
846832
} else if (SIInstrInfo::isAtomicRet(Inst)) {
847-
setExpScore(
848-
&Inst, TII, TRI, MRI,
849-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
850-
CurrScore);
833+
setExpScore(&Inst, TRI, MRI,
834+
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
835+
CurrScore);
851836
}
852837
} else if (TII->isMIMG(Inst)) {
853838
if (Inst.mayStore()) {
854-
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
839+
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
855840
} else if (SIInstrInfo::isAtomicRet(Inst)) {
856-
setExpScore(
857-
&Inst, TII, TRI, MRI,
858-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
859-
CurrScore);
841+
setExpScore(&Inst, TRI, MRI,
842+
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
843+
CurrScore);
860844
}
861845
} else if (TII->isMTBUF(Inst)) {
862-
if (Inst.mayStore()) {
863-
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
864-
}
846+
if (Inst.mayStore())
847+
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
865848
} else if (TII->isMUBUF(Inst)) {
866849
if (Inst.mayStore()) {
867-
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
850+
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
868851
} else if (SIInstrInfo::isAtomicRet(Inst)) {
869-
setExpScore(
870-
&Inst, TII, TRI, MRI,
871-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
872-
CurrScore);
852+
setExpScore(&Inst, TRI, MRI,
853+
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
854+
CurrScore);
873855
}
874856
} else if (TII->isLDSDIR(Inst)) {
875857
// LDSDIR instructions attach the score to the destination.
876-
setExpScore(
877-
&Inst, TII, TRI, MRI,
878-
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
879-
CurrScore);
858+
setExpScore(&Inst, TRI, MRI,
859+
*TII->getNamedOperand(Inst, AMDGPU::OpName::vdst), CurrScore);
880860
} else {
881861
if (TII->isEXP(Inst)) {
882862
// For export the destination registers are really temps that
@@ -891,12 +871,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
891871
}
892872
}
893873
}
894-
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
895-
MachineOperand &MO = Inst.getOperand(I);
896-
if (MO.isReg() && !MO.isDef() &&
897-
TRI->isVectorRegister(*MRI, MO.getReg())) {
898-
setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
899-
}
874+
for (const MachineOperand &Op : Inst.all_uses()) {
875+
if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
876+
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
900877
}
901878
}
902879
} else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -907,14 +884,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
907884
// artificial dependency, while these are there only for register liveness
908885
// accounting purposes.
909886
//
910-
// Special cases where implicit register defs and uses exists, such as
911-
// M0, FLAT_SCR or VCC, but the wait will be generated earlier in the
912-
// generateWaitcntInstBefore() if that was loaded from memory.
913-
for (unsigned I = 0, E = Inst.getNumExplicitOperands(); I != E; ++I) {
914-
auto &Op = Inst.getOperand(I);
915-
if (!Op.isReg() || !Op.isDef())
916-
continue;
917-
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
887+
// Special cases where implicit register defs exist, such as M0 or VCC,
888+
// but none with memory instructions.
889+
for (const MachineOperand &Op : Inst.defs()) {
890+
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
918891
if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
919892
if (Interval.first >= NUM_ALL_VGPRS)
920893
continue;
@@ -1692,22 +1665,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
16921665
// load). We also need to check WAW dependency with saved PC.
16931666
Wait = AMDGPU::Waitcnt();
16941667

1695-
int CallAddrOpIdx =
1696-
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1697-
1698-
if (MI.getOperand(CallAddrOpIdx).isReg()) {
1668+
const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1669+
if (CallAddrOp.isReg()) {
16991670
RegInterval CallAddrOpInterval =
1700-
ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
1671+
ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
17011672

17021673
for (int RegNo = CallAddrOpInterval.first;
17031674
RegNo < CallAddrOpInterval.second; ++RegNo)
17041675
ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
17051676

1706-
int RtnAddrOpIdx =
1707-
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
1708-
if (RtnAddrOpIdx != -1) {
1677+
if (const auto *RtnAddrOp =
1678+
TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
17091679
RegInterval RtnAddrOpInterval =
1710-
ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
1680+
ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
17111681

17121682
for (int RegNo = RtnAddrOpInterval.first;
17131683
RegNo < RtnAddrOpInterval.second; ++RegNo)
@@ -1769,16 +1739,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
17691739
}
17701740

17711741
// Loop over use and def operands.
1772-
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
1773-
MachineOperand &Op = MI.getOperand(I);
1742+
for (const MachineOperand &Op : MI.operands()) {
17741743
if (!Op.isReg())
17751744
continue;
17761745

17771746
// If the instruction does not read tied source, skip the operand.
17781747
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
17791748
continue;
17801749

1781-
RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
1750+
RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
17821751

17831752
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
17841753
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -2357,41 +2326,43 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
23572326
if (MI.mayStore())
23582327
HasVMemStore = true;
23592328
}
2360-
for (unsigned I = 0; I < MI.getNumOperands(); I++) {
2361-
MachineOperand &Op = MI.getOperand(I);
2329+
for (const MachineOperand &Op : MI.all_uses()) {
23622330
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
23632331
continue;
2364-
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
2332+
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
23652333
// Vgpr use
2366-
if (Op.isUse()) {
2367-
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
2368-
// If we find a register that is loaded inside the loop, 1. and 2.
2369-
// are invalidated and we can exit.
2370-
if (VgprDef.contains(RegNo))
2371-
return false;
2372-
VgprUse.insert(RegNo);
2373-
// If at least one of Op's registers is in the score brackets, the
2374-
// value is likely loaded outside of the loop.
2375-
if (Brackets.getRegScore(RegNo, LOAD_CNT) >
2376-
Brackets.getScoreLB(LOAD_CNT) ||
2377-
Brackets.getRegScore(RegNo, SAMPLE_CNT) >
2378-
Brackets.getScoreLB(SAMPLE_CNT) ||
2379-
Brackets.getRegScore(RegNo, BVH_CNT) >
2380-
Brackets.getScoreLB(BVH_CNT)) {
2381-
UsesVgprLoadedOutside = true;
2382-
break;
2383-
}
2334+
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
2335+
// If we find a register that is loaded inside the loop, 1. and 2.
2336+
// are invalidated and we can exit.
2337+
if (VgprDef.contains(RegNo))
2338+
return false;
2339+
VgprUse.insert(RegNo);
2340+
// If at least one of Op's registers is in the score brackets, the
2341+
// value is likely loaded outside of the loop.
2342+
if (Brackets.getRegScore(RegNo, LOAD_CNT) >
2343+
Brackets.getScoreLB(LOAD_CNT) ||
2344+
Brackets.getRegScore(RegNo, SAMPLE_CNT) >
2345+
Brackets.getScoreLB(SAMPLE_CNT) ||
2346+
Brackets.getRegScore(RegNo, BVH_CNT) >
2347+
Brackets.getScoreLB(BVH_CNT)) {
2348+
UsesVgprLoadedOutside = true;
2349+
break;
23842350
}
23852351
}
2386-
// VMem load vgpr def
2387-
else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
2352+
}
2353+
2354+
// VMem load vgpr def
2355+
if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
2356+
for (const MachineOperand &Op : MI.all_defs()) {
2357+
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
23882358
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
23892359
// If we find a register that is loaded inside the loop, 1. and 2.
23902360
// are invalidated and we can exit.
23912361
if (VgprUse.contains(RegNo))
23922362
return false;
23932363
VgprDef.insert(RegNo);
23942364
}
2365+
}
23952366
}
23962367
}
23972368
}

0 commit comments

Comments
 (0)