@@ -304,7 +304,8 @@ class WaitcntBrackets {
   RegInterval getRegInterval(const MachineInstr *MI,
                              const MachineRegisterInfo *MRI,
-                             const SIRegisterInfo *TRI, unsigned OpNo) const;
+                             const SIRegisterInfo *TRI,
+                             const MachineOperand &Op) const;
 
   bool counterOutOfOrder(InstCounterType T) const;
   void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
@@ -405,9 +406,9 @@ class WaitcntBrackets {
     }
   }
 
-  void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
-                   const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
-                   unsigned OpNo, unsigned Val);
+  void setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI,
+                   const MachineRegisterInfo *MRI, const MachineOperand &Op,
+                   unsigned Val);
 
   const GCNSubtarget *ST = nullptr;
   InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
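Note: the two declaration changes above are the heart of this commit. getRegInterval and setExpScore now take the MachineOperand itself rather than an operand index, so callees stop re-resolving the index through MI->getOperand(OpNo). A minimal standalone sketch of the two calling styles, using stand-in types rather than the real LLVM classes:

    #include <cassert>
    #include <vector>

    struct Operand { int Reg; };                    // stand-in for MachineOperand
    struct Instr {                                  // stand-in for MachineInstr
      std::vector<Operand> Ops;
      const Operand &getOperand(unsigned I) const { return Ops[I]; }
    };

    // Index-based: the callee must repeat the lookup the caller already did.
    int regByIndex(const Instr &MI, unsigned OpNo) {
      const Operand &Op = MI.getOperand(OpNo);      // second resolution
      return Op.Reg;
    }

    // Operand-based: resolve once at the call site, pass the result down.
    int regByOperand(const Operand &Op) { return Op.Reg; }

    int main() {
      Instr MI{{{42}}};
      assert(regByIndex(MI, 0) == regByOperand(MI.getOperand(0)));
    }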
@@ -734,8 +735,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
 RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
                                             const MachineRegisterInfo *MRI,
                                             const SIRegisterInfo *TRI,
-                                            unsigned OpNo) const {
-  const MachineOperand &Op = MI->getOperand(OpNo);
+                                            const MachineOperand &Op) const {
   if (!TRI->isInAllocatableClass(Op.getReg()))
     return {-1, -1};
 
@@ -773,12 +773,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
 }
 
 void WaitcntBrackets::setExpScore(const MachineInstr *MI,
-                                  const SIInstrInfo *TII,
                                   const SIRegisterInfo *TRI,
-                                  const MachineRegisterInfo *MRI, unsigned OpNo,
-                                  unsigned Val) {
-  RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
-  assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
+                                  const MachineRegisterInfo *MRI,
+                                  const MachineOperand &Op, unsigned Val) {
+  RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+  assert(TRI->isVectorRegister(*MRI, Op.getReg()));
   for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
     setRegScore(RegNo, EXP_CNT, Val);
   }
@@ -804,79 +803,60 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // Put score on the source vgprs. If this is a store, just use those
     // specific register(s).
     if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
-      int AddrOpIdx =
-          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
       // All GDS operations must protect their address register (same as
       // export.)
-      if (AddrOpIdx != -1) {
-        setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
-      }
+      if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
+        setExpScore(&Inst, TRI, MRI, *AddrOp, CurrScore);
 
       if (Inst.mayStore()) {
-        if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) {
-          setExpScore(
-              &Inst, TII, TRI, MRI,
-              AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
-              CurrScore);
-        }
-        if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) {
-          setExpScore(&Inst, TII, TRI, MRI,
-                      AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
-                                                 AMDGPU::OpName::data1),
-                      CurrScore);
-        }
+        if (const auto *Data0 =
+                TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
+          setExpScore(&Inst, TRI, MRI, *Data0, CurrScore);
+        if (const auto *Data1 =
+                TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
+          setExpScore(&Inst, TRI, MRI, *Data1, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
                  Inst.getOpcode() != AMDGPU::DS_APPEND &&
                  Inst.getOpcode() != AMDGPU::DS_CONSUME &&
                  Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
-        for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
-          const MachineOperand &Op = Inst.getOperand(I);
-          if (Op.isReg() && !Op.isDef() &&
-              TRI->isVectorRegister(*MRI, Op.getReg())) {
-            setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
-          }
+        for (const MachineOperand &Op : Inst.all_uses()) {
+          if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+            setExpScore(&Inst, TRI, MRI, Op, CurrScore);
         }
       }
     } else if (TII->isFLAT(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isMIMG(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isMTBUF(Inst)) {
-      if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
-      }
+      if (Inst.mayStore())
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
     } else if (TII->isMUBUF(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isLDSDIR(Inst)) {
       // LDSDIR instructions attach the score to the destination.
-      setExpScore(
-          &Inst, TII, TRI, MRI,
-          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
-          CurrScore);
+      setExpScore(&Inst, TRI, MRI,
+                  *TII->getNamedOperand(Inst, AMDGPU::OpName::vdst), CurrScore);
     } else {
       if (TII->isEXP(Inst)) {
         // For export the destination registers are really temps that
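Note: the recurring simplification in this hunk is that AMDGPU::getNamedOperandIdx returns a -1 sentinel the caller must test before indexing, while SIInstrInfo::getNamedOperand returns a pointer that is null when the instruction has no such operand, so lookup and existence check fold into one if-with-initializer. A standalone sketch of the sentinel-versus-pointer contrast (hypothetical lookup table, not the real AMDGPU operand tables):

    #include <cstdio>
    #include <map>
    #include <string>

    struct Operand { int Reg; };
    using Instr = std::map<std::string, Operand>;   // hypothetical stand-in

    // Sentinel style: -1 means "absent"; callers must remember to test it.
    int getNamedOperandIdx(const Instr &MI, const std::string &Name) {
      return MI.count(Name) ? 0 : -1;
    }

    // Pointer style: absence is nullptr, testable in the if condition itself.
    const Operand *getNamedOperand(const Instr &MI, const std::string &Name) {
      auto It = MI.find(Name);
      return It == MI.end() ? nullptr : &It->second;
    }

    int main() {
      Instr MI{{"addr", Operand{7}}};
      if (const Operand *Addr = getNamedOperand(MI, "addr"))
        std::printf("addr is register %d\n", Addr->Reg);
      if (getNamedOperandIdx(MI, "data1") == -1)
        std::puts("data1 absent: sentinel path needs a separate check");
    }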
@@ -891,12 +871,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
           }
         }
       }
-      for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
-        MachineOperand &MO = Inst.getOperand(I);
-        if (MO.isReg() && !MO.isDef() &&
-            TRI->isVectorRegister(*MRI, MO.getReg())) {
-          setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
-        }
+      for (const MachineOperand &Op : Inst.all_uses()) {
+        if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+          setExpScore(&Inst, TRI, MRI, Op, CurrScore);
       }
     }
   } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
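Note: MachineInstr::all_uses() yields the register operands that are uses, explicit and implicit, which is exactly what the removed index loop reconstructed by hand with its isReg() and !isDef() tests (the Op.isReg() check kept in the new loop appears redundant given that filter, though it is harmless). A stand-in sketch of such a filtered traversal, materialized eagerly here for brevity where the real API returns a lazy iterator range:

    #include <cstdio>
    #include <vector>

    struct Operand { bool IsReg, IsDef; int Reg; };

    // Stand-in for MachineInstr::all_uses(): register operands that are uses.
    std::vector<Operand> allUses(const std::vector<Operand> &Ops) {
      std::vector<Operand> Uses;
      for (const Operand &Op : Ops)
        if (Op.IsReg && !Op.IsDef)
          Uses.push_back(Op);
      return Uses;
    }

    int main() {
      // A def of reg 1, a use of reg 2, and a non-register operand.
      std::vector<Operand> Ops = {{true, true, 1}, {true, false, 2},
                                  {false, false, 0}};
      for (const Operand &Op : allUses(Ops))
        std::printf("use of register %d\n", Op.Reg);  // prints only register 2
    }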
@@ -907,14 +884,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // artificial dependency, while these are there only for register liveness
     // accounting purposes.
     //
-    // Special cases where implicit register defs and uses exists, such as
-    // M0, FLAT_SCR or VCC, but the wait will be generated earlier in the
-    // generateWaitcntInstBefore() if that was loaded from memory.
-    for (unsigned I = 0, E = Inst.getNumExplicitOperands(); I != E; ++I) {
-      auto &Op = Inst.getOperand(I);
-      if (!Op.isReg() || !Op.isDef())
-        continue;
-      RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
+    // Special cases where implicit register defs exists, such as M0 or VCC,
+    // but none with memory instructions.
+    for (const MachineOperand &Op : Inst.defs()) {
+      RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
       if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
         if (Interval.first >= NUM_ALL_VGPRS)
           continue;
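Note: MachineInstr::defs() iterates the explicit register definitions, replacing the getNumExplicitOperands() loop and its isReg()/isDef() guards; the rewritten comment also narrows the old claim about implicit operands. The range works because LLVM lays out a machine instruction's explicit defs first, a convention a stand-in sketch can mimic:

    #include <cstdio>
    #include <vector>

    struct Operand { int Reg; };

    struct Instr {                      // stand-in for MachineInstr
      // Convention: the first NumExplicitDefs operands are the explicit defs.
      std::vector<Operand> Ops;
      unsigned NumExplicitDefs;
      // Stand-in for MachineInstr::defs(): the leading def operands.
      const Operand *defs_begin() const { return Ops.data(); }
      const Operand *defs_end() const { return Ops.data() + NumExplicitDefs; }
    };

    int main() {
      Instr MI{{{10}, {11}, {20}}, 2};  // defs regs 10 and 11; uses reg 20
      for (const Operand *Op = MI.defs_begin(); Op != MI.defs_end(); ++Op)
        std::printf("def of register %d\n", Op->Reg);
    }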
@@ -1692,22 +1665,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       // load). We also need to check WAW dependency with saved PC.
       Wait = AMDGPU::Waitcnt();
 
-      int CallAddrOpIdx =
-          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-
-      if (MI.getOperand(CallAddrOpIdx).isReg()) {
+      const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+      if (CallAddrOp.isReg()) {
         RegInterval CallAddrOpInterval =
-            ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
+            ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
 
         for (int RegNo = CallAddrOpInterval.first;
              RegNo < CallAddrOpInterval.second; ++RegNo)
           ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
 
-        int RtnAddrOpIdx =
-            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
-        if (RtnAddrOpIdx != -1) {
+        if (const auto *RtnAddrOp =
+                TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
           RegInterval RtnAddrOpInterval =
-              ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
+              ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
 
           for (int RegNo = RtnAddrOpInterval.first;
                RegNo < RtnAddrOpInterval.second; ++RegNo)
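Note: the asymmetry in this hunk tracks the operand tables. A call's src0 (the callee address) is a required operand, so the new code dereferences getNamedOperand without a null check, while dst (where the return address is written) can be absent and keeps one. A stand-in sketch of the required-versus-optional handling:

    #include <cstdio>

    struct Operand { int Reg; };

    // Hypothetical lookup: returns null when the named operand is absent.
    const Operand *getNamedOperand(const Operand *Op) { return Op; }

    int main() {
      Operand Src0{5};                      // required: always present
      const Operand &CallAddr = *getNamedOperand(&Src0);
      std::printf("call address in register %d\n", CallAddr.Reg);

      const Operand *Dst = nullptr;         // optional: may be absent
      if (const Operand *RtnAddr = getNamedOperand(Dst))
        std::printf("return address in register %d\n", RtnAddr->Reg);
      else
        std::puts("no dst operand, skip the return-address waits");
    }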
@@ -1769,16 +1739,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
     }
 
     // Loop over use and def operands.
-    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-      MachineOperand &Op = MI.getOperand(I);
+    for (const MachineOperand &Op : MI.operands()) {
       if (!Op.isReg())
         continue;
 
       // If the instruction does not read tied source, skip the operand.
       if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
         continue;
 
-      RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
+      RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
 
       const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
       for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -2357,41 +2326,43 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
         if (MI.mayStore())
           HasVMemStore = true;
       }
-      for (unsigned I = 0; I < MI.getNumOperands(); I++) {
-        MachineOperand &Op = MI.getOperand(I);
+      for (const MachineOperand &Op : MI.all_uses()) {
         if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
-        RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
+        RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
         // Vgpr use
-        if (Op.isUse()) {
-          for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
-            // If we find a register that is loaded inside the loop, 1. and 2.
-            // are invalidated and we can exit.
-            if (VgprDef.contains(RegNo))
-              return false;
-            VgprUse.insert(RegNo);
-            // If at least one of Op's registers is in the score brackets, the
-            // value is likely loaded outside of the loop.
-            if (Brackets.getRegScore(RegNo, LOAD_CNT) >
-                    Brackets.getScoreLB(LOAD_CNT) ||
-                Brackets.getRegScore(RegNo, SAMPLE_CNT) >
-                    Brackets.getScoreLB(SAMPLE_CNT) ||
-                Brackets.getRegScore(RegNo, BVH_CNT) >
-                    Brackets.getScoreLB(BVH_CNT)) {
-              UsesVgprLoadedOutside = true;
-              break;
-            }
+        for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+          // If we find a register that is loaded inside the loop, 1. and 2.
+          // are invalidated and we can exit.
+          if (VgprDef.contains(RegNo))
+            return false;
+          VgprUse.insert(RegNo);
+          // If at least one of Op's registers is in the score brackets, the
+          // value is likely loaded outside of the loop.
+          if (Brackets.getRegScore(RegNo, LOAD_CNT) >
+                  Brackets.getScoreLB(LOAD_CNT) ||
+              Brackets.getRegScore(RegNo, SAMPLE_CNT) >
+                  Brackets.getScoreLB(SAMPLE_CNT) ||
+              Brackets.getRegScore(RegNo, BVH_CNT) >
+                  Brackets.getScoreLB(BVH_CNT)) {
+            UsesVgprLoadedOutside = true;
+            break;
           }
         }
-        // VMem load vgpr def
-        else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
+      }
+
+      // VMem load vgpr def
+      if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
+        for (const MachineOperand &Op : MI.all_defs()) {
+          RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
           for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
             // If we find a register that is loaded inside the loop, 1. and 2.
             // are invalidated and we can exit.
             if (VgprUse.contains(RegNo))
               return false;
             VgprDef.insert(RegNo);
           }
+        }
       }
     }
   }
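Note: this last hunk splits one operand loop that branched on Op.isUse()/Op.isDef() into two passes: an all_uses() pass that always runs, and an all_defs() pass gated by the instruction-level test isVMEMOrFlatVMEM(MI) && MI.mayLoad(), which the old code re-evaluated per operand. A compact stand-in of the same split:

    #include <set>
    #include <vector>

    struct Operand { int Reg; bool IsDef; };

    // Stand-in for the rewritten scan: returns false on a use/def conflict,
    // mirroring the early 'return false' exits in shouldFlushVmCnt.
    bool scan(const std::vector<Operand> &Ops, bool IsVMemLoad,
              std::set<int> &VgprUse, std::set<int> &VgprDef) {
      for (const Operand &Op : Ops)            // the "all_uses()" pass
        if (!Op.IsDef) {
          if (VgprDef.count(Op.Reg))
            return false;
          VgprUse.insert(Op.Reg);
        }
      if (IsVMemLoad)                          // gate hoisted out of the loop
        for (const Operand &Op : Ops)          // the "all_defs()" pass
          if (Op.IsDef) {
            if (VgprUse.count(Op.Reg))
              return false;
            VgprDef.insert(Op.Reg);
          }
      return true;
    }

    int main() {
      std::set<int> Use, Def;
      // Register 1 is used, then defined by a VMem load: scan reports a conflict.
      return scan({{1, false}, {1, true}}, true, Use, Def) ? 1 : 0;
    }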