@@ -369,7 +369,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {
369
369
const MachineRegisterInfo *MRI = nullptr ;
370
370
AMDGPU::IsaVersion IV;
371
371
372
- DenseSet<MachineInstr *> TrackedWaitcntSet;
373
372
DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
374
373
DenseMap<MachineBasicBlock *, bool > PreheadersToFlush;
375
374
MachineLoopInfo *MLI;
@@ -493,6 +492,9 @@ class SIInsertWaitcnts : public MachineFunctionPass {
493
492
MachineInstr &OldWaitcntInstr,
494
493
AMDGPU::Waitcnt &Wait,
495
494
MachineBasicBlock::instr_iterator It) const ;
495
+
496
+ // Transform a soft waitcnt into a normal one.
497
+ bool promoteSoftWaitCnt (MachineInstr *Waitcnt) const ;
496
498
};
497
499
498
500
} // end anonymous namespace
@@ -872,6 +874,15 @@ static bool updateOperandIfDifferent(MachineInstr &MI, uint16_t OpName,
872
874
return true ;
873
875
}
874
876
877
+ bool SIInsertWaitcnts::promoteSoftWaitCnt (MachineInstr *Waitcnt) const {
878
+ unsigned Opcode = Waitcnt->getOpcode ();
879
+ if (!SIInstrInfo::isSoftWaitcnt (Opcode))
880
+ return false ;
881
+
882
+ Waitcnt->setDesc (TII->get (SIInstrInfo::getNonSoftWaitcntOpcode (Opcode)));
883
+ return true ;
884
+ }
885
+
875
886
// / Combine consecutive waitcnt instructions that precede \p It and follow
876
887
// / \p OldWaitcntInstr and apply any extra wait from waitcnt that were added
877
888
// / by previous passes. Currently this pass conservatively assumes that these
@@ -888,86 +899,77 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(
888
899
if (II.isMetaInstruction ())
889
900
continue ;
890
901
891
- if (II.getOpcode () == AMDGPU::S_WAITCNT) {
892
- // Conservatively update required wait if this waitcnt was added in an
893
- // earlier pass. In this case it will not exist in the tracked waitcnt
894
- // set.
895
- if (!TrackedWaitcntSet.count (&II)) {
896
- unsigned IEnc = II.getOperand (0 ).getImm ();
897
- AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt (IV, IEnc);
898
- Wait = Wait.combined (OldWait);
899
- }
902
+ unsigned Opcode = II.getOpcode ();
903
+ bool IsSoft = SIInstrInfo::isSoftWaitcnt (Opcode);
904
+
905
+ if (SIInstrInfo::isWaitcnt (Opcode)) {
906
+ // Update required wait count. If this is a soft waitcnt (= it was added
907
+ // by an earlier pass), it may be entirely removed.
908
+ unsigned IEnc = II.getOperand (0 ).getImm ();
909
+ AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt (IV, IEnc);
910
+ if (IsSoft)
911
+ ScoreBrackets.simplifyWaitcnt (OldWait);
912
+ Wait = Wait.combined (OldWait);
900
913
901
914
// Merge consecutive waitcnt of the same type by erasing multiples.
902
- if (!WaitcntInstr) {
903
- WaitcntInstr = &II;
904
- } else {
915
+ if (WaitcntInstr || (!Wait.hasWaitExceptVsCnt () && IsSoft)) {
905
916
II.eraseFromParent ();
906
917
Modified = true ;
907
- }
918
+ } else
919
+ WaitcntInstr = &II;
908
920
909
921
} else {
910
- assert (II. getOpcode () == AMDGPU::S_WAITCNT_VSCNT );
922
+ assert (SIInstrInfo::isWaitcntVsCnt (Opcode) );
911
923
assert (II.getOperand (0 ).getReg () == AMDGPU::SGPR_NULL);
912
- if (!TrackedWaitcntSet.count (&II)) {
913
- unsigned OldVSCnt =
914
- TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
915
- Wait.VsCnt = std::min (Wait.VsCnt , OldVSCnt);
916
- }
917
924
918
- if (!WaitcntVsCntInstr) {
919
- WaitcntVsCntInstr = &II;
920
- } else {
925
+ unsigned OldVSCnt =
926
+ TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
927
+ if (IsSoft)
928
+ ScoreBrackets.simplifyWaitcnt (InstCounterType::VS_CNT, OldVSCnt);
929
+ Wait.VsCnt = std::min (Wait.VsCnt , OldVSCnt);
930
+
931
+ if (WaitcntVsCntInstr || (!Wait.hasWaitVsCnt () && IsSoft)) {
921
932
II.eraseFromParent ();
922
933
Modified = true ;
923
- }
934
+ } else
935
+ WaitcntVsCntInstr = &II;
924
936
}
925
937
}
926
938
927
939
// Updated encoding of merged waitcnt with the required wait.
928
940
if (WaitcntInstr) {
929
- if (Wait.hasWaitExceptVsCnt ()) {
930
- Modified |=
931
- updateOperandIfDifferent (*WaitcntInstr, AMDGPU::OpName::simm16,
932
- AMDGPU::encodeWaitcnt (IV, Wait));
933
- ScoreBrackets.applyWaitcnt (Wait);
934
- Wait.VmCnt = ~0u ;
935
- Wait.LgkmCnt = ~0u ;
936
- Wait.ExpCnt = ~0u ;
937
-
938
- LLVM_DEBUG (It == OldWaitcntInstr.getParent ()->end ()
939
- ? dbgs () << " applyPreexistingWaitcnt\n "
940
- << " New Instr at block end: " << *WaitcntInstr
941
- << ' \n '
942
- : dbgs () << " applyPreexistingWaitcnt\n "
943
- << " Old Instr: " << *It
944
- << " New Instr: " << *WaitcntInstr << ' \n ' );
941
+ Modified |= updateOperandIfDifferent (*WaitcntInstr, AMDGPU::OpName::simm16,
942
+ AMDGPU::encodeWaitcnt (IV, Wait));
943
+ Modified |= promoteSoftWaitCnt (WaitcntInstr);
945
944
946
- } else {
947
- WaitcntInstr->eraseFromParent ();
948
- Modified = true ;
949
- }
945
+ ScoreBrackets.applyWaitcnt (Wait);
946
+ Wait.VmCnt = ~0u ;
947
+ Wait.LgkmCnt = ~0u ;
948
+ Wait.ExpCnt = ~0u ;
949
+
950
+ LLVM_DEBUG (It == OldWaitcntInstr.getParent ()->end ()
951
+ ? dbgs ()
952
+ << " applyPreexistingWaitcnt\n "
953
+ << " New Instr at block end: " << *WaitcntInstr << ' \n '
954
+ : dbgs () << " applyPreexistingWaitcnt\n "
955
+ << " Old Instr: " << *It
956
+ << " New Instr: " << *WaitcntInstr << ' \n ' );
950
957
}
951
958
952
959
if (WaitcntVsCntInstr) {
953
- if (Wait.hasWaitVsCnt ()) {
954
- assert (ST->hasVscnt ());
955
- Modified |= updateOperandIfDifferent (*WaitcntVsCntInstr,
956
- AMDGPU::OpName::simm16, Wait.VsCnt );
957
- ScoreBrackets.applyWaitcnt (Wait);
958
- Wait.VsCnt = ~0u ;
959
-
960
- LLVM_DEBUG (It == OldWaitcntInstr.getParent ()->end ()
961
- ? dbgs () << " applyPreexistingWaitcnt\n "
962
- << " New Instr at block end: "
963
- << *WaitcntVsCntInstr << ' \n '
964
- : dbgs () << " applyPreexistingWaitcnt\n "
965
- << " Old Instr: " << *It
966
- << " New Instr: " << *WaitcntVsCntInstr << ' \n ' );
967
- } else {
968
- WaitcntVsCntInstr->eraseFromParent ();
969
- Modified = true ;
970
- }
960
+ Modified |= updateOperandIfDifferent (*WaitcntVsCntInstr,
961
+ AMDGPU::OpName::simm16, Wait.VsCnt );
962
+ Modified |= promoteSoftWaitCnt (WaitcntVsCntInstr);
963
+ ScoreBrackets.applyWaitcnt (Wait);
964
+ Wait.VsCnt = ~0u ;
965
+
966
+ LLVM_DEBUG (It == OldWaitcntInstr.getParent ()->end ()
967
+ ? dbgs () << " applyPreexistingWaitcnt\n "
968
+ << " New Instr at block end: " << *WaitcntVsCntInstr
969
+ << ' \n '
970
+ : dbgs () << " applyPreexistingWaitcnt\n "
971
+ << " Old Instr: " << *It
972
+ << " New Instr: " << *WaitcntVsCntInstr << ' \n ' );
971
973
}
972
974
973
975
return Modified;
@@ -1319,7 +1321,6 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
1319
1321
unsigned Enc = AMDGPU::encodeWaitcnt (IV, Wait);
1320
1322
auto SWaitInst =
1321
1323
BuildMI (Block, It, DL, TII->get (AMDGPU::S_WAITCNT)).addImm (Enc);
1322
- TrackedWaitcntSet.insert (SWaitInst);
1323
1324
Modified = true ;
1324
1325
1325
1326
LLVM_DEBUG (dbgs () << " generateWaitcnt\n " ;
@@ -1333,7 +1334,6 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
1333
1334
auto SWaitInst = BuildMI (Block, It, DL, TII->get (AMDGPU::S_WAITCNT_VSCNT))
1334
1335
.addReg (AMDGPU::SGPR_NULL, RegState::Undef)
1335
1336
.addImm (Wait.VsCnt );
1336
- TrackedWaitcntSet.insert (SWaitInst);
1337
1337
Modified = true ;
1338
1338
1339
1339
LLVM_DEBUG (dbgs () << " generateWaitcnt\n " ;
@@ -1581,9 +1581,9 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
1581
1581
}
1582
1582
1583
1583
static bool isWaitInstr (MachineInstr &Inst) {
1584
- return Inst.getOpcode () == AMDGPU::S_WAITCNT ||
1585
- (Inst. getOpcode () == AMDGPU::S_WAITCNT_VSCNT &&
1586
- Inst.getOperand (0 ).isReg () &&
1584
+ auto Opcode = Inst.getOpcode ();
1585
+ return SIInstrInfo::isWaitcnt (Opcode) ||
1586
+ ( SIInstrInfo::isWaitcntVsCnt (Opcode) && Inst.getOperand (0 ).isReg () &&
1587
1587
Inst.getOperand (0 ).getReg () == AMDGPU::SGPR_NULL);
1588
1588
}
1589
1589
@@ -1852,7 +1852,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1852
1852
TRI->getEncodingValue (AMDGPU::SGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
1853
1853
Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1 ;
1854
1854
1855
- TrackedWaitcntSet.clear ();
1856
1855
BlockInfos.clear ();
1857
1856
bool Modified = false ;
1858
1857
0 commit comments