@@ -480,6 +480,10 @@ class WaitcntGenerator {
   // WaitEventType to corresponding counter values in InstCounterType.
   virtual const unsigned *getWaitEventMask() const = 0;
 
+  // Returns a new waitcnt with all counters except VScnt set to 0. If
+  // IncludeVSCnt is true, VScnt is set to 0, otherwise it is set to ~0u.
+  virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;
+
   virtual ~WaitcntGenerator() = default;
 };
 
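For orientation (editorial note, not part of the diff): call sites below obtain the zeroed waitcnt through the pass's WaitcntGenerator pointer instead of passing subtarget flags to static Waitcnt helpers. A minimal sketch of the two shapes of call used later in this patch; WCG is the generator pointer the pass already holds:

    // Wait on all counters except the store/VS counter (returns, calls):
    Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
    // Wait on everything, including outstanding VMEM stores (S_BARRIER):
    Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));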
@@ -516,6 +520,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
 
     return WaitEventMaskForInstPreGFX12;
   }
+
+  virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
 class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
@@ -549,6 +555,8 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
 
     return WaitEventMaskForInstGFX12Plus;
   }
+
+  virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
 class SIInsertWaitcnts : public MachineFunctionPass {
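Editorial note, not part of the diff: each subtarget's generator builds the zeroed waitcnt with the counter layout it owns. A sketch of the selection SIInsertWaitcnts is assumed to perform already (the member names WCG, WCGPreGFX12 and WCGGFX12Plus are assumptions based on this file, not something introduced by this patch):

    // Pick the generator once per function; the virtual hook then dispatches
    // to the matching Waitcnt layout without call sites re-checking the subtarget.
    if (ST->hasExtendedWaitCounts())
      WCG = &WCGGFX12Plus;  // GFX12+: LOADcnt/STOREcnt/SAMPLEcnt/BVHcnt/KMcnt/DScnt
    else
      WCG = &WCGPreGFX12;   // pre-GFX12: legacy VMcnt/EXPcnt/LGKMcnt (+ VScnt if present)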
@@ -1304,6 +1312,16 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
   return Modified;
 }
 
+AMDGPU::Waitcnt
+WaitcntGeneratorPreGFX12::getAllZeroWaitcnt(bool IncludeVSCnt) const {
+  return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt && ST->hasVscnt() ? 0 : ~0u);
+}
+
+AMDGPU::Waitcnt
+WaitcntGeneratorGFX12Plus::getAllZeroWaitcnt(bool IncludeVSCnt) const {
+  return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt ? 0 : ~0u, 0, 0, 0);
+}
+
 /// Combine consecutive S_WAIT_*CNT instructions that precede \p It and
 /// follow \p OldWaitcntInstr and apply any extra waits from \p Wait that
 /// were added by previous passes. Currently this pass conservatively
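Editorial note, not part of the diff: the two definitions rely on the AMDGPU::Waitcnt constructors. Assuming the argument order from AMDGPUBaseInfo, the 4-argument form fills (LoadCnt, ExpCnt, DsCnt, StoreCnt) and the 7-argument form appends (SampleCnt, BvhCnt, KmCnt), with ~0u meaning "do not wait on this counter":

    // Pre-GFX12: the store counter is waited on only if requested and the
    // subtarget actually has a separate VScnt.
    AMDGPU::Waitcnt(0, 0, 0, /*StoreCnt=*/~0u);        // IncludeVSCnt == false
    // GFX12+: all seven counters are zeroed when IncludeVSCnt is true.
    AMDGPU::Waitcnt(0, 0, 0, /*StoreCnt=*/0, 0, 0, 0);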
@@ -1613,8 +1631,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       MI.getOpcode() == AMDGPU::SI_RETURN ||
       MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
       (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
-    Wait = Wait.combined(
-        AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
+    Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
   }
   // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
   // stores. In this case it can be useful to send a message to explicitly
@@ -1834,8 +1851,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
   if (MI.getOpcode() == AMDGPU::S_BARRIER &&
       !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
-    Wait = Wait.combined(
-        AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt()));
+    Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
   }
 
   // TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1851,7 +1867,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   ScoreBrackets.simplifyWaitcnt(Wait);
 
   if (ForceEmitZeroWaitcnts)
-    Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts());
+    Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
 
   if (ForceEmitWaitcnt[LOAD_CNT])
     Wait.LoadCnt = 0;
@@ -2089,7 +2105,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
     if (callWaitsOnFunctionReturn(Inst)) {
       // Act as a wait on everything
       ScoreBrackets->applyWaitcnt(
-          AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
+          WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
       ScoreBrackets->setStateOnFunctionEntryOrReturn();
     } else {
       // May need to way wait for anything.