@@ -448,12 +448,19 @@ class WaitcntGenerator {
448
448
const SIInstrInfo *TII = nullptr ;
449
449
AMDGPU::IsaVersion IV;
450
450
InstCounterType MaxCounter;
451
+ bool OptNone;
451
452
452
453
public:
453
454
WaitcntGenerator () {}
454
- WaitcntGenerator (const GCNSubtarget *ST, InstCounterType MaxCounter)
455
- : ST(ST), TII(ST->getInstrInfo ()),
456
- IV(AMDGPU::getIsaVersion(ST->getCPU ())), MaxCounter(MaxCounter) {}
455
+ WaitcntGenerator (const MachineFunction &MF, InstCounterType MaxCounter)
456
+ : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo ()),
457
+ IV(AMDGPU::getIsaVersion(ST->getCPU ())), MaxCounter(MaxCounter),
458
+ OptNone(MF.getFunction().hasOptNone() ||
459
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None) {}
460
+
461
+ // Return true if the current function should be compiled with no
462
+ // optimization.
463
+ bool isOptNone () const { return OptNone; }
457
464
458
465
// Edits an existing sequence of wait count instructions according
459
466
// to an incoming Waitcnt value, which is itself updated to reflect
@@ -504,8 +511,8 @@ class WaitcntGenerator {
504
511
class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
505
512
public:
506
513
WaitcntGeneratorPreGFX12 () {}
507
- WaitcntGeneratorPreGFX12 (const GCNSubtarget *ST )
508
- : WaitcntGenerator(ST , NUM_NORMAL_INST_CNTS) {}
514
+ WaitcntGeneratorPreGFX12 (const MachineFunction &MF )
515
+ : WaitcntGenerator(MF , NUM_NORMAL_INST_CNTS) {}
509
516
510
517
bool
511
518
applyPreexistingWaitcnt (WaitcntBrackets &ScoreBrackets,
@@ -539,8 +546,9 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
539
546
class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
540
547
public:
541
548
WaitcntGeneratorGFX12Plus () {}
542
- WaitcntGeneratorGFX12Plus (const GCNSubtarget *ST, InstCounterType MaxCounter)
543
- : WaitcntGenerator(ST, MaxCounter) {}
549
+ WaitcntGeneratorGFX12Plus (const MachineFunction &MF,
550
+ InstCounterType MaxCounter)
551
+ : WaitcntGenerator(MF, MaxCounter) {}
544
552
545
553
bool
546
554
applyPreexistingWaitcnt (WaitcntBrackets &ScoreBrackets,
@@ -597,8 +605,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {
597
605
bool ForceEmitZeroWaitcnts;
598
606
bool ForceEmitWaitcnt[NUM_INST_CNTS];
599
607
600
- bool OptNone;
601
-
602
608
// In any given run of this pass, WCG will point to one of these two
603
609
// generator objects, which must have been re-initialised before use
604
610
// from a value made using a MachineFunction constructor.
@@ -1203,19 +1209,19 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
1203
1209
continue ;
1204
1210
1205
1211
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode (II.getOpcode ());
1206
- bool IsSoft = Opcode != II.getOpcode ();
1212
+ bool TrySimplify = Opcode != II.getOpcode () && !OptNone ;
1207
1213
1208
1214
// Update required wait count. If this is a soft waitcnt (= it was added
1209
1215
// by an earlier pass), it may be simplified or entirely removed, but
// only when OptNone is false.
1210
1216
if (Opcode == AMDGPU::S_WAITCNT) {
1211
1217
unsigned IEnc = II.getOperand (0 ).getImm ();
1212
1218
AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt (IV, IEnc);
1213
- if (IsSoft )
1219
+ if (TrySimplify )
1214
1220
ScoreBrackets.simplifyWaitcnt (OldWait);
1215
1221
Wait = Wait.combined (OldWait);
1216
1222
1217
1223
// Merge consecutive waitcnt of the same type by erasing multiples.
1218
- if (WaitcntInstr || (!Wait.hasWaitExceptStoreCnt () && IsSoft )) {
1224
+ if (WaitcntInstr || (!Wait.hasWaitExceptStoreCnt () && TrySimplify )) {
1219
1225
II.eraseFromParent ();
1220
1226
Modified = true ;
1221
1227
} else
@@ -1226,11 +1232,11 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
1226
1232
1227
1233
unsigned OldVSCnt =
1228
1234
TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
1229
- if (IsSoft )
1235
+ if (TrySimplify )
1230
1236
ScoreBrackets.simplifyWaitcnt (InstCounterType::STORE_CNT, OldVSCnt);
1231
1237
Wait.StoreCnt = std::min (Wait.StoreCnt , OldVSCnt);
1232
1238
1233
- if (WaitcntVsCntInstr || (!Wait.hasWaitStoreCnt () && IsSoft )) {
1239
+ if (WaitcntVsCntInstr || (!Wait.hasWaitStoreCnt () && TrySimplify )) {
1234
1240
II.eraseFromParent ();
1235
1241
Modified = true ;
1236
1242
} else
@@ -1356,21 +1362,21 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
1356
1362
// by an earlier pass), it may be entirely removed.
1357
1363
1358
1364
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode (II.getOpcode ());
1359
- bool IsSoft = Opcode != II.getOpcode ();
1365
+ bool TrySimplify = Opcode != II.getOpcode () && !OptNone ;
1360
1366
1361
1367
if (Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
1362
1368
unsigned OldEnc =
1363
1369
TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
1364
1370
AMDGPU::Waitcnt OldWait = AMDGPU::decodeLoadcntDscnt (IV, OldEnc);
1365
- if (IsSoft )
1371
+ if (TrySimplify )
1366
1372
ScoreBrackets.simplifyWaitcnt (OldWait);
1367
1373
Wait = Wait.combined (OldWait);
1368
1374
UpdatableInstr = &CombinedLoadDsCntInstr;
1369
1375
} else if (Opcode == AMDGPU::S_WAIT_STORECNT_DSCNT) {
1370
1376
unsigned OldEnc =
1371
1377
TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
1372
1378
AMDGPU::Waitcnt OldWait = AMDGPU::decodeStorecntDscnt (IV, OldEnc);
1373
- if (IsSoft )
1379
+ if (TrySimplify )
1374
1380
ScoreBrackets.simplifyWaitcnt (OldWait);
1375
1381
Wait = Wait.combined (OldWait);
1376
1382
UpdatableInstr = &CombinedStoreDsCntInstr;
@@ -1379,7 +1385,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
1379
1385
assert (CT.has_value ());
1380
1386
unsigned OldCnt =
1381
1387
TII->getNamedOperand (II, AMDGPU::OpName::simm16)->getImm ();
1382
- if (IsSoft )
1388
+ if (TrySimplify )
1383
1389
ScoreBrackets.simplifyWaitcnt (CT.value (), OldCnt);
1384
1390
addWait (Wait, CT.value (), OldCnt);
1385
1391
UpdatableInstr = &WaitInstrs[CT.value ()];
@@ -1649,7 +1655,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
1649
1655
// * we are not in Dynamic VGPR mode
1650
1656
else if (MI.getOpcode () == AMDGPU::S_ENDPGM ||
1651
1657
MI.getOpcode () == AMDGPU::S_ENDPGM_SAVED) {
1652
- if (ST->getGeneration () >= AMDGPUSubtarget::GFX11 && !OptNone &&
1658
+ if (ST->getGeneration () >= AMDGPUSubtarget::GFX11 && !WCG-> isOptNone () &&
1653
1659
ScoreBrackets.getScoreRange (STORE_CNT) != 0 &&
1654
1660
!ScoreBrackets.hasPendingEvent (SCRATCH_WRITE_ACCESS))
1655
1661
ReleaseVGPRInsts.insert (&MI);
@@ -2471,11 +2477,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
2471
2477
2472
2478
if (ST->hasExtendedWaitCounts ()) {
2473
2479
MaxCounter = NUM_EXTENDED_INST_CNTS;
2474
- WCGGFX12Plus = WaitcntGeneratorGFX12Plus (ST , MaxCounter);
2480
+ WCGGFX12Plus = WaitcntGeneratorGFX12Plus (MF , MaxCounter);
2475
2481
WCG = &WCGGFX12Plus;
2476
2482
} else {
2477
2483
MaxCounter = NUM_NORMAL_INST_CNTS;
2478
- WCGPreGFX12 = WaitcntGeneratorPreGFX12 (ST );
2484
+ WCGPreGFX12 = WaitcntGeneratorPreGFX12 (MF );
2479
2485
WCG = &WCGPreGFX12;
2480
2486
}
2481
2487
@@ -2487,9 +2493,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
2487
2493
2488
2494
SmemAccessCounter = eventCounter (WaitEventMaskForInst, SMEM_ACCESS);
2489
2495
2490
- OptNone = MF.getFunction ().hasOptNone () ||
2491
- MF.getTarget ().getOptLevel () == CodeGenOptLevel::None;
2492
-
2493
2496
HardwareLimits Limits = {};
2494
2497
if (ST->hasExtendedWaitCounts ()) {
2495
2498
Limits.LoadcntMax = AMDGPU::getLoadcntBitMask (IV);
0 commit comments