Skip to content

Commit 3091bdb

Browse files
committed
[AMDGPU] Do not release VGPRs at -O0
This was an oversight when the GFX11 early-release-VGPRs optimization was reimplemented in D153279. Sending the DEALLOC_VGPRS message is a performance optimization, so there is no need to do it at -O0. In addition, it makes some kinds of post-mortem debugging hard or impossible, since VGPR values are no longer available to inspect at the s_endpgm instruction.

Differential Revision: https://reviews.llvm.org/D157599
1 parent c5e8fbb commit 3091bdb

File tree

2 files changed

+564
-1
lines changed

2 files changed

+564
-1
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -382,6 +382,8 @@ class SIInsertWaitcnts : public MachineFunctionPass {
382382
bool ForceEmitZeroWaitcnts;
383383
bool ForceEmitWaitcnt[NUM_INST_CNTS];
384384

385+
bool OptNone;
386+
385387
// S_ENDPGM instructions before which we should insert a DEALLOC_VGPRS
386388
// message.
387389
DenseSet<MachineInstr *> ReleaseVGPRInsts;
@@ -1035,7 +1037,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
10351037
// do this if there are no outstanding scratch stores.
10361038
else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
10371039
MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
1038-
if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
1040+
if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !OptNone &&
10391041
ScoreBrackets.getScoreRange(VS_CNT) != 0 &&
10401042
!ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
10411043
ReleaseVGPRInsts.insert(&MI);
@@ -1817,6 +1819,9 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
18171819
for (auto T : inst_counter_types())
18181820
ForceEmitWaitcnt[T] = false;
18191821

1822+
OptNone = MF.getFunction().hasOptNone() ||
1823+
MF.getTarget().getOptLevel() == CodeGenOpt::None;
1824+
18201825
HardwareLimits Limits = {};
18211826
Limits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
18221827
Limits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);

0 commit comments

Comments
 (0)