Skip to content

Commit d538c58

Browse files
committed
[AMDGPU] Fix missed SI_RETURN_TO_EPILOG in pre-emit peephole
SIPreEmitPeephole does not process all terminators, which means it can fail to handle SI_RETURN_TO_EPILOG if immediately preceded by a branch to the early exit block. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D85872
1 parent c58f1fe commit d538c58

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -266,16 +266,24 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
266266

267267
for (MachineBasicBlock &MBB : MF) {
268268
MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
269-
if (MBBE != MBB.end()) {
270-
MachineInstr &MI = *MBBE;
269+
MachineBasicBlock::iterator TermI = MBBE;
270+
// Check first terminator for VCC branches to optimize
271+
if (TermI != MBB.end()) {
272+
MachineInstr &MI = *TermI;
271273
switch (MI.getOpcode()) {
272274
case AMDGPU::S_CBRANCH_VCCZ:
273275
case AMDGPU::S_CBRANCH_VCCNZ:
274276
Changed |= optimizeVccBranch(MI);
275277
continue;
276-
case AMDGPU::SI_RETURN_TO_EPILOG:
277-
// FIXME: This is not an optimization and should be
278-
// moved somewhere else.
278+
default:
279+
break;
280+
}
281+
}
282+
// Check all terminators for SI_RETURN_TO_EPILOG
283+
// FIXME: This is not an optimization and should be moved somewhere else.
284+
while (TermI != MBB.end()) {
285+
MachineInstr &MI = *TermI;
286+
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
279287
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
280288

281289
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
@@ -293,11 +301,11 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
293301
.addMBB(EmptyMBBAtEnd);
294302
MI.eraseFromParent();
295303
MBBE = MBB.getFirstTerminator();
304+
TermI = MBBE;
305+
continue;
296306
}
297-
break;
298-
default:
299-
break;
300307
}
308+
TermI++;
301309
}
302310

303311
if (!ST.hasVGPRIndexMode())

llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,15 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(floa
115115
; GCN: liveins: $sgpr0_sgpr1
116116
; GCN: $exec = S_MOV_B64 0
117117
; GCN: bb.6.end:
118-
; GCN: successors: %bb.7(0x80000000)
118+
; GCN: successors: %bb.7(0x40000000), %bb.8(0x40000000)
119119
; GCN: liveins: $sgpr0_sgpr1
120120
; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
121121
; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
122-
; GCN: SI_RETURN_TO_EPILOG undef $vgpr0, undef $vgpr1, undef $vgpr2, undef $vgpr3
122+
; GCN: S_BRANCH %bb.8
123123
; GCN: bb.7:
124124
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
125125
; GCN: S_ENDPGM 0
126+
; GCN: bb.8:
126127
entry:
127128
%.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
128129
%cmp0 = fcmp olt float %.i0, 0.000000e+00

0 commit comments

Comments
 (0)