Skip to content

Commit 6e17708

Browse files
committed
AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses
Reverts part of 6524a7a. Since that commit, the expansion was ignoring the actual save exec register produced by the instruction, and looking at other instructions. I do not understand why it was looking at other instructions, but relying on this scan was wrong. Fixes verifier errors after SI_IF is tail duplicated, which should be correct to do. The results were fed into a phi, which was lowered to the S_MOV_B64_term instructions.
1 parent 29e646f commit 6e17708

File tree

2 files changed

+78
-23
lines changed

2 files changed

+78
-23
lines changed

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,6 @@ class SILowerControlFlow : public MachineFunctionPass {
9898
void emitLoop(MachineInstr &MI);
9999
void emitEndCf(MachineInstr &MI);
100100

101-
Register getSaveExec(MachineInstr* MI);
102-
103101
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
104102
SmallVectorImpl<MachineOperand> &Src) const;
105103

@@ -177,29 +175,11 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
177175
return true;
178176
}
179177

180-
Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
181-
MachineBasicBlock *MBB = MI->getParent();
182-
MachineOperand &SaveExec = MI->getOperand(0);
183-
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
184-
185-
Register SaveExecReg = SaveExec.getReg();
186-
unsigned FalseTermOpc =
187-
TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
188-
MachineBasicBlock::iterator I = (MI);
189-
MachineBasicBlock::iterator J = std::next(I);
190-
if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
191-
J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
192-
SaveExecReg = J->getOperand(0).getReg();
193-
J->eraseFromParent();
194-
}
195-
return SaveExecReg;
196-
}
197-
198178
void SILowerControlFlow::emitIf(MachineInstr &MI) {
199179
MachineBasicBlock &MBB = *MI.getParent();
200180
const DebugLoc &DL = MI.getDebugLoc();
201181
MachineBasicBlock::iterator I(&MI);
202-
Register SaveExecReg = getSaveExec(&MI);
182+
Register SaveExecReg = MI.getOperand(0).getReg();
203183
MachineOperand& Cond = MI.getOperand(1);
204184
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
205185

@@ -282,7 +262,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
282262
MachineBasicBlock &MBB = *MI.getParent();
283263
const DebugLoc &DL = MI.getDebugLoc();
284264

285-
Register DstReg = getSaveExec(&MI);
265+
Register DstReg = MI.getOperand(0).getReg();
286266

287267
bool ExecModified = MI.getOperand(3).getImm() != 0;
288268
MachineBasicBlock::iterator Start = MBB.begin();
@@ -354,7 +334,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
354334
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
355335
MachineBasicBlock &MBB = *MI.getParent();
356336
const DebugLoc &DL = MI.getDebugLoc();
357-
auto Dst = getSaveExec(&MI);
337+
auto Dst = MI.getOperand(0).getReg();
358338

359339
// Skip ANDing with exec if the break condition is already masked by exec
360340
// because it is a V_CMP in the same basic block. (We know the break
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
# The save exec result register of SI_IF is used by other terminators
5+
# inserted to behave as a lowered phi. The output register of SI_IF
6+
# was ignored, and the def was removed, so the S_MOV_B64_term uses
7+
# would fail the verifier.
8+
9+
---
10+
name: si_if_use
11+
alignment: 1
12+
legalized: true
13+
regBankSelected: true
14+
selected: true
15+
tracksRegLiveness: true
16+
body: |
17+
; CHECK-LABEL: name: si_if_use
18+
; CHECK: bb.0:
19+
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
20+
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
21+
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
22+
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
23+
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec
24+
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
25+
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
26+
; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
27+
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
28+
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
29+
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
30+
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
31+
; CHECK: S_BRANCH %bb.2
32+
; CHECK: bb.1:
33+
; CHECK: successors: %bb.2(0x80000000)
34+
; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
35+
; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
36+
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
37+
; CHECK: bb.2:
38+
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
39+
; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
40+
; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
41+
; CHECK: S_SLEEP 1
42+
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
43+
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
44+
; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc
45+
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
46+
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
47+
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
48+
; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
49+
; CHECK: S_BRANCH %bb.2
50+
bb.0:
51+
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
52+
53+
%0:vgpr_32 = COPY killed $vgpr0
54+
%1:vgpr_32 = COPY killed $vgpr1
55+
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
56+
%10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
57+
%14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
58+
%13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
59+
S_BRANCH %bb.2
60+
61+
bb.1:
62+
%11:sreg_64_xexec = COPY %13
63+
dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
64+
%14:sreg_64_xexec = COPY %11
65+
66+
bb.2:
67+
%12:sreg_64_xexec = COPY %14
68+
SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec
69+
S_SLEEP 1
70+
%9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
71+
%14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
72+
%13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
73+
S_BRANCH %bb.2
74+
75+
...

0 commit comments

Comments
 (0)