Skip to content

Commit 971f417

Browse files
committed
[AMDGPU][GlobalISel] Insert an and with exec before s_cbranch_vccnz if necessary
While v_cmp will AND inactive lanes with 0, that is not the case for logical operations. This fixes a Vulkan CTS test that would hang otherwise. Differential Revision: https://reviews.llvm.org/D105709
1 parent d81a843 commit 971f417

File tree

2 files changed

+171
-4
lines changed

2 files changed

+171
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,27 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
24652465
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
24662466
}
24672467

2468+
/// Return true if \p Reg is known to be produced purely by compare-like
/// instructions, so the caller (selectG_BRCOND) does not have to AND it with
/// exec before branching on it.
///
/// The accepted producers are:
///  - G_ICMP / G_FCMP,
///  - the llvm.amdgcn.class intrinsic,
///  - a COPY whose source satisfies this predicate,
///  - G_AND / G_OR / G_XOR whose two inputs both satisfy this predicate.
///
/// Anything else (physical registers, constants, copies from other producers,
/// registers without a single visible definition, ...) yields false, which is
/// the conservative answer: the caller then masks the value with exec.
///
/// \param Reg  The condition register to inspect.
/// \param MRI  Register info used to look up the defining instruction.
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  // A physical register has no virtual-register def chain to walk.
  if (Reg.isPhysical())
    return false;

  // getUniqueVRegDef yields null when there is no def or more than one def.
  // Nothing can be proven about such a value, so answer conservatively
  // instead of dereferencing a null pointer.
  MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
  if (!Def)
    return false;

  const unsigned Opcode = Def->getOpcode();

  // Look through copies to the original producer.
  if (Opcode == AMDGPU::COPY)
    return isVCmpResult(Def->getOperand(1).getReg(), MRI);

  // A logical combination qualifies only if both of its inputs do.
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return isVCmpResult(Def->getOperand(1).getReg(), MRI) &&
           isVCmpResult(Def->getOperand(2).getReg(), MRI);

  // Among intrinsics only llvm.amdgcn.class is treated as a compare.
  if (Opcode == TargetOpcode::G_INTRINSIC)
    return Def->getIntrinsicID() == Intrinsic::amdgcn_class;

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
2488+
24682489
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
24692490
MachineBasicBlock *BB = I.getParent();
24702491
MachineOperand &CondOp = I.getOperand(0);
@@ -2488,11 +2509,22 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
24882509
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
24892510
ConstrainRC = &AMDGPU::SReg_32RegClass;
24902511
} else {
2491-
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
2492-
// We sort of know that a VCC producer based on the register bank, that ands
2493-
// inactive lanes with 0. What if there was a logical operation with vcc
2494-
// producers in different blocks/with different exec masks?
24952512
// FIXME: Should scc->vcc copies and with exec?
2513+
2514+
// Unless the value of CondReg is a result of a V_CMP* instruction then we
2515+
// need to insert an and with exec.
2516+
if (!isVCmpResult(CondReg, *MRI)) {
2517+
const bool Is64 = STI.isWave64();
2518+
const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
2519+
const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
2520+
2521+
Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
2522+
BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
2523+
.addReg(CondReg)
2524+
.addReg(Exec);
2525+
CondReg = TmpReg;
2526+
}
2527+
24962528
CondPhysReg = TRI.getVCC();
24972529
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
24982530
ConstrainRC = TRI.getBoolRC();

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,138 @@ body: |
174174
bb.1:
175175
176176
...
177+
178+
---
179+
180+
name: brcond_class_intrinsic
181+
legalized: true
182+
regBankSelected: true
183+
184+
body: |
185+
; GCN-LABEL: name: brcond_class_intrinsic
186+
; GCN: bb.0:
187+
; GCN: successors: %bb.1(0x80000000)
188+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
189+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
190+
; GCN: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
191+
; GCN: $vcc = COPY [[V_CMP_CLASS_F32_e64_]]
192+
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
193+
; GCN: bb.1:
194+
bb.0:
195+
liveins: $vgpr0, $vgpr1
196+
197+
%0:vgpr(s32) = COPY $vgpr0
198+
%1:vgpr(s32) = COPY $vgpr1
199+
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0:vgpr(s32), %1:vgpr(s32)
200+
G_BRCOND %2(s1), %bb.1
201+
202+
bb.1:
203+
204+
...
205+
206+
---
207+
208+
name: brcond_cmp_logic
209+
legalized: true
210+
regBankSelected: true
211+
212+
body: |
213+
; GCN-LABEL: name: brcond_cmp_logic
214+
; GCN: bb.0:
215+
; GCN: successors: %bb.1(0x80000000)
216+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
217+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
218+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
219+
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
220+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
221+
; GCN: %5:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
222+
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc
223+
; GCN: $vcc = COPY [[S_AND_B64_]]
224+
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
225+
; GCN: bb.1:
226+
bb.0:
227+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
228+
229+
%0:vgpr(s32) = COPY $vgpr0
230+
%1:vgpr(s32) = COPY $vgpr1
231+
%2:vgpr(s32) = COPY $vgpr2
232+
%3:vgpr(s32) = COPY $vgpr3
233+
%4:vcc(s1) = G_ICMP intpred(eq), %0, %1
234+
%5:vcc(s1) = G_FCMP floatpred(oeq), %2, %3
235+
%6:vcc(s1) = G_AND %4, %5
236+
G_BRCOND %6(s1), %bb.1
237+
238+
bb.1:
239+
240+
...
241+
242+
---
243+
244+
name: brcond_logic
245+
legalized: true
246+
regBankSelected: true
247+
248+
body: |
249+
; GCN-LABEL: name: brcond_logic
250+
; GCN: bb.0:
251+
; GCN: successors: %bb.1(0x80000000)
252+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
253+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
254+
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
255+
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc
256+
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
257+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
258+
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
259+
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc
260+
; GCN: $vcc = COPY [[S_AND_B64_1]]
261+
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
262+
; GCN: bb.1:
263+
bb.0:
264+
liveins: $sgpr0, $vgpr0, $vgpr1
265+
266+
%0:vgpr(s32) = COPY $vgpr0
267+
%1:vgpr(s32) = COPY $vgpr1
268+
%2:sgpr(s32) = COPY $sgpr0
269+
%3:sgpr(s1) = G_TRUNC %2(s32)
270+
%4:vcc(s1) = COPY %3(s1)
271+
%5:vcc(s1) = G_ICMP intpred(eq), %0, %1
272+
%6:vcc(s1) = G_AND %5, %4
273+
G_BRCOND %6(s1), %bb.1
274+
275+
bb.1:
276+
277+
...
278+
279+
---
280+
281+
name: brcond_logic_const
282+
legalized: true
283+
regBankSelected: true
284+
285+
body: |
286+
; GCN-LABEL: name: brcond_logic_const
287+
; GCN: bb.0:
288+
; GCN: successors: %bb.1(0x80000000)
289+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
290+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
291+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
292+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
293+
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc
294+
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc
295+
; GCN: $vcc = COPY [[S_AND_B64_]]
296+
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
297+
; GCN: bb.1:
298+
bb.0:
299+
liveins: $vgpr0, $vgpr1
300+
301+
%0:vgpr(s32) = COPY $vgpr0
302+
%1:vgpr(s32) = COPY $vgpr1
303+
%2:vcc(s1) = G_ICMP intpred(eq), %0, %1
304+
%3:sgpr(s1) = G_CONSTANT i1 true
305+
%4:vcc(s1) = COPY %3(s1)
306+
%5:vcc(s1) = G_XOR %2, %4
307+
G_BRCOND %5(s1), %bb.1
308+
309+
bb.1:
310+
311+
...

0 commit comments

Comments
 (0)