Skip to content

Commit 565af15

Browse files
committed
[AMDGPU] Extend pre-emit peephole to redundantly masked VCC
Extend pre-emit peephole for S_CBRANCH_VCC[N]Z to eliminate redundant S_AND operations against EXEC for V_CMP results in VCC. These occur after register allocation when VCC has been selected as the comparison destination. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D120202
1 parent 79787b9 commit 565af15

19 files changed

+139
-60
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
7474
// We end up with this pattern sometimes after basic block placement.
7575
// It happens while combining a block which assigns -1 or 0 to a saved mask
7676
// and another block which consumes that saved mask and then a branch.
77+
//
78+
// While searching this also performs the following substitution:
79+
// vcc = V_CMP
80+
// vcc = S_AND exec, vcc
81+
// S_CBRANCH_VCC[N]Z
82+
// =>
83+
// vcc = V_CMP
84+
// S_CBRANCH_VCC[N]Z
85+
7786
bool Changed = false;
7887
MachineBasicBlock &MBB = *MI.getParent();
7988
const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
@@ -121,14 +130,27 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
121130
SReg = Op2.getReg();
122131
auto M = std::next(A);
123132
bool ReadsSreg = false;
133+
bool ModifiesExec = false;
124134
for (; M != E; ++M) {
125135
if (M->definesRegister(SReg, TRI))
126136
break;
127137
if (M->modifiesRegister(SReg, TRI))
128138
return Changed;
129139
ReadsSreg |= M->readsRegister(SReg, TRI);
140+
ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
141+
}
142+
if (M == E)
143+
return Changed;
144+
// If SReg is VCC and SReg definition is a VALU comparison.
145+
// This means S_AND with EXEC is not required.
146+
// Erase the S_AND and return.
147+
// Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS
148+
if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
149+
TII->isVOPC(*M) && TII->isVALU(*M)) {
150+
A->eraseFromParent();
151+
return true;
130152
}
131-
if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
153+
if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
132154
(M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
133155
return Changed;
134156
MaskValue = M->getOperand(1).getImm();

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
623623
; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[12:13]
624624
; GFX908-NEXT: v_add_co_u32_e64 v14, s[2:3], v14, v6
625625
; GFX908-NEXT: v_addc_co_u32_e64 v15, s[2:3], v15, v7, s[2:3]
626-
; GFX908-NEXT: s_and_b64 vcc, exec, vcc
627626
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
628627
; GFX908-NEXT: .LBB3_5: ; %bb16
629628
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
@@ -751,7 +750,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
751750
; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, v16, v10
752751
; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v11, vcc
753752
; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15]
754-
; GFX90A-NEXT: s_and_b64 vcc, exec, vcc
755753
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
756754
; GFX90A-NEXT: .LBB3_5: ; %bb16
757755
; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1

llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
8282
; GFX7-NEXT: s_cmp_lg_u32 s0, 0
8383
; GFX7-NEXT: s_addc_u32 s0, s2, 0
8484
; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
85-
; GFX7-NEXT: s_and_b64 vcc, exec, vcc
8685
; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
8786
; GFX7-NEXT: ; %bb.1: ; %bb0
8887
; GFX7-NEXT: v_mov_b32_e32 v0, 0
@@ -109,7 +108,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
109108
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
110109
; GFX9-NEXT: s_addc_u32 s0, s2, 0
111110
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
112-
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
113111
; GFX9-NEXT: s_cbranch_vccnz .LBB1_2
114112
; GFX9-NEXT: ; %bb.1: ; %bb0
115113
; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -136,7 +134,6 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
136134
; GFX10-NEXT: s_cmpk_lg_u32 s1, 0x0
137135
; GFX10-NEXT: s_addc_u32 s0, s0, 0
138136
; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
139-
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
140137
; GFX10-NEXT: s_cbranch_vccnz .LBB1_2
141138
; GFX10-NEXT: ; %bb.1: ; %bb0
142139
; GFX10-NEXT: v_mov_b32_e32 v0, 0

llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
7171
; GCN-NEXT: global_load_short_d16_hi v0, v[2:3], off glc
7272
; GCN-NEXT: s_waitcnt vmcnt(0)
7373
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
74-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
7574
; GCN-NEXT: s_cbranch_vccz .LBB1_1
7675
; GCN-NEXT: ; %bb.2: ; %bb2
7776
; GCN-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/idiv-licm.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,6 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
431431
; GFX9-NEXT: v_mad_f32 v0, -v0, v2, v8
432432
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4
433433
; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, v2
434-
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
435434
; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], 0, v7, s[0:1]
436435
; GFX9-NEXT: global_store_short v[5:6], v0, off
437436
; GFX9-NEXT: s_cbranch_vccz .LBB4_1
@@ -516,7 +515,6 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
516515
; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, v10, s[2:3]
517516
; GFX9-NEXT: v_mul_lo_u32 v8, v8, s7
518517
; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5
519-
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
520518
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1]
521519
; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8
522520
; GFX9-NEXT: global_store_short v[5:6], v0, off
@@ -552,7 +550,6 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
552550
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v8, vcc_lo
553551
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4
554552
; GFX10-NEXT: v_mul_lo_u32 v7, v7, s4
555-
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
556553
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v7
557554
; GFX10-NEXT: global_store_short v[5:6], v0, off
558555
; GFX10-NEXT: s_cbranch_vccz .LBB5_1
@@ -608,7 +605,6 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
608605
; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v7|, |v2|
609606
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4
610607
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
611-
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
612608
; GFX9-NEXT: v_add_u32_e32 v0, v8, v0
613609
; GFX9-NEXT: global_store_short v[5:6], v0, off
614610
; GFX9-NEXT: s_cbranch_vccz .LBB6_1
@@ -701,7 +697,6 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
701697
; GFX9-NEXT: v_mov_b32_e32 v8, s5
702698
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s7, v4
703699
; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5
704-
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
705700
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1]
706701
; GFX9-NEXT: v_sub_u32_e32 v0, v7, v0
707702
; GFX9-NEXT: global_store_short v[5:6], v0, off
@@ -741,7 +736,6 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
741736
; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5
742737
; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0
743738
; GFX10-NEXT: v_mul_lo_u32 v0, v0, s1
744-
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
745739
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v7, v0
746740
; GFX10-NEXT: global_store_short v[5:6], v0, off
747741
; GFX10-NEXT: s_cbranch_vccz .LBB7_1

llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,3 +535,119 @@ body: |
535535
S_CBRANCH_VCCZ %bb.1, implicit $vcc
536536
S_ENDPGM 0
537537
...
538+
---
539+
# GCN-LABEL: name: and_cmp_vccz
540+
# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
541+
# GCN-NOT: S_AND_
542+
# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
543+
name: and_cmp_vccz
544+
body: |
545+
bb.0:
546+
S_NOP 0
547+
548+
bb.1:
549+
S_NOP 0
550+
551+
bb.2:
552+
V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
553+
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
554+
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
555+
S_ENDPGM 0
556+
...
557+
---
558+
# GCN-LABEL: name: and_cmp_vccnz
559+
# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
560+
# GCN-NOT: S_AND_
561+
# GCN: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
562+
name: and_cmp_vccnz
563+
body: |
564+
bb.0:
565+
S_NOP 0
566+
567+
bb.1:
568+
S_NOP 0
569+
570+
bb.2:
571+
V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
572+
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
573+
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
574+
S_ENDPGM 0
575+
...
576+
---
577+
# GCN-LABEL: name: andn2_cmp_vccz
578+
# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
579+
# GCN: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc
580+
# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
581+
name: andn2_cmp_vccz
582+
body: |
583+
bb.0:
584+
S_NOP 0
585+
586+
bb.1:
587+
S_NOP 0
588+
589+
bb.2:
590+
V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
591+
$vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc
592+
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
593+
S_ENDPGM 0
594+
...
595+
---
596+
# GCN-LABEL: name: and_cmpclass_vccz
597+
# GCN: V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
598+
# GCN-NOT: S_AND_
599+
# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
600+
name: and_cmpclass_vccz
601+
body: |
602+
bb.0:
603+
S_NOP 0
604+
605+
bb.1:
606+
S_NOP 0
607+
608+
bb.2:
609+
V_CMP_CLASS_F32_e32 killed $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
610+
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
611+
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
612+
S_ENDPGM 0
613+
...
614+
---
615+
# GCN-LABEL: name: and_cmpx_vccz
616+
# GCN: V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec
617+
# GCN-NOT: S_AND_
618+
# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
619+
name: and_cmpx_vccz
620+
body: |
621+
bb.0:
622+
S_NOP 0
623+
624+
bb.1:
625+
S_NOP 0
626+
627+
bb.2:
628+
V_CMPX_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit-def $exec, implicit $exec
629+
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
630+
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
631+
S_ENDPGM 0
632+
...
633+
---
634+
# GCN-LABEL: name: and_or_cmp_vccz
635+
# GCN: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
636+
# GCN: $exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
637+
# GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
638+
# GCN: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
639+
name: and_or_cmp_vccz
640+
body: |
641+
bb.0:
642+
S_NOP 0
643+
644+
bb.1:
645+
S_NOP 0
646+
647+
bb.2:
648+
V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
649+
$exec = S_OR_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
650+
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
651+
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
652+
S_ENDPGM 0
653+
...

llvm/test/CodeGen/AMDGPU/multilevel-break.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
195195
; GCN-NEXT: s_waitcnt vmcnt(0)
196196
; GCN-NEXT: s_mov_b64 s[6:7], -1
197197
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1
198-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
199198
; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
200199
; GCN-NEXT: s_mov_b64 s[10:11], -1
201200
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
202201
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
203202
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
204203
; GCN-NEXT: s_mov_b64 s[6:7], -1
205204
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
206-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
207205
; GCN-NEXT: s_mov_b64 s[8:9], -1
208206
; GCN-NEXT: s_cbranch_vccz .LBB1_5
209207
; GCN-NEXT: ; %bb.4: ; %case1
@@ -223,7 +221,6 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
223221
; GCN-NEXT: ; %bb.7: ; %LeafBlock
224222
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
225223
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
226-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
227224
; GCN-NEXT: s_mov_b64 s[8:9], -1
228225
; GCN-NEXT: s_cbranch_vccz .LBB1_1
229226
; GCN-NEXT: ; %bb.8: ; %case0

llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
151151
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
152152
; GCN-NEXT: s_waitcnt vmcnt(0)
153153
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
154-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
155154
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
156155
; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph
157156
; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
@@ -176,7 +175,6 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
176175
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
177176
; GCN-NEXT: s_waitcnt vmcnt(0)
178177
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
179-
; GCN-NEXT: s_and_b64 vcc, exec, vcc
180178
; GCN-NEXT: s_cbranch_vccnz .LBB1_4
181179
; GCN-NEXT: ; %bb.5: ; %bb21
182180
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1

llvm/test/CodeGen/AMDGPU/salu-to-valu.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,6 @@ entry:
438438
; {{^}}sopc_vopc_legalize_bug:
439439
; GCN: s_load_dword [[SGPR:s[0-9]+]]
440440
; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}}
441-
; GCN: s_and_b64 vcc, exec, vcc
442441
; GCN: s_cbranch_vccnz [[EXIT:.L[A-Z0-9_]+]]
443442
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
444443
; GCN-NOHSA: buffer_store_dword [[ONE]]

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,6 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
217217
; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
218218
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
219219
; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
220-
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
221220
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3
222221
; GCN-IR-NEXT: .LBB0_4: ; %Flow6
223222
; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
@@ -1071,7 +1070,6 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
10711070
; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
10721071
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[10:11], v[0:1]
10731072
; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9]
1074-
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
10751073
; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3
10761074
; GCN-IR-NEXT: .LBB9_4: ; %Flow3
10771075
; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[12:13], 1
@@ -1283,7 +1281,6 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
12831281
; GCN-IR-NEXT: s_addc_u32 s9, s9, 0
12841282
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
12851283
; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7]
1286-
; GCN-IR-NEXT: s_and_b64 vcc, exec, vcc
12871284
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3
12881285
; GCN-IR-NEXT: .LBB10_4: ; %Flow5
12891286
; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[10:11], 1

llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
750750
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
751751
; SI-NEXT: s_waitcnt vmcnt(0)
752752
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
753-
; SI-NEXT: s_and_b64 vcc, exec, vcc
754753
; SI-NEXT: s_cbranch_vccnz .LBB10_2
755754
; SI-NEXT: .LBB10_4: ; %Flow1
756755
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -796,7 +795,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
796795
; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
797796
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
798797
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
799-
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
800798
; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
801799
; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
802800
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -840,7 +838,6 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
840838
; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
841839
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
842840
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
843-
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
844841
; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
845842
; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
846843
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
@@ -901,7 +898,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
901898
; SI-NEXT: v_mov_b32_e32 v0, 4.0
902899
; SI-NEXT: .LBB11_3: ; %phibb
903900
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
904-
; SI-NEXT: s_and_b64 vcc, exec, vcc
905901
; SI-NEXT: s_cbranch_vccz .LBB11_5
906902
; SI-NEXT: ; %bb.4: ; %bb10
907903
; SI-NEXT: s_mov_b32 s3, 0xf000
@@ -934,7 +930,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
934930
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
935931
; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
936932
; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
937-
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
938933
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
939934
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
940935
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
@@ -965,7 +960,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
965960
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
966961
; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
967962
; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
968-
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
969963
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
970964
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
971965
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9

0 commit comments

Comments
 (0)