Skip to content

Commit d2e5d35

Browse files
committed
[StructurizeCFG] Clean up some boolean not instructions
In some cases StructurizeCFG inserts i1 xor instructions to invert predicates. Add a quick loop to clean these up afterwards if we can get away with modifying an existing compare instruction instead. (StructurizeCFG is generally run late in the pipeline so instcombine does not clean them up for us.) Differential Revision: https://reviews.llvm.org/D118623
1 parent db04266 commit d2e5d35

21 files changed

+166
-149
lines changed

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ class StructurizeCFG {
276276

277277
void insertConditions(bool Loops);
278278

279+
void simplifyConditions();
280+
279281
void delPhiValues(BasicBlock *From, BasicBlock *To);
280282

281283
void addPhiValues(BasicBlock *From, BasicBlock *To);
@@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
586588
}
587589
}
588590

591+
/// Simplify any inverted conditions that were built by buildConditions.
///
/// Walks every condition stored in Predicates and LoopPreds. When a condition
/// is a boolean not of a compare instruction whose only use is that not, the
/// compare's predicate is inverted in place, the uses of the not are
/// redirected to the compare, and the not is erased.
592+
void StructurizeCFG::simplifyConditions() {
593+
// Not instructions made dead below; they are erased only after both loops
// have finished.
SmallVector<Instruction *> InstToErase;
594+
for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
595+
auto &Preds = I.second;
596+
for (auto &J : Preds) {
597+
auto &Cond = J.second;
598+
Instruction *Inverted;
599+
// The one-use requirement matters because the compare is mutated in place
// below: any other user of it would observe the inverted result.
// use_empty() skips a not that has no users -- presumably one whose uses
// were already replaced when the same condition was seen earlier.
if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
600+
!Cond->use_empty()) {
601+
if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
602+
// Fold the negation into the compare itself, then retire the not.
InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
603+
Cond->replaceAllUsesWith(InvertedCmp);
604+
// NOTE(review): J.second is not reassigned here, so the map entry still
// refers to the not that gets erased below -- confirm nothing reads these
// entries after this function returns.
InstToErase.push_back(cast<Instruction>(Cond));
605+
}
606+
}
607+
}
608+
}
609+
for (auto *I : InstToErase)
610+
I->eraseFromParent();
611+
}
612+
589613
/// Remove all PHI values coming from "From" into "To" and remember
590614
/// them in DeletedPhis
591615
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
@@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
10651089
createFlow();
10661090
insertConditions(false);
10671091
insertConditions(true);
1092+
simplifyConditions();
10681093
setPhiValues();
10691094
simplifyAffectedPhis();
10701095
rebuildSSA();

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,10 @@ define void @constrained_if_register_class() {
139139
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
140140
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
141141
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
142-
; CHECK-NEXT: s_cbranch_scc1 .LBB4_4
143-
; CHECK-NEXT: ; %bb.1: ; %bb2
142+
; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
143+
; CHECK-NEXT: .LBB4_1: ; %bb12
144+
; CHECK-NEXT: s_setpc_b64 s[30:31]
145+
; CHECK-NEXT: .LBB4_2: ; %bb2
144146
; CHECK-NEXT: s_getpc_b64 s[4:5]
145147
; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
146148
; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
@@ -153,15 +155,13 @@ define void @constrained_if_register_class() {
153155
; CHECK-NEXT: s_mov_b32 s4, -1
154156
; CHECK-NEXT: s_waitcnt vmcnt(0)
155157
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
156-
; CHECK-NEXT: s_cbranch_vccnz .LBB4_3
157-
; CHECK-NEXT: ; %bb.2: ; %bb7
158+
; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
159+
; CHECK-NEXT: ; %bb.3: ; %bb7
158160
; CHECK-NEXT: s_mov_b32 s4, 0
159-
; CHECK-NEXT: .LBB4_3: ; %bb8
161+
; CHECK-NEXT: .LBB4_4: ; %bb8
160162
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
161-
; CHECK-NEXT: s_cbranch_scc0 .LBB4_5
162-
; CHECK-NEXT: .LBB4_4: ; %bb12
163-
; CHECK-NEXT: s_setpc_b64 s[30:31]
164-
; CHECK-NEXT: .LBB4_5: ; %bb11
163+
; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
164+
; CHECK-NEXT: ; %bb.5: ; %bb11
165165
; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
166166
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
167167
; CHECK-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,7 +838,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
838838
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
839839
; CGP-NEXT: ; implicit-def: $vgpr4
840840
; CGP-NEXT: ; implicit-def: $vgpr10
841-
; CGP-NEXT: .LBB2_2: ; %Flow2
841+
; CGP-NEXT: .LBB2_2: ; %Flow1
842842
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
843843
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
844844
; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3118,7 +3118,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
31183118
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
31193119
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
31203120
; CGP-NEXT: ; implicit-def: $vgpr8
3121-
; CGP-NEXT: .LBB8_2: ; %Flow2
3121+
; CGP-NEXT: .LBB8_2: ; %Flow1
31223122
; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
31233123
; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
31243124
; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]

llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
824824
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
825825
; CGP-NEXT: ; implicit-def: $vgpr4
826826
; CGP-NEXT: ; implicit-def: $vgpr10
827-
; CGP-NEXT: .LBB2_2: ; %Flow2
827+
; CGP-NEXT: .LBB2_2: ; %Flow1
828828
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
829829
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
830830
; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3072,7 +3072,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
30723072
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
30733073
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
30743074
; CGP-NEXT: ; implicit-def: $vgpr8
3075-
; CGP-NEXT: .LBB8_2: ; %Flow2
3075+
; CGP-NEXT: .LBB8_2: ; %Flow1
30763076
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
30773077
; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
30783078
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]

llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
759759
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
760760
; CGP-NEXT: ; implicit-def: $vgpr4
761761
; CGP-NEXT: ; implicit-def: $vgpr10
762-
; CGP-NEXT: .LBB2_2: ; %Flow2
762+
; CGP-NEXT: .LBB2_2: ; %Flow1
763763
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
764764
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
765765
; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -1641,7 +1641,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
16411641
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
16421642
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
16431643
; CGP-NEXT: ; implicit-def: $vgpr8
1644-
; CGP-NEXT: .LBB8_2: ; %Flow2
1644+
; CGP-NEXT: .LBB8_2: ; %Flow1
16451645
; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
16461646
; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
16471647
; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]

llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
750750
; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
751751
; CGP-NEXT: ; implicit-def: $vgpr4
752752
; CGP-NEXT: ; implicit-def: $vgpr10
753-
; CGP-NEXT: .LBB2_2: ; %Flow2
753+
; CGP-NEXT: .LBB2_2: ; %Flow1
754754
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
755755
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
756756
; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -2181,7 +2181,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
21812181
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
21822182
; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
21832183
; CGP-NEXT: ; implicit-def: $vgpr8
2184-
; CGP-NEXT: .LBB8_2: ; %Flow2
2184+
; CGP-NEXT: .LBB8_2: ; %Flow1
21852185
; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
21862186
; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
21872187
; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]

llvm/test/CodeGen/AMDGPU/branch-relaxation.ll

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -227,30 +227,31 @@ bb3:
227227

228228
; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
229229
; GCN: s_cmp_eq_u32
230-
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:.LBB[0-9]+_[0-9]+]]
230+
; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]]
231231

232232
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
233233
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
234234
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
235-
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
236-
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
235+
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
236+
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32
237237
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
238238

239-
; GCN: [[BB2]]: ; %bb3
240-
; GCN: v_nop_e64
241-
; GCN: v_nop_e64
242-
; GCN: v_nop_e64
243-
; GCN: v_nop_e64
244-
; GCN: ;;#ASMEND
245-
246-
; GCN: [[BB3]]:
239+
; GCN: [[BB1]]:
247240
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
248241
; GCN: buffer_store_dword [[BB2_K]]
249242

250243
; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
251244
; GCN: buffer_store_dword [[BB4_K]]
252245
; GCN: s_endpgm
253-
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
246+
247+
; GCN: [[BB4]]: ; %bb3
248+
; GCN: v_nop_e64
249+
; GCN: v_nop_e64
250+
; GCN: v_nop_e64
251+
; GCN: v_nop_e64
252+
; GCN: ;;#ASMEND
253+
254+
; GCN: .Lfunc_end{{[0-9]+}}:
254255
define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
255256
bb0:
256257
%tmp = icmp ne i32 %arg1, 0

llvm/test/CodeGen/AMDGPU/ctpop16.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,30 +1502,30 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
15021502
; SI-NEXT: s_waitcnt lgkmcnt(0)
15031503
; SI-NEXT: s_lshr_b32 s5, s4, 16
15041504
; SI-NEXT: s_cmp_lg_u32 s5, 0
1505-
; SI-NEXT: s_cbranch_scc0 .LBB14_2
1505+
; SI-NEXT: s_cbranch_scc0 .LBB14_4
15061506
; SI-NEXT: ; %bb.1: ; %else
15071507
; SI-NEXT: s_mov_b32 s11, 0xf000
15081508
; SI-NEXT: s_mov_b32 s10, -1
15091509
; SI-NEXT: s_mov_b32 s8, s2
15101510
; SI-NEXT: s_mov_b32 s9, s3
15111511
; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
15121512
; SI-NEXT: s_mov_b64 s[2:3], 0
1513-
; SI-NEXT: s_cbranch_execz .LBB14_3
1514-
; SI-NEXT: s_branch .LBB14_4
1515-
; SI-NEXT: .LBB14_2:
1516-
; SI-NEXT: s_mov_b64 s[2:3], -1
1517-
; SI-NEXT: v_mov_b32_e32 v0, 0
1518-
; SI-NEXT: .LBB14_3: ; %if
1513+
; SI-NEXT: s_cbranch_execnz .LBB14_3
1514+
; SI-NEXT: .LBB14_2: ; %if
15191515
; SI-NEXT: s_and_b32 s2, s4, 0xffff
15201516
; SI-NEXT: s_bcnt1_i32_b32 s2, s2
15211517
; SI-NEXT: s_waitcnt vmcnt(0)
15221518
; SI-NEXT: v_mov_b32_e32 v0, s2
1523-
; SI-NEXT: .LBB14_4: ; %endif
1519+
; SI-NEXT: .LBB14_3: ; %endif
15241520
; SI-NEXT: s_mov_b32 s3, 0xf000
15251521
; SI-NEXT: s_mov_b32 s2, -1
15261522
; SI-NEXT: s_waitcnt vmcnt(0)
15271523
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
15281524
; SI-NEXT: s_endpgm
1525+
; SI-NEXT: .LBB14_4:
1526+
; SI-NEXT: s_mov_b64 s[2:3], -1
1527+
; SI-NEXT: v_mov_b32_e32 v0, 0
1528+
; SI-NEXT: s_branch .LBB14_2
15291529
;
15301530
; VI-LABEL: ctpop_i16_in_br:
15311531
; VI: ; %bb.0: ; %entry
@@ -1535,30 +1535,30 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
15351535
; VI-NEXT: s_lshr_b32 s5, s4, 16
15361536
; VI-NEXT: v_cmp_ne_u16_e64 s[6:7], s5, 0
15371537
; VI-NEXT: s_and_b64 vcc, exec, s[6:7]
1538-
; VI-NEXT: s_cbranch_vccz .LBB14_2
1538+
; VI-NEXT: s_cbranch_vccz .LBB14_4
15391539
; VI-NEXT: ; %bb.1: ; %else
15401540
; VI-NEXT: s_mov_b32 s11, 0xf000
15411541
; VI-NEXT: s_mov_b32 s10, -1
15421542
; VI-NEXT: s_mov_b32 s8, s2
15431543
; VI-NEXT: s_mov_b32 s9, s3
15441544
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
15451545
; VI-NEXT: s_mov_b64 s[2:3], 0
1546-
; VI-NEXT: s_cbranch_execz .LBB14_3
1547-
; VI-NEXT: s_branch .LBB14_4
1548-
; VI-NEXT: .LBB14_2:
1549-
; VI-NEXT: s_mov_b64 s[2:3], -1
1550-
; VI-NEXT: ; implicit-def: $vgpr0
1551-
; VI-NEXT: .LBB14_3: ; %if
1546+
; VI-NEXT: s_cbranch_execnz .LBB14_3
1547+
; VI-NEXT: .LBB14_2: ; %if
15521548
; VI-NEXT: s_and_b32 s2, s4, 0xffff
15531549
; VI-NEXT: s_bcnt1_i32_b32 s2, s2
15541550
; VI-NEXT: s_waitcnt vmcnt(0)
15551551
; VI-NEXT: v_mov_b32_e32 v0, s2
1556-
; VI-NEXT: .LBB14_4: ; %endif
1552+
; VI-NEXT: .LBB14_3: ; %endif
15571553
; VI-NEXT: s_mov_b32 s3, 0xf000
15581554
; VI-NEXT: s_mov_b32 s2, -1
15591555
; VI-NEXT: s_waitcnt vmcnt(0)
15601556
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
15611557
; VI-NEXT: s_endpgm
1558+
; VI-NEXT: .LBB14_4:
1559+
; VI-NEXT: s_mov_b64 s[2:3], -1
1560+
; VI-NEXT: ; implicit-def: $vgpr0
1561+
; VI-NEXT: s_branch .LBB14_2
15621562
;
15631563
; EG-LABEL: ctpop_i16_in_br:
15641564
; EG: ; %bb.0: ; %entry

llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,50 +1534,50 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
15341534
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
15351535
; SI-NEXT: s_waitcnt lgkmcnt(0)
15361536
; SI-NEXT: s_cmp_lg_u32 s6, 0
1537-
; SI-NEXT: s_cbranch_scc0 .LBB30_2
1537+
; SI-NEXT: s_cbranch_scc0 .LBB30_4
15381538
; SI-NEXT: ; %bb.1: ; %else
15391539
; SI-NEXT: s_load_dword s7, s[2:3], 0x1
15401540
; SI-NEXT: s_mov_b64 s[4:5], 0
15411541
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
15421542
; SI-NEXT: s_waitcnt lgkmcnt(0)
15431543
; SI-NEXT: s_mov_b64 vcc, vcc
1544-
; SI-NEXT: s_cbranch_vccz .LBB30_3
1545-
; SI-NEXT: s_branch .LBB30_4
1546-
; SI-NEXT: .LBB30_2:
1547-
; SI-NEXT: .LBB30_3: ; %if
1544+
; SI-NEXT: s_cbranch_vccnz .LBB30_3
1545+
; SI-NEXT: .LBB30_2: ; %if
15481546
; SI-NEXT: s_load_dword s7, s[2:3], 0x0
1549-
; SI-NEXT: .LBB30_4: ; %endif
1547+
; SI-NEXT: .LBB30_3: ; %endif
15501548
; SI-NEXT: s_waitcnt lgkmcnt(0)
15511549
; SI-NEXT: v_mov_b32_e32 v0, s6
15521550
; SI-NEXT: s_mov_b32 s3, 0x100f000
15531551
; SI-NEXT: s_mov_b32 s2, -1
15541552
; SI-NEXT: v_mov_b32_e32 v1, s7
15551553
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
15561554
; SI-NEXT: s_endpgm
1555+
; SI-NEXT: .LBB30_4:
1556+
; SI-NEXT: s_branch .LBB30_2
15571557
;
15581558
; VI-LABEL: insert_split_bb:
15591559
; VI: ; %bb.0: ; %entry
15601560
; VI-NEXT: s_load_dword s6, s[4:5], 0x10
15611561
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
15621562
; VI-NEXT: s_waitcnt lgkmcnt(0)
15631563
; VI-NEXT: s_cmp_lg_u32 s6, 0
1564-
; VI-NEXT: s_cbranch_scc0 .LBB30_2
1564+
; VI-NEXT: s_cbranch_scc0 .LBB30_4
15651565
; VI-NEXT: ; %bb.1: ; %else
15661566
; VI-NEXT: s_load_dword s7, s[2:3], 0x4
1567-
; VI-NEXT: s_cbranch_execz .LBB30_3
1568-
; VI-NEXT: s_branch .LBB30_4
1569-
; VI-NEXT: .LBB30_2:
1570-
; VI-NEXT: .LBB30_3: ; %if
1567+
; VI-NEXT: s_cbranch_execnz .LBB30_3
1568+
; VI-NEXT: .LBB30_2: ; %if
15711569
; VI-NEXT: s_waitcnt lgkmcnt(0)
15721570
; VI-NEXT: s_load_dword s7, s[2:3], 0x0
1573-
; VI-NEXT: .LBB30_4: ; %endif
1571+
; VI-NEXT: .LBB30_3: ; %endif
15741572
; VI-NEXT: s_waitcnt lgkmcnt(0)
15751573
; VI-NEXT: v_mov_b32_e32 v0, s6
15761574
; VI-NEXT: s_mov_b32 s3, 0x1100f000
15771575
; VI-NEXT: s_mov_b32 s2, -1
15781576
; VI-NEXT: v_mov_b32_e32 v1, s7
15791577
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
15801578
; VI-NEXT: s_endpgm
1579+
; VI-NEXT: .LBB30_4:
1580+
; VI-NEXT: s_branch .LBB30_2
15811581
entry:
15821582
%0 = insertelement <2 x i32> undef, i32 %a, i32 0
15831583
%1 = icmp eq i32 %a, 0

llvm/test/CodeGen/AMDGPU/loop_break.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,10 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
1717
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
1818
; OPT: bb4:
1919
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
20-
; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
21-
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true
20+
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
2221
; OPT-NEXT: br label [[FLOW]]
2322
; OPT: Flow:
24-
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
23+
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
2524
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
2625
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
2726
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]

0 commit comments

Comments
 (0)