Skip to content

Commit 24ed0a8

Browse files
[SimplifyCFG] Fix bugs and Address reviews
1 parent d29e5b0 commit 24ed0a8

File tree

5 files changed

+172
-45
lines changed

5 files changed

+172
-45
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,27 +1562,39 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
15621562

15631563
auto *TI = BB->getTerminator();
15641564

1565-
SmallVector<BasicBlock *> SuccessorBlocks;
1566-
for (auto *Succ : successors(BB))
1567-
SuccessorBlocks.push_back(Succ);
1565+
SmallVector<BasicBlock *, 8> SuccessorBBs;
1566+
for (auto *Succ : successors(BB)) {
1567+
BasicBlock::iterator SuccItr = Succ->begin();
1568+
// If we find an unreachable instruction at the beginning of a basic block,
1569+
// we can still hoist instructions from the rest of the basic blocks.
1570+
if (isa<UnreachableInst>(*SuccItr))
1571+
continue;
1572+
SuccessorBBs.push_back(Succ);
1573+
}
15681574

1569-
// Sort successor blocks based on the number of instructions.
1570-
// This is because we always want to iterate over instructions
1571-
// of the smallest block.
1572-
llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
1573-
return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
1574-
});
1575+
// Find the smallest BB because we always want to iterate over instructions
1576+
// of the smallest Successor.
1577+
auto *SmallestBB = *std::min_element(SuccessorBBs.begin(), SuccessorBBs.end(),
1578+
[](BasicBlock *BB1, BasicBlock *BB2) {
1579+
return BB1->size() < BB2->size();
1580+
});
1581+
std::iter_swap(
1582+
SuccessorBBs.begin(),
1583+
std::find(SuccessorBBs.begin(), SuccessorBBs.end(), SmallestBB));
15751584

15761585
// The second of pair is a SkipFlags bitmask.
15771586
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
15781587
SmallVector<SuccIterPair, 8> SuccIterPairs;
1579-
for (auto *Succ : SuccessorBlocks) {
1588+
for (auto *Succ : SuccessorBBs) {
15801589
BasicBlock::iterator SuccItr = Succ->begin();
15811590
if (isa<PHINode>(*SuccItr))
15821591
return false;
15831592
SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
15841593
}
15851594

1595+
if (SuccIterPairs.size() < 2)
1596+
return false;
1597+
15861598
// Check if only hoisting terminators is allowed. This does not add new
15871599
// instructions to the hoist location.
15881600
if (EqTermsOnly) {
@@ -1600,14 +1612,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
16001612
// many instructions we skip, serving as a compilation time control as well as
16011613
// preventing excessive increase of life ranges.
16021614
unsigned NumSkipped = 0;
1603-
// If we find an unreachable instruction at the beginning of a basic block, we
1604-
// can still hoist instructions from the rest of the basic blocks.
1605-
if (SuccIterPairs.size() > 2) {
1606-
erase_if(SuccIterPairs,
1607-
[](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1608-
if (SuccIterPairs.size() < 2)
1609-
return false;
1610-
}
16111615

16121616
bool Changed = false;
16131617
auto *SuccIterPairBegin = SuccIterPairs.begin();
@@ -1642,14 +1646,25 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
16421646
iterator_range(SuccIterPairBegin, SuccIterPairs.end());
16431647
Instruction *I1 = &*BB1ItrPair.first;
16441648
auto *BB1 = I1->getParent();
1649+
1650+
// Skip debug info if it is not identical.
1651+
bool IdenticalDebugs = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1652+
Instruction *I2 = &*Iter;
1653+
return I1->isIdenticalToWhenDefined(I2);
1654+
});
1655+
if (!IdenticalDebugs) {
1656+
while (isa<DbgInfoIntrinsic>(I1))
1657+
I1 = &*++BB1ItrPair.first;
1658+
}
1659+
16451660
bool HasIdenticalInst = true;
16461661

16471662
// Check if there are identical instructions in all other successors
16481663
for (auto &map : OtherSuccessorsHash) {
16491664
Instruction *I2 = map[getHash(I1)].first;
16501665
// We might encounter the same hash value for different instructions.
16511666
// If that happens, ignore the instruction.
1652-
if (!I2 || !I1->isIdenticalTo(I2)) {
1667+
if (!I2 || !I1->isIdenticalToWhenDefined(I2)) {
16531668
HasIdenticalInst = false;
16541669
break;
16551670
}
@@ -1665,7 +1680,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
16651680
SuccIterPair.second |= skippedInstrFlags(I);
16661681
}
16671682
}
1668-
NumSkipped++;
1683+
++NumSkipped;
16691684
if (I1->isTerminator())
16701685
return Changed;
16711686
++BB1ItrPair.first;
@@ -1737,13 +1752,38 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
17371752
for (auto &map : OtherSuccessorsHash) {
17381753
Instruction *I2 = map[getHash(I1)].first;
17391754
assert(I2 != I1);
1740-
if (!I2->use_empty())
1755+
// Update the hash codes of all instructions using I2
1756+
if (!I2->use_empty()) {
1757+
SmallVector<llvm::hash_code, 8> PrevHashCodes;
1758+
SmallVector<llvm::Instruction *, 8> PrevUsers;
1759+
// Once the uses of I2 are replaced, the hash values computed for
1760+
// those users are not valid anymore so we gather users and then
1761+
// recompute the hash codes for them. We need to do this only for
1762+
// the instructions located in the same block as I2 because we
1763+
// initially only hashed those instructions.
1764+
for (auto *user : I2->users()) {
1765+
if (auto *I = dyn_cast<Instruction>(user)) {
1766+
if (I->getParent() != I2->getParent())
1767+
continue;
1768+
PrevHashCodes.push_back(getHash(I));
1769+
PrevUsers.push_back(I);
1770+
}
1771+
}
17411772
I2->replaceAllUsesWith(I1);
1773+
unsigned index = 0;
1774+
for (auto &PrevHash : PrevHashCodes) {
1775+
auto NewHash = getHash(PrevUsers[index]);
1776+
std::swap(map[NewHash], map[PrevHash]);
1777+
map.erase(PrevHash);
1778+
index++;
1779+
}
1780+
}
17421781
I1->andIRFlags(I2);
17431782
combineMetadataForCSE(I1, I2, true);
17441783
// I1 and I2 are being combined into a single instruction. Its debug
17451784
// location is the merged locations of the original instructions.
17461785
I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1786+
map.erase(getHash(I1));
17471787
I2->eraseFromParent();
17481788
}
17491789
}
@@ -1757,10 +1797,11 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
17571797
// We are about to skip over a pair of non-identical instructions. Record
17581798
// if any have characteristics that would prevent reordering instructions
17591799
// across them.
1800+
BB1ItrPair.first++;
17601801
SkipFlagsBB1 |= skippedInstrFlags(I1);
17611802
if (SameLevelHoist) {
17621803
for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags
1763-
Instruction *I = &*SuccIterPair.first;
1804+
Instruction *I = &*SuccIterPair.first++;
17641805
SuccIterPair.second |= skippedInstrFlags(I);
17651806
}
17661807
}
@@ -1874,16 +1915,20 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
18741915
}
18751916
}
18761917
}
1918+
18771919
SmallVector<DominatorTree::UpdateType, 4> Updates;
1920+
18781921
// Update any PHI nodes in our new successors.
18791922
for (BasicBlock *Succ : successors(BB1)) {
18801923
AddPredecessorToBlock(Succ, TIParent, BB1);
18811924
if (DTU)
18821925
Updates.push_back({DominatorTree::Insert, TIParent, Succ});
18831926
}
1927+
18841928
if (DTU)
18851929
for (BasicBlock *Succ : successors(TI))
18861930
Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1931+
18871932
EraseTerminatorAndDCECond(TI);
18881933
if (DTU)
18891934
DTU->applyUpdates(Updates);
@@ -3631,7 +3676,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
36313676
// Change the PHI node into a select instruction.
36323677
Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
36333678
Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3634-
36353679
Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
36363680
PN->replaceAllUsesWith(Sel);
36373681
Sel->takeName(PN);

llvm/test/CodeGen/ARM/aes-erratum-fix.ll

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -314,16 +314,15 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x
314314
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
315315
; CHECK-FIX: @ %bb.0:
316316
; CHECK-FIX-NEXT: vorr q0, q0, q0
317+
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
317318
; CHECK-FIX-NEXT: cmp r0, #0
318319
; CHECK-FIX-NEXT: beq .LBB12_2
319320
; CHECK-FIX-NEXT: @ %bb.1:
320-
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
321321
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
322322
; CHECK-FIX-NEXT: cmp r0, #0
323323
; CHECK-FIX-NEXT: bne .LBB12_3
324324
; CHECK-FIX-NEXT: b .LBB12_4
325325
; CHECK-FIX-NEXT: .LBB12_2:
326-
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
327326
; CHECK-FIX-NEXT: cmp r0, #0
328327
; CHECK-FIX-NEXT: beq .LBB12_4
329328
; CHECK-FIX-NEXT: .LBB12_3:
@@ -3264,23 +3263,18 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>*
32643263
define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
32653264
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
32663265
; CHECK-FIX-NOSCHED: @ %bb.0:
3267-
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3268-
; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
3269-
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3270-
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3271-
; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
3272-
; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
3273-
; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
32743266
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3275-
; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
3267+
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3268+
; CHECK-FIX-NOSCHED-NEXT: vldrne d16, [r1]
3269+
; CHECK-FIX-NOSCHED-NEXT: vorr q8, q8, q8
32763270
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
32773271
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
32783272
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
32793273
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
32803274
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
32813275
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
32823276
; CHECK-FIX-NOSCHED-NEXT: bx lr
3283-
;
3277+
32843278
; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
32853279
; CHECK-CORTEX-FIX: @ %bb.0:
32863280
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
@@ -4214,19 +4208,15 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1,
42144208
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
42154209
; CHECK-FIX: @ %bb.0:
42164210
; CHECK-FIX-NEXT: vorr q0, q0, q0
4211+
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
42174212
; CHECK-FIX-NEXT: cmp r0, #0
42184213
; CHECK-FIX-NEXT: beq .LBB88_2
42194214
; CHECK-FIX-NEXT: @ %bb.1:
4220-
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
42214215
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
4222-
; CHECK-FIX-NEXT: cmp r0, #0
4223-
; CHECK-FIX-NEXT: bne .LBB88_3
4224-
; CHECK-FIX-NEXT: b .LBB88_4
42254216
; CHECK-FIX-NEXT: .LBB88_2:
4226-
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
42274217
; CHECK-FIX-NEXT: cmp r0, #0
42284218
; CHECK-FIX-NEXT: beq .LBB88_4
4229-
; CHECK-FIX-NEXT: .LBB88_3:
4219+
; CHECK-FIX-NEXT: @ %bb.3:
42304220
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
42314221
; CHECK-FIX-NEXT: .LBB88_4:
42324222
; CHECK-FIX-NEXT: aesd.8 q8, q0

llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
1111
; CHECK: @ %bb.0: @ %entry
1212
; CHECK-NEXT: .save {r4, r5, r7, lr}
1313
; CHECK-NEXT: push {r4, r5, r7, lr}
14-
; CHECK-NEXT: cmp r0, #2
15-
; CHECK-NEXT: bls.w .LBB0_12
14+
; CHECK-NEXT: cmp r0, #3
15+
; CHECK-NEXT: blo.w .LBB0_12
1616
; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader
1717
; CHECK-NEXT: movw r5, :lower16:arr_183
1818
; CHECK-NEXT: movs r3, #0
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; opt -passes='default<O3>' -S --mtriple=aarch64-linux-gnu --mcpu=a64fx < %s | FileCheck %s
2+
3+
; Hoist identical instructions from successor blocks even if
4+
; they are not located at the same level. This could help generate
5+
; more compact vectorized code.
6+
; More info can be found at https://github.com/llvm/llvm-project/issues/68395.
7+
8+
9+
define void @hoist_then_vectorize(ptr %a, ptr %b, ptr %c, ptr %d, i32 %N){
10+
; CHECK-LABEL: @hoist_then_vectorize(
11+
; CHECK-NEXT: iter.check:
12+
; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
13+
; CHECK-NEXT: [[SHIFT:%.*]] = shl i64 [[VSCALE:%.*]], 1
14+
; CHECK-NEXT: [[MIN_ITR:%.*]] = icmp ugt i64 [[SHIFT:%.*]], 20
15+
; CHECK-NEXT: br i1 [[MIN_ITR:%.*]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITR_CHECK:%.*]]
16+
; CHECK: vector.main.loop.iter.check:
17+
; CHECK-NEXT: [[VSCALE2:%.*]] = tail call i64 @llvm.vscale.i64()
18+
; CHECK-NEXT: [[SHIFT2:%.*]] = shl i64 [[VSCALE2:%.*]], 2
19+
; CHECK-NEXT: [[MIN_ITR2:%.*]] = icmp ugt i64 [[SHIFT2:%.*]], 20
20+
; CHECK-NEXT: br i1 [[MIN_ITR2:%.*]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
21+
; CHECK: vector.ph:
22+
; CHECK-NEXT: [[VSCALE3:%.*]] = tail call i64 @llvm.vscale.i64()
23+
; CHECK-NEXT: [[SHIFT3:%.*]] = shl i64 [[VSCALE3:%.*]], 2
24+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 20, [[SHIFT3:%.*]]
25+
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 20, [[N_MOD_VF:%.*]]
26+
; CHECK-NEXT: [[VSCALE4:%.*]] = tail call i64 @llvm.vscale.i64()
27+
; CHECK-NEXT: [[SHIFT4:%.*]] = shl i64 [[VSCALE4:%.*]], 2
28+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
29+
; CHECK: vector.body:
30+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
31+
; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[INDEX:%.*]]
32+
; CHECK-NEXT: [[LOAD_D:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_D:%.*]], align 4
33+
; CHECK-NEXT: [[MASK1:%.*]] = icmp slt <vscale x 4 x i32> [[LOAD_D:%.*]], zeroinitializer
34+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX:%.*]]
35+
; CHECK-NEXT: [[LOAD_A:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_A:%.*]], align 4
36+
; CHECK-NEXT: [[MASK2:%.*]] = icmp eq <vscale x 4 x i32> [[LOAD_A:%.*]], zeroinitializer
37+
; CHECK-NEXT: [[SEL1:%.*]] = select <vscale x 4 x i1> [[MASK2:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
38+
; CHECK-NEXT: [[SEL2:%.*]] = select <vscale x 4 x i1> [[MASK1:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[SEL1:%.*]]
39+
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD_A:%.*]], [[SEL2:%.*]]
40+
; CHECK-NEXT: store <vscale x 4 x i32> [[ADD:%.*]], ptr [[GEP_A:%.*]], align 4
41+
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i64 [[INDEX:%.*]], [[SHIFT4:%.*]]
42+
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
43+
; CHECK-NEXT: br i1 [[LOOP_COND:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY:%.*]]
44+
45+
entry:
46+
br label %for.body
47+
48+
for.cond.cleanup: ; preds = %for.inc
49+
ret void
50+
51+
for.body: ; preds = %entry, %for.inc
52+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
53+
%arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
54+
%ldr_d = load i32, ptr %arrayidx, align 4
55+
%cmp1 = icmp slt i32 %ldr_d, 0
56+
br i1 %cmp1, label %if.then, label %if.else
57+
58+
if.then: ; preds = %for.body
59+
%arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
60+
%ldr_a = load i32, ptr %arrayidx3, align 4
61+
%add33 = add i32 %ldr_a, 1
62+
store i32 %add33, ptr %arrayidx3, align 4
63+
br label %for.inc
64+
65+
if.else: ; preds = %for.body
66+
%cmp7 = icmp eq i32 %ldr_d, 0
67+
br i1 %cmp7, label %if.then9, label %if.else15
68+
69+
if.then9: ; preds = %if.else
70+
%arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
71+
%ldr_a2 = load i32, ptr %arrayidx11, align 4
72+
%add1334 = add i32 %ldr_a2, 2
73+
store i32 %add1334, ptr %arrayidx11, align 4
74+
br label %for.inc
75+
76+
if.else15: ; preds = %if.else
77+
%arrayidx112 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
78+
%ldr_a3 = load i32, ptr %arrayidx112, align 4
79+
%add1935 = add i32 %ldr_a3, 3
80+
store i32 %add1935, ptr %arrayidx112, align 4
81+
br label %for.inc
82+
83+
for.inc: ; preds = %if.then, %if.else15, %if.then9
84+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
85+
%exitcond.not = icmp eq i64 %indvars.iv.next, 20
86+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
87+
}

llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,25 @@
33
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
44
target triple = "x86_64-unknown-linux-gnu"
55

6+
; SimplifyCFG hoists %tmp and %tmp2, but after skipping %tmp3 we reach the skipping threshold and
7+
; bail out, not hoisting %tmp4.
8+
69
declare void @llvm.experimental.deoptimize.isVoid(...) #0
710

811
define void @widget(i1 %arg) {
912
; CHECK-LABEL: @widget(
1013
; CHECK-NEXT: bb:
1114
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 5 to i32
15+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32
1216
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB1:%.*]], label [[BB4:%.*]]
1317
; CHECK: bb1:
14-
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32
1518
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i32
19+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 2 to i32
1620
; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0:[0-9]+]] [ "deopt"() ]
1721
; CHECK-NEXT: ret void
1822
; CHECK: bb4:
19-
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 1 to i32
20-
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 0 to i32
23+
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 1 to i32
24+
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 2 to i32
2125
; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0]] [ "deopt"() ]
2226
; CHECK-NEXT: ret void
2327
;
@@ -28,13 +32,15 @@ bb1: ; preds = %bb
2832
%tmp = trunc i64 5 to i32
2933
%tmp2 = trunc i64 0 to i32
3034
%tmp3 = trunc i64 0 to i32
35+
%tmp4 = trunc i64 2 to i32
3136
call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ]
3237
ret void
3338

3439
bb4: ; preds = %bb
3540
%tmp5 = trunc i64 5 to i32
3641
%tmp6 = trunc i64 1 to i32
3742
%tmp7 = trunc i64 0 to i32
43+
%tmp8 = trunc i64 2 to i32
3844
call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ]
3945
ret void
4046
}

0 commit comments

Comments
 (0)