Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 3972070

Browse files
committed
[SimplifyCFG] Fix a cost modeling oversight in branch commoning
The cost modeling was not accounting for the fact we were duplicating the instruction once per predecessor. With a default threshold of 1, this meant we were actually creating #pred copies. Adding to the fun, there is *absolutely no* test coverage for this. Simply bailing for more than one predecessor passes all checked in tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341001 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6568fb5 commit 3972070

File tree

2 files changed

+60
-2
lines changed

2 files changed

+60
-2
lines changed

lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,6 +2539,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
25392539
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
25402540
BasicBlock *BB = BI->getParent();
25412541

2542+
const unsigned PredCount = pred_size(BB);
2543+
25422544
Instruction *Cond = nullptr;
25432545
if (BI->isConditional())
25442546
Cond = dyn_cast<Instruction>(BI->getCondition());
@@ -2588,7 +2590,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
25882590
// too many instructions and these involved instructions can be executed
25892591
// unconditionally. We denote all involved instructions except the condition
25902592
// as "bonus instructions", and only allow this transformation when the
2591-
// number of the bonus instructions does not exceed a certain threshold.
2593+
// number of the bonus instructions we'll need to create when cloning into
2594+
// each predecessor does not exceed a certain threshold.
25922595
unsigned NumBonusInsts = 0;
25932596
for (auto I = BB->begin(); Cond != &*I; ++I) {
25942597
// Ignore dbg intrinsics.
@@ -2603,7 +2606,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
26032606
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
26042607
// to use any other instruction, User must be an instruction between next(I)
26052608
// and Cond.
2606-
++NumBonusInsts;
2609+
2610+
// Account for the cost of duplicating this instruction into each
2611+
// predecessor.
2612+
NumBonusInsts += PredCount;
26072613
// Early exits once we reach the limit.
26082614
if (NumBonusInsts > BonusInstThreshold)
26092615
return false;

test/Transforms/SimplifyCFG/branch-fold-threshold.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL
22
; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
3+
; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
34

45
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
56
; NORMAL-LABEL: @foo(
@@ -26,3 +27,54 @@ cond.end:
2627
%cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ]
2728
ret i32 %cond
2829
}
30+
31+
declare void @distinct_a();
32+
declare void @distinct_b();
33+
34+
;; Like foo, but have to duplicate into multiple predecessors
35+
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
36+
; NORMAL-LABEL: @bar(
37+
; AGGRESSIVE-LABEL: @bar(
38+
entry:
39+
%cmp_split = icmp slt i32 %d, %b
40+
%cmp = icmp sgt i32 %d, 3
41+
br i1 %cmp_split, label %pred_a, label %pred_b
42+
pred_a:
43+
; NORMAL-LABEL: pred_a:
44+
; AGGRESSIVE-LABEL: pred_a:
45+
; WAYAGGRESSIVE-LABEL: pred_a:
46+
; NORMAL: br i1
47+
; AGGRESSIVE: br i1
48+
; WAYAGGRESSIVE: br i1
49+
call void @distinct_a();
50+
br i1 %cmp, label %cond.end, label %lor.lhs.false
51+
pred_b:
52+
; NORMAL-LABEL: pred_b:
53+
; AGGRESSIVE-LABEL: pred_b:
54+
; WAYAGGRESSIVE-LABEL: pred_b:
55+
; NORMAL: br i1
56+
; AGGRESSIVE: br i1
57+
; WAYAGGRESSIVE: br i1
58+
call void @distinct_b();
59+
br i1 %cmp, label %cond.end, label %lor.lhs.false
60+
61+
lor.lhs.false:
62+
%mul = shl i32 %c, 1
63+
%add = add nsw i32 %mul, %a
64+
%cmp1 = icmp slt i32 %add, %b
65+
br i1 %cmp1, label %cond.false, label %cond.end
66+
; NORMAL-LABEL: lor.lhs.false:
67+
; AGGRESIVE-LABEL: lor.lhs.false:
68+
; WAYAGGRESIVE-LABEL: lor.lhs.false:
69+
; NORMAL: br i1
70+
; AGGRESSIVE: br i1
71+
; WAYAGGRESSIVE-NOT: br i1
72+
73+
cond.false:
74+
%0 = load i32, i32* %input, align 4
75+
br label %cond.end
76+
77+
cond.end:
78+
%cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ],[ 0, %pred_a ],[ 0, %pred_b ]
79+
ret i32 %cond
80+
}

0 commit comments

Comments
 (0)