Skip to content

Commit bd7949b

Browse files
committed
reland e5581df [SimplifyCFG] accumulate bonus insts cost
Fixed compile time increase due to always constructing LocalCostTracker. Now only construct LocalCostTracker when needed.
1 parent 3637dc6 commit bd7949b

File tree

9 files changed

+105
-40
lines changed

9 files changed

+105
-40
lines changed

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "llvm/ADT/ArrayRef.h"
1818
#include "llvm/IR/Dominators.h"
19+
#include "llvm/IR/ValueMap.h"
1920
#include "llvm/Support/CommandLine.h"
2021
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
2122
#include <cstdint>
@@ -164,6 +165,26 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
164165
/// values, but instcombine orders them so it usually won't matter.
165166
bool EliminateDuplicatePHINodes(BasicBlock *BB);
166167

168+
/// Class to track cost of simplify CFG transformations.
169+
class SimplifyCFGCostTracker {
170+
/// Number of bonus instructions due to folding branches into predecessors.
171+
/// E.g. folding
172+
/// if (cond1) return false;
173+
/// if (cond2) return false;
174+
/// return true;
175+
/// into
176+
/// if (cond1 | cond2) return false;
177+
/// return true;
178+
/// In this case cond2 is always executed whereas originally it may be
179+
/// skipped due to early exit of cond1. 'cond2' is called a bonus instruction
180+
/// and such bonus instructions could accumulate for unrolled loops, therefore
181+
/// use a value map to accumulate their costs across transformations.
182+
ValueMap<BasicBlock *, unsigned> NumBonusInsts;
183+
184+
public:
185+
void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount);
186+
unsigned getNumBonusInsts(BasicBlock *Parent);
187+
};
167188
/// This function is used to do simplification of a CFG. For example, it
168189
/// adjusts branches to branches to eliminate the extra hop, it eliminates
169190
/// unreachable basic blocks, and does other peephole optimization of the CFG.
@@ -174,7 +195,8 @@ extern cl::opt<bool> RequireAndPreserveDomTree;
174195
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
175196
DomTreeUpdater *DTU = nullptr,
176197
const SimplifyCFGOptions &Options = {},
177-
ArrayRef<WeakVH> LoopHeaders = {});
198+
ArrayRef<WeakVH> LoopHeaders = {},
199+
SimplifyCFGCostTracker *CostTracker = nullptr);
178200

179201
/// This function is used to flatten a CFG. For example, it uses parallel-and
180202
/// and parallel-or mode to collapse if-conditions and merge if-regions with
@@ -184,7 +206,8 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
184206
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
185207
/// branches to us and one of our successors, fold the setcc into the
186208
/// predecessor and use logical operations to pick the right destination.
187-
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
209+
bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker,
210+
DomTreeUpdater *DTU = nullptr,
188211
MemorySSAUpdater *MSSAU = nullptr,
189212
const TargetTransformInfo *TTI = nullptr,
190213
unsigned BonusInstThreshold = 1);

llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F,
221221
/// iterating until no more changes are made.
222222
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
223223
DomTreeUpdater *DTU,
224-
const SimplifyCFGOptions &Options) {
224+
const SimplifyCFGOptions &Options,
225+
SimplifyCFGCostTracker &CostTracker) {
225226
bool Changed = false;
226227
bool LocalChange = true;
227228

@@ -252,7 +253,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
252253
while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
253254
++BBIt;
254255
}
255-
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
256+
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) {
256257
LocalChange = true;
257258
++NumSimpl;
258259
}
@@ -266,11 +267,13 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
266267
DominatorTree *DT,
267268
const SimplifyCFGOptions &Options) {
268269
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
270+
SimplifyCFGCostTracker CostTracker;
269271

270272
bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
271273
EverChanged |=
272274
tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
273-
EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
275+
EverChanged |=
276+
iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker);
274277

275278
// If neither pass changed anything, we're done.
276279
if (!EverChanged) return false;
@@ -284,7 +287,8 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
284287
return true;
285288

286289
do {
287-
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
290+
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options,
291+
CostTracker);
288292
EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
289293
} while (EverChanged);
290294

llvm/lib/Transforms/Utils/LoopSimplify.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
480480
DominatorTree *DT, LoopInfo *LI,
481481
ScalarEvolution *SE, AssumptionCache *AC,
482482
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
483+
SimplifyCFGCostTracker CostTracker;
483484
bool Changed = false;
484485
if (MSSAU && VerifyMemorySSA)
485486
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -661,7 +662,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
661662
// The block has now been cleared of all instructions except for
662663
// a comparison and a conditional branch. SimplifyCFG may be able
663664
// to fold it now.
664-
if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
665+
if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU))
665666
continue;
666667

667668
// Success. The block is now dead, so remove it from the loop,

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,21 @@ STATISTIC(NumInvokes,
207207
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
208208
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
209209

210+
namespace llvm {
211+
212+
void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB,
213+
unsigned InstCount) {
214+
auto Loc = NumBonusInsts.find(BB);
215+
if (Loc == NumBonusInsts.end())
216+
Loc = NumBonusInsts.insert({BB, 0}).first;
217+
Loc->second = Loc->second + InstCount;
218+
}
219+
unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) {
220+
return NumBonusInsts.lookup(BB);
221+
}
222+
223+
} // namespace llvm
224+
210225
namespace {
211226

212227
// The first field contains the value that the switch produces when a certain
@@ -243,6 +258,10 @@ class SimplifyCFGOpt {
243258
ArrayRef<WeakVH> LoopHeaders;
244259
const SimplifyCFGOptions &Options;
245260
bool Resimplify;
261+
// Accumulates number of bonus instructions due to merging basic blocks
262+
// of common destination.
263+
SimplifyCFGCostTracker *CostTracker;
264+
std::unique_ptr<SimplifyCFGCostTracker> LocalCostTracker;
246265

247266
Value *isValueEqualityComparison(Instruction *TI);
248267
BasicBlock *GetValueEqualityComparisonCases(
@@ -286,8 +305,15 @@ class SimplifyCFGOpt {
286305
public:
287306
SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
288307
const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
289-
const SimplifyCFGOptions &Opts)
308+
const SimplifyCFGOptions &Opts,
309+
SimplifyCFGCostTracker *CostTracker_)
290310
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
311+
// Cannot do this with member initializer list since LocalCostTracker is not
312+
// initialized there yet.
313+
CostTracker = CostTracker_
314+
? CostTracker_
315+
: (LocalCostTracker.reset(new SimplifyCFGCostTracker()),
316+
LocalCostTracker.get());
291317
assert((!DTU || !DTU->hasPostDomTree()) &&
292318
"SimplifyCFG is not yet capable of maintaining validity of a "
293319
"PostDomTree, so don't ask for it.");
@@ -3624,8 +3650,9 @@ static bool isVectorOp(Instruction &I) {
36243650
/// If this basic block is simple enough, and if a predecessor branches to us
36253651
/// and one of our successors, fold the block into the predecessor and use
36263652
/// logical operations to pick the right destination.
3627-
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
3628-
MemorySSAUpdater *MSSAU,
3653+
bool llvm::FoldBranchToCommonDest(BranchInst *BI,
3654+
SimplifyCFGCostTracker &CostTracker,
3655+
DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU,
36293656
const TargetTransformInfo *TTI,
36303657
unsigned BonusInstThreshold) {
36313658
// If this block ends with an unconditional branch,
@@ -3697,7 +3724,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
36973724
// as "bonus instructions", and only allow this transformation when the
36983725
// number of the bonus instructions we'll need to create when cloning into
36993726
// each predecessor does not exceed a certain threshold.
3700-
unsigned NumBonusInsts = 0;
37013727
bool SawVectorOp = false;
37023728
const unsigned PredCount = Preds.size();
37033729
for (Instruction &I : *BB) {
@@ -3716,12 +3742,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
37163742
// predecessor. Ignore free instructions.
37173743
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
37183744
TargetTransformInfo::TCC_Free) {
3719-
NumBonusInsts += PredCount;
3720-
3721-
// Early exits once we reach the limit.
3722-
if (NumBonusInsts >
3723-
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3724-
return false;
3745+
for (auto PredBB : Preds) {
3746+
CostTracker.updateNumBonusInsts(PredBB, PredCount);
3747+
// Early exits once we reach the limit.
3748+
if (CostTracker.getNumBonusInsts(PredBB) >
3749+
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3750+
return false;
3751+
}
37253752
}
37263753

37273754
auto IsBCSSAUse = [BB, &I](Use &U) {
@@ -3735,10 +3762,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
37353762
if (!all_of(I.uses(), IsBCSSAUse))
37363763
return false;
37373764
}
3738-
if (NumBonusInsts >
3739-
BonusInstThreshold *
3740-
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3741-
return false;
3765+
for (auto PredBB : Preds) {
3766+
if (CostTracker.getNumBonusInsts(PredBB) >
3767+
BonusInstThreshold *
3768+
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3769+
return false;
3770+
}
37423771

37433772
// Ok, we have the budget. Perform the transformation.
37443773
for (BasicBlock *PredBlock : Preds) {
@@ -6889,7 +6918,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
68896918
// branches to us and our successor, fold the comparison into the
68906919
// predecessor and use logical operations to update the incoming value
68916920
// for PHI nodes in common successor.
6892-
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
6921+
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
68936922
Options.BonusInstThreshold))
68946923
return requestResimplify();
68956924
return false;
@@ -6958,7 +6987,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
69586987
// If this basic block is ONLY a compare and a branch, and if a predecessor
69596988
// branches to us and one of our successors, fold the comparison into the
69606989
// predecessor and use logical operations to pick the right destination.
6961-
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
6990+
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
69626991
Options.BonusInstThreshold))
69636992
return requestResimplify();
69646993

@@ -7257,8 +7286,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
72577286

72587287
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
72597288
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
7260-
ArrayRef<WeakVH> LoopHeaders) {
7289+
ArrayRef<WeakVH> LoopHeaders,
7290+
SimplifyCFGCostTracker *CostTracker) {
72617291
return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7262-
Options)
7292+
Options, CostTracker)
72637293
.run(BB);
72647294
}

llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
2+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg<bonus-inst-threshold=3>,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
33

44
define void @basic(i32 %K, i32 %N) {
55
; CHECK-LABEL: @basic(

llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -O2 -S < %s | FileCheck %s
2+
; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s
33

44
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
55
target triple = "x86_64--"

llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33

44
%struct.S = type { [4 x i32] }
55

6-
; Check the second, third, and fourth basic blocks are folded into
7-
; the first basic block since each has one bonus intruction, which
8-
; does not exceed the default bouns instruction threshold of 1.
6+
; Check the second basic block is folded into the first basic block
7+
; since it has one bonus instruction. The third basic block is not
8+
; folded into the first basic block since the accumulated bonus
9+
; instructions will exceed the default threshold of 1. The fourth basic
10+
; block is folded into the third basic block since the accumulated
11+
; bonus instruction cost is 1.
912

1013
define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
1114
; CHECK-LABEL: @test1(
@@ -15,14 +18,18 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
1518
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]]
1619
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0
1720
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]]
21+
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]]
22+
; CHECK: for.cond.1:
1823
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]]
1924
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0
20-
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]]
2125
; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]]
2226
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0
23-
; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]]
24-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true
25-
; CHECK-NEXT: ret i1 [[SPEC_SELECT]]
27+
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]]
28+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true
29+
; CHECK-NEXT: br label [[CLEANUP]]
30+
; CHECK: cleanup:
31+
; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ]
32+
; CHECK-NEXT: ret i1 [[CMP]]
2633
;
2734
entry:
2835
%mul0 = mul i32 %0, %0

llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL
2-
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
3-
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
2+
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE
3+
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE
44
; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL
5-
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=2>' -S | FileCheck %s --check-prefix=AGGRESSIVE
6-
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=4>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
5+
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=3>' -S | FileCheck %s --check-prefix=AGGRESSIVE
6+
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=6>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
77

88
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
99
; NORMAL-LABEL: @foo(

llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s
3-
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s
3+
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s
44

55
declare void @sideeffect0()
66
declare void @sideeffect1()
@@ -10,7 +10,7 @@ declare i1 @gen1()
1010

1111
; Here we'd want to duplicate %v3_adj into two predecessors,
1212
; but -bonus-inst-threshold=1 says that we can only clone it into one.
13-
; With -bonus-inst-threshold=2 we can clone it into both though.
13+
; With -bonus-inst-threshold=3 we can clone it into both though.
1414
define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) {
1515
; THR1-LABEL: @two_preds_with_extra_op(
1616
; THR1-NEXT: entry:

0 commit comments

Comments
 (0)