53
53
#include " llvm/ADT/STLExtras.h"
54
54
#include " llvm/ADT/SmallPtrSet.h"
55
55
#include " llvm/ADT/Statistic.h"
56
+ #include " llvm/Analysis/BlockFrequencyInfo.h"
56
57
#include " llvm/Analysis/DomTreeUpdater.h"
57
58
#include " llvm/Analysis/GlobalsModRef.h"
58
59
#include " llvm/Analysis/InstructionSimplify.h"
75
76
#include " llvm/IR/Module.h"
76
77
#include " llvm/InitializePasses.h"
77
78
#include " llvm/Pass.h"
79
+ #include " llvm/Support/CommandLine.h"
78
80
#include " llvm/Support/Debug.h"
79
81
#include " llvm/Support/raw_ostream.h"
80
82
#include " llvm/Transforms/Scalar.h"
81
83
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84
+ #include < cmath>
82
85
using namespace llvm ;
83
86
84
87
#define DEBUG_TYPE " tailcallelim"
@@ -87,6 +90,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
87
90
STATISTIC (NumRetDuped, " Number of return duplicated" );
88
91
STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
89
92
93
+ static cl::opt<bool > ForceDisableBFI (
94
+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
95
+ cl::desc(" Force disabling recomputing of function entry count, on "
96
+ " successful tail recursion elimination." ));
97
+
90
98
// / Scan the specified function for alloca instructions.
91
99
// / If it contains any dynamic allocas, returns false.
92
100
static bool canTRE (Function &F) {
@@ -399,6 +407,8 @@ class TailRecursionEliminator {
399
407
AliasAnalysis *AA;
400
408
OptimizationRemarkEmitter *ORE;
401
409
DomTreeUpdater &DTU;
410
+ BlockFrequencyInfo *const BFI;
411
+ const uint64_t OrigEntryBBFreq;
402
412
403
413
// The below are shared state we want to have available when eliminating any
404
414
// calls in the function. Their values should be populated by
@@ -428,8 +438,20 @@ class TailRecursionEliminator {
428
438
429
439
TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
430
440
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
431
- DomTreeUpdater &DTU)
432
- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
441
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
442
+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
443
+ OrigEntryBBFreq (
444
+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U) {
445
+ if (BFI) {
446
+ auto EC = F.getEntryCount ();
447
+ (void )EC;
448
+ assert (
449
+ (EC.has_value () && EC->getCount () != 0 && OrigEntryBBFreq) &&
450
+ " If the function has an entry count, its entry basic block should "
451
+ " have a non-zero frequency. Pass a nullptr BFI if the function has "
452
+ " no entry count" );
453
+ }
454
+ }
433
455
434
456
CallInst *findTRECandidate (BasicBlock *BB);
435
457
@@ -450,7 +472,7 @@ class TailRecursionEliminator {
450
472
public:
451
473
static bool eliminate (Function &F, const TargetTransformInfo *TTI,
452
474
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
453
- DomTreeUpdater &DTU);
475
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
454
476
};
455
477
} // namespace
456
478
@@ -735,6 +757,21 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
735
757
CI->eraseFromParent (); // Remove call.
736
758
DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
737
759
++NumEliminated;
760
+ if (OrigEntryBBFreq) {
761
+ assert (F.getEntryCount ().has_value ());
762
+ // This pass is not expected to remove BBs, only add an entry BB. For that
763
+ // reason, and because the BB here isn't the new entry BB, the BFI lookup is
764
+ // expected to succeed.
765
+ assert (&F.getEntryBlock () != BB);
766
+ auto RelativeBBFreq =
767
+ static_cast <double >(BFI->getBlockFreq (BB).getFrequency ()) /
768
+ static_cast <double >(OrigEntryBBFreq);
769
+ auto OldEntryCount = F.getEntryCount ()->getCount ();
770
+ auto ToSubtract =
771
+ static_cast <uint64_t >(std::round (RelativeBBFreq * OldEntryCount));
772
+ assert (OldEntryCount > ToSubtract);
773
+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
774
+ }
738
775
return true ;
739
776
}
740
777
@@ -861,7 +898,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
861
898
const TargetTransformInfo *TTI,
862
899
AliasAnalysis *AA,
863
900
OptimizationRemarkEmitter *ORE,
864
- DomTreeUpdater &DTU) {
901
+ DomTreeUpdater &DTU,
902
+ BlockFrequencyInfo *BFI) {
865
903
if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
866
904
return false ;
867
905
@@ -877,7 +915,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
877
915
return MadeChange;
878
916
879
917
// Change any tail recursive calls to loops.
880
- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
918
+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
881
919
882
920
for (BasicBlock &BB : F)
883
921
MadeChange |= TRE.processBlock (BB);
@@ -919,7 +957,8 @@ struct TailCallElim : public FunctionPass {
919
957
return TailRecursionEliminator::eliminate (
920
958
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
921
959
&getAnalysis<AAResultsWrapperPass>().getAAResults (),
922
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
960
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
961
+ nullptr );
923
962
}
924
963
};
925
964
}
@@ -942,14 +981,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
942
981
943
982
TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
944
983
AliasAnalysis &AA = AM.getResult <AAManager>(F);
984
+ // This must come first. It needs the 2 analyses, meaning, if it came after
985
+ // the lines asking for the cached result, should they be nullptr (which, in
986
+ // the case of the PDT, is likely), updates to the trees would be missed.
987
+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
988
+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
989
+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
990
+ : nullptr ;
945
991
auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
946
992
auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
947
993
auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
948
994
// There is no noticeable performance difference here between Lazy and Eager
949
995
// UpdateStrategy based on some test results. It is feasible to switch the
950
996
// UpdateStrategy to Lazy if we find it profitable later.
951
997
DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
952
- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
998
+ bool Changed =
999
+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
953
1000
954
1001
if (!Changed)
955
1002
return PreservedAnalyses::all ();
0 commit comments