53
53
#include " llvm/ADT/STLExtras.h"
54
54
#include " llvm/ADT/SmallPtrSet.h"
55
55
#include " llvm/ADT/Statistic.h"
56
+ #include " llvm/Analysis/BlockFrequencyInfo.h"
56
57
#include " llvm/Analysis/DomTreeUpdater.h"
57
58
#include " llvm/Analysis/GlobalsModRef.h"
58
59
#include " llvm/Analysis/InstructionSimplify.h"
75
76
#include " llvm/IR/Module.h"
76
77
#include " llvm/InitializePasses.h"
77
78
#include " llvm/Pass.h"
79
+ #include " llvm/Support/CommandLine.h"
78
80
#include " llvm/Support/Debug.h"
79
81
#include " llvm/Support/raw_ostream.h"
80
82
#include " llvm/Transforms/Scalar.h"
81
83
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84
+ #include < cmath>
82
85
using namespace llvm ;
83
86
84
87
#define DEBUG_TYPE " tailcallelim"
@@ -87,6 +90,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
87
90
STATISTIC (NumRetDuped, " Number of return duplicated" );
88
91
STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
89
92
93
+ static cl::opt<bool > ForceDisableBFI (
94
+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
95
+ cl::desc(" Force disabling recomputing of function entry count, on "
96
+ " successful tail recursion elimination." ));
97
+
90
98
// / Scan the specified function for alloca instructions.
91
99
// / If it contains any dynamic allocas, returns false.
92
100
static bool canTRE (Function &F) {
@@ -409,6 +417,8 @@ class TailRecursionEliminator {
409
417
AliasAnalysis *AA;
410
418
OptimizationRemarkEmitter *ORE;
411
419
DomTreeUpdater &DTU;
420
+ BlockFrequencyInfo *const BFI;
421
+ const uint64_t OrigEntryBBFreq;
412
422
413
423
// The below are shared state we want to have available when eliminating any
414
424
// calls in the function. Their values should be populated by
@@ -438,8 +448,20 @@ class TailRecursionEliminator {
438
448
439
449
TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
440
450
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
441
- DomTreeUpdater &DTU)
442
- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
451
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
452
+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
453
+ OrigEntryBBFreq (
454
+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U) {
455
+ if (BFI) {
456
+ auto EC = F.getEntryCount ();
457
+ (void )EC;
458
+ assert (
459
+ (EC.has_value () && EC->getCount () != 0 && OrigEntryBBFreq) &&
460
+ " If the function has an entry count, its entry basic block should "
461
+ " have a non-zero frequency. Pass a nullptr BFI if the function has "
462
+ " no entry count" );
463
+ }
464
+ }
443
465
444
466
CallInst *findTRECandidate (BasicBlock *BB);
445
467
@@ -460,7 +482,7 @@ class TailRecursionEliminator {
460
482
public:
461
483
static bool eliminate (Function &F, const TargetTransformInfo *TTI,
462
484
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
463
- DomTreeUpdater &DTU);
485
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
464
486
};
465
487
} // namespace
466
488
@@ -746,6 +768,21 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
746
768
CI->eraseFromParent (); // Remove call.
747
769
DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
748
770
++NumEliminated;
771
+ if (OrigEntryBBFreq) {
772
+ assert (F.getEntryCount ().has_value ());
773
+ // This pass is not expected to remove BBs, only add an entry BB. For that
774
+ // reason, and because the BB here isn't the new entry BB, the BFI lookup is
775
+ // expected to succeed.
776
+ assert (&F.getEntryBlock () != BB);
777
+ auto RelativeBBFreq =
778
+ static_cast <double >(BFI->getBlockFreq (BB).getFrequency ()) /
779
+ static_cast <double >(OrigEntryBBFreq);
780
+ auto OldEntryCount = F.getEntryCount ()->getCount ();
781
+ auto ToSubtract =
782
+ static_cast <uint64_t >(std::round (RelativeBBFreq * OldEntryCount));
783
+ assert (OldEntryCount > ToSubtract);
784
+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
785
+ }
749
786
return true ;
750
787
}
751
788
@@ -872,7 +909,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
872
909
const TargetTransformInfo *TTI,
873
910
AliasAnalysis *AA,
874
911
OptimizationRemarkEmitter *ORE,
875
- DomTreeUpdater &DTU) {
912
+ DomTreeUpdater &DTU,
913
+ BlockFrequencyInfo *BFI) {
876
914
if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
877
915
return false ;
878
916
@@ -888,7 +926,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
888
926
return MadeChange;
889
927
890
928
// Change any tail recursive calls to loops.
891
- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
929
+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
892
930
893
931
for (BasicBlock &BB : F)
894
932
MadeChange |= TRE.processBlock (BB);
@@ -930,7 +968,8 @@ struct TailCallElim : public FunctionPass {
930
968
return TailRecursionEliminator::eliminate (
931
969
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
932
970
&getAnalysis<AAResultsWrapperPass>().getAAResults (),
933
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
971
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
972
+ nullptr );
934
973
}
935
974
};
936
975
}
@@ -953,14 +992,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
953
992
954
993
TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
955
994
AliasAnalysis &AA = AM.getResult <AAManager>(F);
995
+ // The BFI request must come first. Computing it needs the DT and PDT
996
+ // analyses, so if it came after the lines asking for their cached results,
997
+ // and those were nullptr (likely for the PDT), tree updates would be missed.
998
+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
999
+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
1000
+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
1001
+ : nullptr ;
956
1002
auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
957
1003
auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
958
1004
auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
959
1005
// There is no noticeable performance difference here between Lazy and Eager
960
1006
// UpdateStrategy based on some test results. It is feasible to switch the
961
1007
// UpdateStrategy to Lazy if we find it profitable later.
962
1008
DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
963
- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
1009
+ bool Changed =
1010
+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
964
1011
965
1012
if (!Changed)
966
1013
return PreservedAnalyses::all ();
0 commit comments