53
53
#include " llvm/ADT/STLExtras.h"
54
54
#include " llvm/ADT/SmallPtrSet.h"
55
55
#include " llvm/ADT/Statistic.h"
56
+ #include " llvm/Analysis/BlockFrequencyInfo.h"
56
57
#include " llvm/Analysis/DomTreeUpdater.h"
57
58
#include " llvm/Analysis/GlobalsModRef.h"
58
59
#include " llvm/Analysis/InstructionSimplify.h"
75
76
#include " llvm/IR/Module.h"
76
77
#include " llvm/InitializePasses.h"
77
78
#include " llvm/Pass.h"
79
+ #include " llvm/Support/CommandLine.h"
78
80
#include " llvm/Support/Debug.h"
79
81
#include " llvm/Support/raw_ostream.h"
80
82
#include " llvm/Transforms/Scalar.h"
81
83
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84
+ #include < cmath>
82
85
using namespace llvm ;
83
86
84
87
#define DEBUG_TYPE " tailcallelim"
@@ -87,6 +90,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
87
90
STATISTIC (NumRetDuped, " Number of return duplicated" );
88
91
STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
89
92
93
+ static cl::opt<bool > ForceDisableBFI (
94
+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
95
+ cl::desc(" Force disabling recomputing of function entry count, on "
96
+ " successful tail recursion elimination." ));
97
+
90
98
// / Scan the specified function for alloca instructions.
91
99
// / If it contains any dynamic allocas, returns false.
92
100
static bool canTRE (Function &F) {
@@ -399,6 +407,9 @@ class TailRecursionEliminator {
399
407
AliasAnalysis *AA;
400
408
OptimizationRemarkEmitter *ORE;
401
409
DomTreeUpdater &DTU;
410
+ BlockFrequencyInfo *const BFI;
411
+ const uint64_t OrigEntryBBFreq;
412
+ const uint64_t OrigEntryCount;
402
413
403
414
// The below are shared state we want to have available when eliminating any
404
415
// calls in the function. Their values should be populated by
@@ -428,8 +439,19 @@ class TailRecursionEliminator {
428
439
429
440
TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
430
441
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
431
- DomTreeUpdater &DTU)
432
- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
442
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
443
+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
444
+ OrigEntryBBFreq (
445
+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U),
446
+ OrigEntryCount(F.getEntryCount() ? F.getEntryCount()->getCount() : 0) {
447
+ if (BFI) {
448
+ // The assert is meant as API documentation for the caller.
449
+ assert ((OrigEntryCount != 0 && OrigEntryBBFreq != 0 ) &&
450
+ " If a BFI was provided, the function should have both an entry "
451
+ " count that is non-zero and an entry basic block with a non-zero "
452
+ " frequency." );
453
+ }
454
+ }
433
455
434
456
CallInst *findTRECandidate (BasicBlock *BB);
435
457
@@ -450,7 +472,7 @@ class TailRecursionEliminator {
450
472
public:
451
473
static bool eliminate (Function &F, const TargetTransformInfo *TTI,
452
474
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
453
- DomTreeUpdater &DTU);
475
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
454
476
};
455
477
} // namespace
456
478
@@ -735,6 +757,28 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
735
757
CI->eraseFromParent (); // Remove call.
736
758
DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
737
759
++NumEliminated;
760
+ if (OrigEntryBBFreq) {
761
+ assert (F.getEntryCount ().has_value ());
762
+ // This pass is not expected to remove BBs, only add an entry BB. For that
763
+ // reason, and because the BB here isn't the new entry BB, the BFI lookup is
764
+ // expected to succeed.
765
+ assert (&F.getEntryBlock () != BB);
766
+ auto RelativeBBFreq =
767
+ static_cast <double >(BFI->getBlockFreq (BB).getFrequency ()) /
768
+ static_cast <double >(OrigEntryBBFreq);
769
+ auto ToSubtract =
770
+ static_cast <uint64_t >(std::round (RelativeBBFreq * OrigEntryCount));
771
+ auto OldEntryCount = F.getEntryCount ()->getCount ();
772
+ if (OldEntryCount <= ToSubtract) {
773
+ LLVM_DEBUG (
774
+ errs () << " [TRE] The entrycount attributable to the recursive call, "
775
+ << ToSubtract
776
+ << " , should be strictly lower than the function entry count, "
777
+ << OldEntryCount << " \n " );
778
+ } else {
779
+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
780
+ }
781
+ }
738
782
return true ;
739
783
}
740
784
@@ -861,7 +905,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
861
905
const TargetTransformInfo *TTI,
862
906
AliasAnalysis *AA,
863
907
OptimizationRemarkEmitter *ORE,
864
- DomTreeUpdater &DTU) {
908
+ DomTreeUpdater &DTU,
909
+ BlockFrequencyInfo *BFI) {
865
910
if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
866
911
return false ;
867
912
@@ -877,7 +922,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
877
922
return MadeChange;
878
923
879
924
// Change any tail recursive calls to loops.
880
- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
925
+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
881
926
882
927
for (BasicBlock &BB : F)
883
928
MadeChange |= TRE.processBlock (BB);
@@ -919,7 +964,8 @@ struct TailCallElim : public FunctionPass {
919
964
return TailRecursionEliminator::eliminate (
920
965
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
921
966
&getAnalysis<AAResultsWrapperPass>().getAAResults (),
922
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
967
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
968
+ /* BFI=*/ nullptr );
923
969
}
924
970
};
925
971
}
@@ -942,14 +988,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
942
988
943
989
TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
944
990
AliasAnalysis &AA = AM.getResult <AAManager>(F);
991
+ // This must come first. Computing BFI needs the 2 tree analyses requested
992
+ // below; if it ran after those getCachedResult calls had returned nullptr
993
+ // (likely for the PDT), updates to the trees would be missed.
994
+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
995
+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
996
+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
997
+ : nullptr ;
945
998
auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
946
999
auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
947
1000
auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
948
1001
// There is no noticeable performance difference here between Lazy and Eager
949
1002
// UpdateStrategy based on some test results. It is feasible to switch the
950
1003
// UpdateStrategy to Lazy if we find it profitable later.
951
1004
DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
952
- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
1005
+ bool Changed =
1006
+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
953
1007
954
1008
if (!Changed)
955
1009
return PreservedAnalyses::all ();
0 commit comments