53
53
#include " llvm/ADT/STLExtras.h"
54
54
#include " llvm/ADT/SmallPtrSet.h"
55
55
#include " llvm/ADT/Statistic.h"
56
+ #include " llvm/Analysis/BlockFrequencyInfo.h"
56
57
#include " llvm/Analysis/DomTreeUpdater.h"
57
58
#include " llvm/Analysis/GlobalsModRef.h"
58
59
#include " llvm/Analysis/InstructionSimplify.h"
@@ -409,6 +410,8 @@ class TailRecursionEliminator {
409
410
AliasAnalysis *AA;
410
411
OptimizationRemarkEmitter *ORE;
411
412
DomTreeUpdater &DTU;
413
+ const uint64_t OrigEntryBBFreq;
414
+ DenseMap<const BasicBlock *, uint64_t > OriginalBBFreqs;
412
415
413
416
// The below are shared state we want to have available when eliminating any
414
417
// calls in the function. These values should be populated by
@@ -438,8 +441,18 @@ class TailRecursionEliminator {
438
441
439
442
TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
440
443
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
441
- DomTreeUpdater &DTU)
442
- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
444
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
445
+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU),
446
+ OrigEntryBBFreq (
447
+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U) {
448
+ assert (((BFI != nullptr ) == (OrigEntryBBFreq != 0 )) &&
449
+ " If the function has an entry count, its entry basic block should "
450
+ " have a non-zero frequency. Pass a nullptr BFI if the function has "
451
+ " no entry count" );
452
+ if (BFI)
453
+ for (const auto &BB : F)
454
+ OriginalBBFreqs.insert ({&BB, BFI->getBlockFreq (&BB).getFrequency ()});
455
+ }
443
456
444
457
CallInst *findTRECandidate (BasicBlock *BB);
445
458
@@ -460,7 +473,7 @@ class TailRecursionEliminator {
460
473
public:
461
474
static bool eliminate (Function &F, const TargetTransformInfo *TTI,
462
475
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
463
- DomTreeUpdater &DTU);
476
+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
464
477
};
465
478
} // namespace
466
479
@@ -746,6 +759,17 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
746
759
CI->eraseFromParent (); // Remove call.
747
760
DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
748
761
++NumEliminated;
762
+ if (auto EC = F.getEntryCount ()) {
763
+ assert (OrigEntryBBFreq);
764
+ auto It = OriginalBBFreqs.find (BB);
765
+ assert (It != OriginalBBFreqs.end ());
766
+ auto RelativeBBFreq =
767
+ static_cast <double >(It->second ) / static_cast <double >(OrigEntryBBFreq);
768
+ auto OldEntryCount = EC.value ().getCount ();
769
+ auto ToSubtract = static_cast <uint64_t >(RelativeBBFreq * OldEntryCount);
770
+ assert (OldEntryCount > ToSubtract);
771
+ F.setEntryCount (OldEntryCount - ToSubtract, EC->getType ());
772
+ }
749
773
return true ;
750
774
}
751
775
@@ -872,7 +896,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
872
896
const TargetTransformInfo *TTI,
873
897
AliasAnalysis *AA,
874
898
OptimizationRemarkEmitter *ORE,
875
- DomTreeUpdater &DTU) {
899
+ DomTreeUpdater &DTU,
900
+ BlockFrequencyInfo *BFI) {
876
901
if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
877
902
return false ;
878
903
@@ -888,7 +913,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
888
913
return MadeChange;
889
914
890
915
// Change any tail recursive calls to loops.
891
- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
916
+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
892
917
893
918
for (BasicBlock &BB : F)
894
919
MadeChange |= TRE.processBlock (BB);
@@ -909,6 +934,7 @@ struct TailCallElim : public FunctionPass {
909
934
AU.addRequired <TargetTransformInfoWrapperPass>();
910
935
AU.addRequired <AAResultsWrapperPass>();
911
936
AU.addRequired <OptimizationRemarkEmitterWrapperPass>();
937
+ AU.addRequired <BlockFrequencyInfoWrapperPass>();
912
938
AU.addPreserved <GlobalsAAWrapperPass>();
913
939
AU.addPreserved <DominatorTreeWrapperPass>();
914
940
AU.addPreserved <PostDominatorTreeWrapperPass>();
@@ -918,6 +944,9 @@ struct TailCallElim : public FunctionPass {
918
944
if (skipFunction (F))
919
945
return false ;
920
946
947
+ auto *BFI = F.getEntryCount ().has_value ()
948
+ ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI ()
949
+ : nullptr ;
921
950
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
922
951
auto *DT = DTWP ? &DTWP->getDomTree () : nullptr ;
923
952
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
@@ -930,7 +959,8 @@ struct TailCallElim : public FunctionPass {
930
959
return TailRecursionEliminator::eliminate (
931
960
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
932
961
&getAnalysis<AAResultsWrapperPass>().getAAResults (),
933
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
962
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
963
+ BFI);
934
964
}
935
965
};
936
966
}
@@ -953,14 +983,21 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
953
983
954
984
TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
955
985
AliasAnalysis &AA = AM.getResult <AAManager>(F);
986
+ // This must come first: computing BFI needs the two dominator-tree analyses
987
+ // (DT and PDT). If it came after the getCachedResult calls below and those
988
+ // returned nullptr (likely for the PDT), updates to the trees would be missed.
989
+ auto *BFI = F.getEntryCount ().has_value ()
990
+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
991
+ : nullptr ;
956
992
auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
957
993
auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
958
994
auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
959
995
// There is no noticeable performance difference here between Lazy and Eager
960
996
// UpdateStrategy based on some test results. It is feasible to switch the
961
997
// UpdateStrategy to Lazy if we find it profitable later.
962
998
DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
963
- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
999
+ bool Changed =
1000
+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
964
1001
965
1002
if (!Changed)
966
1003
return PreservedAnalyses::all ();
0 commit comments