[Inlining] Teach shouldBeDeferred to take the total cost into account

kazutakahirata · kazutakahirata · commit e8984fe65b94 · 2020-05-05T11:02:06.000-07:00
Summary: This patch teaches shouldBeDeferred to take into account the total cost of inlining. Suppose we have a call hierarchy {A1,A2,A3,...}->B->C. (Each of A1, A2, A3, ... calls B, which in turn calls C.) Without this patch, shouldBeDeferred essentially returns true if TotalSecondaryCost < IC.getCost() where TotalSecondaryCost is the total cost of inlining B into As. This means that if B is a small wraper function, for example, it would get inlined into all of As. In turn, C gets inlined into all of As. In other words, shouldBeDeferred ignores the cost of inlining C into each of As. This patch adds an option, inline-deferral-scale, to replace the expression above with: TotalCost < Allowance where - TotalCost is TotalSecondaryCost + IC.getCost() * # of As, and - Allowance is IC.getCost() * Scale For now, the new option defaults to -1, disabling the new scheme. Reviewers: davidxl Subscribers: eraman, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79138
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -93,6 +93,13 @@ static cl::opt<bool>
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
+// An integer used to limit the cost of inline deferral.  The default negative
+// number tells shouldBeDeferred to only take the secondary cost into account.
+static cl::opt<int>
+    InlineDeferralScale("inline-deferral-scale",
+                        cl::desc("Scale to limit the cost of inline deferral"),
+                        cl::init(-1), cl::Hidden);
+
 namespace {
 
 enum class InlinerFunctionImportStatsOpts {
@@ -338,12 +345,8 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
   bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
   // This bool tracks what happens if we DO inline C into B.
   bool InliningPreventsSomeOuterInline = false;
+  unsigned NumCallerUsers = 0;
   for (User *U : Caller->users()) {
-    // If the caller will not be removed (either because it does not have a
-    // local linkage or because the LastCallToStaticBonus has been already
-    // applied), then we can exit the loop early.
-    if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
-      return false;
     CallBase *CS2 = dyn_cast<CallBase>(U);
 
     // If this isn't a call to Caller (it could be some other sort
@@ -369,16 +372,28 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
     if (IC2.getCostDelta() <= CandidateCost) {
       InliningPreventsSomeOuterInline = true;
       TotalSecondaryCost += IC2.getCost();
+      NumCallerUsers++;
     }
   }
+
+  if (!InliningPreventsSomeOuterInline)
+    return false;
+
   // If all outer calls to Caller would get inlined, the cost for the last
   // one is set very low by getInlineCost, in anticipation that Caller will
   // be removed entirely.  We did not account for this above unless there
   // is only one caller of Caller.
   if (ApplyLastCallBonus)
     TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
 
-  return InliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost();
+  // If InlineDeferralScale is negative, then ignore the cost of primary
+  // inlining -- IC.getCost() multiplied by the number of callers to Caller.
+  if (InlineDeferralScale < 0)
+    return TotalSecondaryCost < IC.getCost();
+
+  int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers;
+  int Allowance = IC.getCost() * InlineDeferralScale;
+  return TotalCost < Allowance;
 }
 
 static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,