-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NFC][AlwaysInliner] Reduce AlwaysInliner memory consumption. #96958
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Refactored AlwaysInliner to remove some of inlined functions earlier. Fixes out of memory issue on a huge test case from issue 59126. Also cleaned up one test.
@llvm/pr-subscribers-llvm-transforms Author: Daniil Fukalov (dfukalov) ChangesRefactored AlwaysInliner to remove some of inlined functions earlier. Fixes out of memory issue on a huge test case from issue 59126. Also cleaned up one test. Full diff: https://github.com/llvm/llvm-project/pull/96958.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index cc375f9badcd4..e97e167412bfe 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -15,12 +15,12 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -38,6 +38,7 @@ bool AlwaysInlineImpl(
SmallSetVector<CallBase *, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
+ SmallVector<Function *, 16> WorkList;
for (Function &F : M) {
// When callee coroutine function is inlined into caller coroutine function
// before coro-split pass,
@@ -46,64 +47,74 @@ bool AlwaysInlineImpl(
if (F.isPresplitCoroutine())
continue;
- if (!F.isDeclaration() && isInlineViable(F).isSuccess()) {
- Calls.clear();
-
- for (User *U : F.users())
- if (auto *CB = dyn_cast<CallBase>(U))
- if (CB->getCalledFunction() == &F &&
- CB->hasFnAttr(Attribute::AlwaysInline) &&
- !CB->getAttributes().hasFnAttr(Attribute::NoInline))
- Calls.insert(CB);
-
- for (CallBase *CB : Calls) {
- Function *Caller = CB->getCaller();
- OptimizationRemarkEmitter ORE(Caller);
- DebugLoc DLoc = CB->getDebugLoc();
- BasicBlock *Block = CB->getParent();
-
- InlineFunctionInfo IFI(GetAssumptionCache, &PSI,
- GetBFI ? &GetBFI(*Caller) : nullptr,
- GetBFI ? &GetBFI(F) : nullptr);
-
- InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
- &GetAAR(F), InsertLifetime);
- if (!Res.isSuccess()) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
- Block)
- << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
- << ore::NV("Caller", Caller)
- << "': " << ore::NV("Reason", Res.getFailureReason());
- });
- continue;
- }
-
- emitInlinedIntoBasedOnCost(
- ORE, DLoc, Block, F, *Caller,
- InlineCost::getAlways("always inline attribute"),
- /*ForProfileContext=*/false, DEBUG_TYPE);
+ if (!F.isDeclaration() && isInlineViable(F).isSuccess())
+ WorkList.push_back(&F);
+ }
- Changed = true;
+ for (Function *F : WorkList) {
+ Calls.clear();
+
+ for (User *U : F->users())
+ if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->getCalledFunction() == F &&
+ CB->hasFnAttr(Attribute::AlwaysInline) &&
+ !CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Calls.insert(CB);
+
+ for (CallBase *CB : Calls) {
+ Function *Caller = CB->getCaller();
+ OptimizationRemarkEmitter ORE(Caller);
+ DebugLoc DLoc = CB->getDebugLoc();
+ BasicBlock *Block = CB->getParent();
+
+ InlineFunctionInfo IFI(GetAssumptionCache, &PSI,
+ GetBFI ? &GetBFI(*Caller) : nullptr,
+ GetBFI ? &GetBFI(*F) : nullptr);
+
+ InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+ &GetAAR(*F), InsertLifetime);
+ if (!Res.isSuccess()) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
+ Block)
+ << "'" << ore::NV("Callee", F) << "' is not inlined into '"
+ << ore::NV("Caller", Caller)
+ << "': " << ore::NV("Reason", Res.getFailureReason());
+ });
+ continue;
}
- if (F.hasFnAttribute(Attribute::AlwaysInline)) {
- // Remember to try and delete this function afterward. This both avoids
- // re-walking the rest of the module and avoids dealing with any
- // iterator invalidation issues while deleting functions.
- InlinedFunctions.push_back(&F);
+ emitInlinedIntoBasedOnCost(
+ ORE, DLoc, Block, *F, *Caller,
+ InlineCost::getAlways("always inline attribute"),
+ /*ForProfileContext=*/false, DEBUG_TYPE);
+
+ Changed = true;
+ }
+
+ F->removeDeadConstantUsers();
+ if (F->hasFnAttribute(Attribute::AlwaysInline) && F->isDefTriviallyDead()) {
+ // Remember to try and delete this function afterward. This both avoids
+ // re-walking the rest of the module and avoids dealing with any
+ // iterator invalidation issues while deleting functions.
+ if (F->hasComdat()){
+ InlinedFunctions.push_back(F);
+ } else {
+ M.getFunctionList().erase(F);
+ Changed = true;
}
}
+
}
- // Remove any live functions.
+ // Final cleanup stage. Firstly, remove any live functions.
erase_if(InlinedFunctions, [&](Function *F) {
F->removeDeadConstantUsers();
return !F->isDefTriviallyDead();
});
// Delete the non-comdat ones from the module and also from our vector.
- auto NonComdatBegin = partition(
+ auto *NonComdatBegin = partition(
InlinedFunctions, [&](Function *F) { return F->hasComdat(); });
for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) {
M.getFunctionList().erase(F);
@@ -191,7 +202,7 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache,
- GetAAR, GetBFI);
+ GetAAR, GetBFI);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/llvm/test/Transforms/Inline/always-inline-phase-ordering.ll b/llvm/test/Transforms/Inline/always-inline-phase-ordering.ll
index e69ca48344900..237cf180947bf 100644
--- a/llvm/test/Transforms/Inline/always-inline-phase-ordering.ll
+++ b/llvm/test/Transforms/Inline/always-inline-phase-ordering.ll
@@ -1,12 +1,10 @@
; RUN: opt --Os -pass-remarks=inline -S < %s 2>&1 | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64e-apple-macosx13"
; CHECK: remark: <unknown>:0:0: 'wibble' inlined into 'bar.8' with (cost=always): always inline attribute
; CHECK: remark: <unknown>:0:0: 'wibble' inlined into 'pluto' with (cost=always): always inline attribute
; CHECK: remark: <unknown>:0:0: 'snork' inlined into 'blam' with (cost=always): always inline attribute
; CHECK: remark: <unknown>:0:0: 'wobble' inlined into 'blam' with (cost=always): always inline attribute
-; CHECK: remark: <unknown>:0:0: 'wobble' inlined into 'snork' with (cost=always): always inline attribute
; CHECK: remark: <unknown>:0:0: 'spam' inlined into 'blam' with (cost=65, threshold=75)
; CHECK: remark: <unknown>:0:0: 'wibble.1' inlined into 'widget' with (cost=30, threshold=75)
; CHECK: remark: <unknown>:0:0: 'widget' inlined into 'bar.8' with (cost=30, threshold=75)
@@ -24,14 +22,14 @@ bb:
unreachable
}
-define linkonce_odr void @pluto() #1 !prof !38 {
+define linkonce_odr void @pluto() !prof !38 {
bb:
call void @wibble()
ret void
}
; Function Attrs: alwaysinline
-define linkonce_odr void @wibble() #2 {
+define linkonce_odr void @wibble() #1 {
bb:
call void @widget()
ret void
@@ -62,7 +60,7 @@ bb:
}
; Function Attrs: alwaysinline
-define linkonce_odr i32 @snork() #2 {
+define linkonce_odr i32 @snork() #1 {
bb:
%tmpv1 = call i32 @spam()
%tmpv2 = call i32 @wobble()
@@ -83,7 +81,7 @@ bb:
}
; Function Attrs: alwaysinline
-define linkonce_odr i32 @wobble() #2 {
+define linkonce_odr i32 @wobble() #1 {
bb:
%tmpv = call i64 @wobble.5(i8 0)
%tmpv1 = call i64 @eggs.7()
@@ -119,8 +117,7 @@ bb:
}
attributes #0 = { "frame-pointer"="non-leaf" }
-attributes #1 = { "target-cpu"="apple-m1" }
-attributes #2 = { alwaysinline }
+attributes #1 = { alwaysinline }
!llvm.module.flags = !{!0, !1, !30, !31, !32, !36, !37}
|
|
||
; CHECK: remark: <unknown>:0:0: 'wibble' inlined into 'bar.8' with (cost=always): always inline attribute | ||
; CHECK: remark: <unknown>:0:0: 'wibble' inlined into 'pluto' with (cost=always): always inline attribute | ||
; CHECK: remark: <unknown>:0:0: 'snork' inlined into 'blam' with (cost=always): always inline attribute | ||
; CHECK: remark: <unknown>:0:0: 'wobble' inlined into 'blam' with (cost=always): always inline attribute | ||
; CHECK: remark: <unknown>:0:0: 'wobble' inlined into 'snork' with (cost=always): always inline attribute |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This remark was about inlining 'wobble' into already inlined 'snork'.
|
@@ -1,12 +1,10 @@ | |||
; RUN: opt --Os -pass-remarks=inline -S < %s 2>&1 | FileCheck %s | |||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" | |||
target triple = "arm64e-apple-macosx13" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please split the unrelated test changes to another PR
can you elaborate in the description on "Refactored AlwaysInliner to remove some of inlined functions earlier" and how it fixes the issue? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so the issue is that if we have for example a bunch of internal functions that are used once by another internal function, we may run into quadratic memory usage if everything is inlined? that makes sense
} | ||
|
||
F->removeDeadConstantUsers(); | ||
if (F->hasFnAttribute(Attribute::AlwaysInline) && F->isDefTriviallyDead()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we need the alwaysinline check? F->isDefTriviallyDead()
should be sufficient
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
without checking AlwaysInline
attribute the change will be not NFC: it will process here not only AlwaysInline
functions, but also can remove some other dead functions. I'm not sure it is intended in the pass. There is the test about it and it fails without the check of attribute.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fair enough
✅ With the latest revision this PR passed the C/C++ code formatter. |
auto NonComdatBegin = partition( | ||
InlinedFunctions, [&](Function *F) { return F->hasComdat(); }); | ||
auto *NonComdatBegin = | ||
partition(InlinedFunctions, [&](Function *F) { return F->hasComdat(); }); | ||
for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this part is obsolete now right? we don't have any non-comdat functions in InlinedFunctions
(should also change the variable name to InlinedComdatFunctions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, thanks, removed
} | ||
|
||
F->removeDeadConstantUsers(); | ||
if (F->hasFnAttribute(Attribute::AlwaysInline) && F->isDefTriviallyDead()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fair enough
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with comment added, thanks for doing this
// So we won't inline the coroutine function if it have not been unsplited | ||
SmallVector<Function *, 16> InlinedComdatFunctions; | ||
|
||
for (Function &F : make_early_inc_range(M)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a comment about the fact that we need this since we may delete the function we're visiting
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've checked a number of places make_early_inc_range()
used - found no comments about its functionality. It seems its usage instead of default iterator is quite obvious, what do you think?
Updated the comment below about InlinedComdatFunctions
usage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's not immediately obvious since the deletion happens further down so I'd write a comment, but won't object if you omit the comment
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/1406 Here is the relevant piece of the build log for the reference:
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/1452 Here is the relevant piece of the build log for the reference:
|
…6958) Refactored AlwaysInliner to remove some of inlined functions earlier. Before the change AlwaysInliner walked through all functions in the module and inlined them into calls where it is appropriate. Removing of the dead inlined functions was performed only after all of inlining. For the test case from the issue [59126](llvm#59126) compiler consumes all of the memory on 64GB machine, so is killed. The change checks if just inlined function can be removed from the module and removes it.
…6958) Refactored AlwaysInliner to remove some of inlined functions earlier. Before the change AlwaysInliner walked through all functions in the module and inlined them into calls where it is appropriate. Removing of the dead inlined functions was performed only after all of inlining. For the test case from the issue [59126](llvm#59126) compiler consumes all of the memory on 64GB machine, so is killed. The change checks if just inlined function can be removed from the module and removes it.
Refactored AlwaysInliner to remove some of inlined functions earlier.
Before the change AlwaysInliner walked through all functions in the module and inlined them into calls where it is appropriate. Removing of the dead inlined functions was performed only after all of inlining. For the test case from the issue 59126 compiler consumes all of the memory on 64GB machine, so is killed.
The change checks if just inlined function can be removed from the module and removes it.