Skip to content

Commit b6409c3

Browse files
nikic authored and troelsbjerre committed
[GlobalOpt] Cache whether CC is changeable (llvm#71381)
The hasAddressTaken() call in hasOnlyColdCalls() has quadratic complexity if there are many cold calls to a function: We're going to visit each call of the function, and then for each of them iterate all the users of the function. We've recently encountered a case where GlobalOpt spends more than an hour in these hasAddressTaken() checks when full LTO is used. Avoid this by moving the hasAddressTaken() check into hasChangeableCC() and caching its result, so it is only computed once per function. (cherry picked from commit e360a16)
1 parent 26608f2 commit b6409c3

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1732,13 +1732,16 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
17321732
/// idea here is that we don't want to mess with the convention if the user
17331733
/// explicitly requested something with performance implications like coldcc,
17341734
/// GHC, or anyregcc.
1735-
static bool hasChangeableCC(Function *F) {
1735+
static bool hasChangeableCCImpl(Function *F) {
17361736
CallingConv::ID CC = F->getCallingConv();
17371737

17381738
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
17391739
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
17401740
return false;
17411741

1742+
if (F->isVarArg())
1743+
return false;
1744+
17421745
// FIXME: Change CC for the whole chain of musttail calls when possible.
17431746
//
17441747
// Can't change CC of the function that either has musttail calls, or is a
@@ -1758,7 +1761,16 @@ static bool hasChangeableCC(Function *F) {
17581761
if (BB.getTerminatingMustTailCall())
17591762
return false;
17601763

1761-
return true;
1764+
return !F->hasAddressTaken();
1765+
}
1766+
1767+
using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1768+
/// Caching wrapper around hasChangeableCCImpl(). Looks \p F up in
/// \p ChangeableCCCache and only calls hasChangeableCCImpl() when no entry
/// exists yet, so the check runs once per cache entry instead of once per
/// query. Returns the cached (or freshly computed) result.
static bool hasChangeableCC(Function *F,
1769+
ChangeableCCCacheTy &ChangeableCCCache) {
1770+
// Insert a placeholder value of false; Res.second reports whether a new
// entry was actually created (i.e. F was not cached before).
auto Res = ChangeableCCCache.try_emplace(F, false);
1771+
if (Res.second)
1772+
// First query for F: replace the placeholder with the real answer.
Res.first->second = hasChangeableCCImpl(F);
1773+
return Res.first->second;
17621774
}
17631775

17641776
/// Return true if the block containing the call site has a BlockFrequency of
@@ -1812,7 +1824,8 @@ static void changeCallSitesToColdCC(Function *F) {
18121824
// coldcc calling convention.
18131825
static bool
18141826
hasOnlyColdCalls(Function &F,
1815-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1827+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1828+
ChangeableCCCacheTy &ChangeableCCCache) {
18161829
for (BasicBlock &BB : F) {
18171830
for (Instruction &I : BB) {
18181831
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -1831,8 +1844,7 @@ hasOnlyColdCalls(Function &F,
18311844
if (!CalledFn->hasLocalLinkage())
18321845
return false;
18331846
// Check if it's valid to use coldcc calling convention.
1834-
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1835-
CalledFn->hasAddressTaken())
1847+
if (!hasChangeableCC(CalledFn, ChangeableCCCache))
18361848
return false;
18371849
BlockFrequencyInfo &CallerBFI = GetBFI(F);
18381850
if (!isColdCallSite(*CI, CallerBFI))
@@ -1962,9 +1974,10 @@ OptimizeFunctions(Module &M,
19621974

19631975
bool Changed = false;
19641976

1977+
ChangeableCCCacheTy ChangeableCCCache;
19651978
std::vector<Function *> AllCallsCold;
19661979
for (Function &F : llvm::make_early_inc_range(M))
1967-
if (hasOnlyColdCalls(F, GetBFI))
1980+
if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
19681981
AllCallsCold.push_back(&F);
19691982

19701983
// Optimize functions.
@@ -2026,7 +2039,7 @@ OptimizeFunctions(Module &M,
20262039
continue;
20272040
}
20282041

2029-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2042+
if (hasChangeableCC(&F, ChangeableCCCache)) {
20302043
NumInternalFunc++;
20312044
TargetTransformInfo &TTI = GetTTI(F);
20322045
// Change the calling convention to coldcc if either stress testing is
@@ -2036,14 +2049,15 @@ OptimizeFunctions(Module &M,
20362049
if (EnableColdCCStressTest ||
20372050
(TTI.useColdCCForColdCall(F) &&
20382051
isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
2052+
ChangeableCCCache.erase(&F);
20392053
F.setCallingConv(CallingConv::Cold);
20402054
changeCallSitesToColdCC(&F);
20412055
Changed = true;
20422056
NumColdCC++;
20432057
}
20442058
}
20452059

2046-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2060+
if (hasChangeableCC(&F, ChangeableCCCache)) {
20472061
// If this function has a calling convention worth changing, is not a
20482062
// varargs function, and is only called directly, promote it to use the
20492063
// Fast calling convention.

0 commit comments

Comments
 (0)