Skip to content

Commit 1fc5bc6

Browse files
nikic authored and troelsbjerre committed
[GlobalOpt] Cache whether CC is changeable (llvm#71381)
The hasAddressTaken() call in hasOnlyColdCalls() has quadratic complexity if there are many cold calls to a function: We're going to visit each call of the function, and then for each of them iterate all the users of the function. We've recently encountered a case where GlobalOpt spends more than an hour in these hasAddressTaken() checks when full LTO is used. Avoid this by moving the hasAddressTaken() check into hasChangeableCC() and caching its result, so it is only computed once per function.
1 parent a894e66 commit 1fc5bc6

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1689,13 +1689,16 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
16891689
/// idea here is that we don't want to mess with the convention if the user
16901690
/// explicitly requested something with performance implications like coldcc,
16911691
/// GHC, or anyregcc.
1692-
static bool hasChangeableCC(Function *F) {
1692+
static bool hasChangeableCCImpl(Function *F) {
16931693
CallingConv::ID CC = F->getCallingConv();
16941694

16951695
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
16961696
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
16971697
return false;
16981698

1699+
if (F->isVarArg())
1700+
return false;
1701+
16991702
// FIXME: Change CC for the whole chain of musttail calls when possible.
17001703
//
17011704
// Can't change CC of the function that either has musttail calls, or is a
@@ -1715,7 +1718,16 @@ static bool hasChangeableCC(Function *F) {
17151718
if (BB.getTerminatingMustTailCall())
17161719
return false;
17171720

1718-
return true;
1721+
return !F->hasAddressTaken();
1722+
}
1723+
1724+
using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1725+
/// Memoized front end for hasChangeableCCImpl(): returns the verdict already
/// stored for \p F in \p ChangeableCCCache if there is one; otherwise calls
/// hasChangeableCCImpl() once and records its result in the cache.
///
/// Nothing in this function invalidates a cached entry, so a caller that
/// modifies \p F in a way that could change the answer must erase F's entry
/// from \p ChangeableCCCache itself.
static bool hasChangeableCC(Function *F,
1726+
ChangeableCCCacheTy &ChangeableCCCache) {
1727+
// Insert a placeholder `false` for F; an existing entry is left untouched.
auto Res = ChangeableCCCache.try_emplace(F, false);
1728+
// Res.second is true only when the placeholder was newly inserted, i.e. F
// had no cached result yet. Only then run the real check and overwrite the
// placeholder with its answer.
if (Res.second)
1729+
Res.first->second = hasChangeableCCImpl(F);
1730+
return Res.first->second;
17191731
}
17201732

17211733
/// Return true if the block containing the call site has a BlockFrequency of
@@ -1769,7 +1781,8 @@ static void changeCallSitesToColdCC(Function *F) {
17691781
// coldcc calling convention.
17701782
static bool
17711783
hasOnlyColdCalls(Function &F,
1772-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1784+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1785+
ChangeableCCCacheTy &ChangeableCCCache) {
17731786
for (BasicBlock &BB : F) {
17741787
for (Instruction &I : BB) {
17751788
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -1785,8 +1798,7 @@ hasOnlyColdCalls(Function &F,
17851798
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
17861799
continue;
17871800
// Check if it's valid to use coldcc calling convention.
1788-
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1789-
CalledFn->hasAddressTaken())
1801+
if (!hasChangeableCC(CalledFn, ChangeableCCCache))
17901802
return false;
17911803
BlockFrequencyInfo &CallerBFI = GetBFI(F);
17921804
if (!isColdCallSite(*CI, CallerBFI))
@@ -1914,9 +1926,10 @@ OptimizeFunctions(Module &M,
19141926

19151927
bool Changed = false;
19161928

1929+
ChangeableCCCacheTy ChangeableCCCache;
19171930
std::vector<Function *> AllCallsCold;
19181931
for (Function &F : llvm::make_early_inc_range(M))
1919-
if (hasOnlyColdCalls(F, GetBFI))
1932+
if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
19201933
AllCallsCold.push_back(&F);
19211934

19221935
// Optimize functions.
@@ -1980,7 +1993,7 @@ OptimizeFunctions(Module &M,
19801993
continue;
19811994
}
19821995

1983-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
1996+
if (hasChangeableCC(&F, ChangeableCCCache)) {
19841997
NumInternalFunc++;
19851998
TargetTransformInfo &TTI = GetTTI(F);
19861999
// Change the calling convention to coldcc if either stress testing is
@@ -1990,14 +2003,15 @@ OptimizeFunctions(Module &M,
19902003
if (EnableColdCCStressTest ||
19912004
(TTI.useColdCCForColdCall(F) &&
19922005
isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
2006+
ChangeableCCCache.erase(&F);
19932007
F.setCallingConv(CallingConv::Cold);
19942008
changeCallSitesToColdCC(&F);
19952009
Changed = true;
19962010
NumColdCC++;
19972011
}
19982012
}
19992013

2000-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2014+
if (hasChangeableCC(&F, ChangeableCCCache)) {
20012015
// If this function has a calling convention worth changing, is not a
20022016
// varargs function, and is only called directly, promote it to use the
20032017
// Fast calling convention.

0 commit comments

Comments
 (0)