Skip to content

Commit c24b254

Browse files
nikic and troelsbjerre
authored and committed
[GlobalOpt] Cache whether CC is changeable (llvm#71381)
The hasAddressTaken() call in hasOnlyColdCalls() has quadratic complexity if there are many cold calls to a function: We're going to visit each call of the function, and then for each of them iterate all the users of the function. We've recently encountered a case where GlobalOpt spends more than an hour in these hasAddressTaken() checks when full LTO is used. Avoid this by moving the hasAddressTaken() check into hasChangeableCC() and caching its result, so it is only computed once per function. (cherry picked from commit e360a16) Adapted to LLVM11 by Ming-Chuan Lin <[email protected]>
1 parent 657a62f commit c24b254

File tree

1 file changed

+22
-9
lines changed

1 file changed

+22
-9
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2158,13 +2158,16 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
21582158
/// idea here is that we don't want to mess with the convention if the user
21592159
/// explicitly requested something with performance implications like coldcc,
21602160
/// GHC, or anyregcc.
2161-
static bool hasChangeableCC(Function *F) {
2161+
static bool hasChangeableCCImpl(Function *F) {
21622162
CallingConv::ID CC = F->getCallingConv();
21632163

21642164
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
21652165
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
21662166
return false;
21672167

2168+
if (F->isVarArg())
2169+
return false;
2170+
21682171
// FIXME: Change CC for the whole chain of musttail calls when possible.
21692172
//
21702173
// Can't change CC of the function that either has musttail calls, or is a
@@ -2184,7 +2187,16 @@ static bool hasChangeableCC(Function *F) {
21842187
if (BB.getTerminatingMustTailCall())
21852188
return false;
21862189

2187-
return true;
2190+
return !F->hasAddressTaken();
2191+
}
2192+
2193+
using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
2194+
/// Cached front end for hasChangeableCCImpl(): returns the stored answer for
/// \p F from \p ChangeableCCCache, computing and storing it on the first
/// query so hasChangeableCCImpl() runs at most once per cached function.
/// Callers that alter \p F in a way that affects the answer must erase its
/// entry from the cache themselves.
static bool hasChangeableCC(Function *F,
2195+
ChangeableCCCacheTy &ChangeableCCCache) {
2196+
// Insert a placeholder 'false' entry; Res.second tells us whether the key
// was newly inserted (i.e. no cached answer existed yet).
auto Res = ChangeableCCCache.try_emplace(F, false);
2197+
if (Res.second)
2198+
// First query for F: overwrite the placeholder with the real result.
Res.first->second = hasChangeableCCImpl(F);
2199+
return Res.first->second;
21882200
}
21892201

21902202
/// Return true if the block containing the call site has a BlockFrequency of
@@ -2239,7 +2251,8 @@ static void changeCallSitesToColdCC(Function *F) {
22392251
// coldcc calling convention.
22402252
static bool
22412253
hasOnlyColdCalls(Function &F,
2242-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2254+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2255+
ChangeableCCCacheTy &ChangeableCCCache) {
22432256
for (BasicBlock &BB : F) {
22442257
for (Instruction &I : BB) {
22452258
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -2255,8 +2268,7 @@ hasOnlyColdCalls(Function &F,
22552268
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
22562269
continue;
22572270
// Check if it's valid to use coldcc calling convention.
2258-
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2259-
CalledFn->hasAddressTaken())
2271+
if (!hasChangeableCC(CalledFn, ChangeableCCCache))
22602272
return false;
22612273
BlockFrequencyInfo &CallerBFI = GetBFI(F);
22622274
if (!isColdCallSite(*CI, CallerBFI))
@@ -2386,10 +2398,11 @@ OptimizeFunctions(Module &M,
23862398

23872399
bool Changed = false;
23882400

2401+
ChangeableCCCacheTy ChangeableCCCache;
23892402
std::vector<Function *> AllCallsCold;
23902403
for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
23912404
Function *F = &*FI++;
2392-
if (hasOnlyColdCalls(*F, GetBFI))
2405+
if (hasOnlyColdCalls(*F, GetBFI, ChangeableCCCache))
23932406
AllCallsCold.push_back(F);
23942407
}
23952408

@@ -2456,7 +2469,7 @@ OptimizeFunctions(Module &M,
24562469
continue;
24572470
}
24582471

2459-
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
2472+
if (hasChangeableCC(F, ChangeableCCCache)) {
24602473
NumInternalFunc++;
24612474
TargetTransformInfo &TTI = GetTTI(*F);
24622475
// Change the calling convention to coldcc if either stress testing is
@@ -2466,15 +2479,15 @@ OptimizeFunctions(Module &M,
24662479
if (EnableColdCCStressTest ||
24672480
(TTI.useColdCCForColdCall(*F) &&
24682481
isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
2482+
ChangeableCCCache.erase(F);
24692483
F->setCallingConv(CallingConv::Cold);
24702484
changeCallSitesToColdCC(F);
24712485
Changed = true;
24722486
NumColdCC++;
24732487
}
24742488
}
24752489

2476-
if (hasChangeableCC(F) && !F->isVarArg() &&
2477-
!F->hasAddressTaken()) {
2490+
if (hasChangeableCC(F, ChangeableCCCache)) {
24782491
// If this function has a calling convention worth changing, is not a
24792492
// varargs function, and is only called directly, promote it to use the
24802493
// Fast calling convention.

0 commit comments

Comments
 (0)