@@ -2158,13 +2158,16 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2158
2158
/// idea here is that we don't want to mess with the convention if the user
2159
2159
/// explicitly requested something with performance implications like coldcc,
2160
2160
/// GHC, or anyregcc.
2161
- static bool hasChangeableCC (Function *F) {
2161
+ static bool hasChangeableCCImpl (Function *F) {
2162
2162
CallingConv::ID CC = F->getCallingConv ();
2163
2163
2164
2164
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2165
2165
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2166
2166
return false ;
2167
2167
2168
+ if (F->isVarArg ())
2169
+ return false ;
2170
+
2168
2171
// FIXME: Change CC for the whole chain of musttail calls when possible.
2169
2172
//
2170
2173
// Can't change CC of the function that either has musttail calls, or is a
@@ -2184,7 +2187,16 @@ static bool hasChangeableCC(Function *F) {
2184
2187
if (BB.getTerminatingMustTailCall ())
2185
2188
return false ;
2186
2189
2187
- return true ;
2190
+ return !F->hasAddressTaken ();
2191
+ }
2192
+
2193
+ using ChangeableCCCacheTy = SmallDenseMap<Function *, bool , 8 >;
2194
+ static bool hasChangeableCC (Function *F,
2195
+ ChangeableCCCacheTy &ChangeableCCCache) {
2196
+ auto Res = ChangeableCCCache.try_emplace (F, false );
2197
+ if (Res.second )
2198
+ Res.first ->second = hasChangeableCCImpl (F);
2199
+ return Res.first ->second ;
2188
2200
}
2189
2201
2190
2202
/// Return true if the block containing the call site has a BlockFrequency of
@@ -2239,7 +2251,8 @@ static void changeCallSitesToColdCC(Function *F) {
2239
2251
// coldcc calling convention.
2240
2252
static bool
2241
2253
hasOnlyColdCalls (Function &F,
2242
- function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2254
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2255
+ ChangeableCCCacheTy &ChangeableCCCache) {
2243
2256
for (BasicBlock &BB : F) {
2244
2257
for (Instruction &I : BB) {
2245
2258
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -2255,8 +2268,7 @@ hasOnlyColdCalls(Function &F,
2255
2268
if (CalledFn->getIntrinsicID () != Intrinsic::not_intrinsic)
2256
2269
continue ;
2257
2270
// Check if it's valid to use coldcc calling convention.
2258
- if (!hasChangeableCC (CalledFn) || CalledFn->isVarArg () ||
2259
- CalledFn->hasAddressTaken ())
2271
+ if (!hasChangeableCC (CalledFn, ChangeableCCCache))
2260
2272
return false ;
2261
2273
BlockFrequencyInfo &CallerBFI = GetBFI (F);
2262
2274
if (!isColdCallSite (*CI, CallerBFI))
@@ -2386,10 +2398,11 @@ OptimizeFunctions(Module &M,
2386
2398
2387
2399
bool Changed = false ;
2388
2400
2401
+ ChangeableCCCacheTy ChangeableCCCache;
2389
2402
std::vector<Function *> AllCallsCold;
2390
2403
for (Module::iterator FI = M.begin (), E = M.end (); FI != E;) {
2391
2404
Function *F = &*FI++;
2392
- if (hasOnlyColdCalls (*F, GetBFI))
2405
+ if (hasOnlyColdCalls (*F, GetBFI, ChangeableCCCache ))
2393
2406
AllCallsCold.push_back (F);
2394
2407
}
2395
2408
@@ -2456,7 +2469,7 @@ OptimizeFunctions(Module &M,
2456
2469
continue ;
2457
2470
}
2458
2471
2459
- if (hasChangeableCC (F) && !F-> isVarArg () && !F-> hasAddressTaken ( )) {
2472
+ if (hasChangeableCC (F, ChangeableCCCache )) {
2460
2473
NumInternalFunc++;
2461
2474
TargetTransformInfo &TTI = GetTTI (*F);
2462
2475
// Change the calling convention to coldcc if either stress testing is
@@ -2466,15 +2479,15 @@ OptimizeFunctions(Module &M,
2466
2479
if (EnableColdCCStressTest ||
2467
2480
(TTI.useColdCCForColdCall (*F) &&
2468
2481
isValidCandidateForColdCC (*F, GetBFI, AllCallsCold))) {
2482
+ ChangeableCCCache.erase (F);
2469
2483
F->setCallingConv (CallingConv::Cold);
2470
2484
changeCallSitesToColdCC (F);
2471
2485
Changed = true ;
2472
2486
NumColdCC++;
2473
2487
}
2474
2488
}
2475
2489
2476
- if (hasChangeableCC (F) && !F->isVarArg () &&
2477
- !F->hasAddressTaken ()) {
2490
+ if (hasChangeableCC (F, ChangeableCCCache)) {
2478
2491
// If this function has a calling convention worth changing, is not a
2479
2492
// varargs function, and is only called directly, promote it to use the
2480
2493
// Fast calling convention.
0 commit comments