Skip to content

Commit 160b4ae

Browse files
committed
ColdBlockInfo: consider post-dominating coldness
A block that is post-dominated by a cold block is itself cold. The most basic kind of cold block is one that ends with a fatalError or some other non-returning call, since such a block is executed at most once per task. To help propagate this exit-block coldness without rewriting the algorithm into one that iterates to a fixed point, I've added some fast checks for blocks that reach "obviously cold" blocks.
1 parent 0ff67cc commit 160b4ae

File tree

9 files changed

+136
-13
lines changed

9 files changed

+136
-13
lines changed

include/swift/AST/SILOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ class SILOptions {
122122
/// within the optimizer. This influences static branch prediction.
123123
bool EnableThrowsPrediction = false;
124124

125+
/// Controls whether blocks ending in an 'unreachable' are considered cold.
126+
bool EnableNoReturnCold = false;
127+
125128
/// Should we run any SIL performance optimizations
126129
///
127130
/// Useful when you want to enable -O LLVM opts but not -O SIL opts.

include/swift/Option/FrontendOptions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,11 @@ def enable_throws_prediction : Flag<["-"], "enable-throws-prediction">,
545545
def disable_throws_prediction : Flag<["-"], "disable-throws-prediction">,
546546
HelpText<"Disables optimization assumption that functions rarely throw errors.">;
547547

548+
def enable_noreturn_prediction : Flag<["-"], "enable-noreturn-prediction">,
549+
HelpText<"Enables optimization assumption that calls to no-return functions are cold.">;
550+
def disable_noreturn_prediction : Flag<["-"], "disable-noreturn-prediction">,
551+
HelpText<"Disables optimization assumption that calls to no-return functions are cold.">;
552+
548553
def disable_access_control : Flag<["-"], "disable-access-control">,
549554
HelpText<"Don't respect access control restrictions">;
550555
def enable_access_control : Flag<["-"], "enable-access-control">,

include/swift/SILOptimizer/Analysis/ColdBlockInfo.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
namespace swift {
2020
class DominanceAnalysis;
21+
class PostDominanceAnalysis;
2122
class SILBasicBlock;
2223

2324
/// Cache a set of basic blocks that have been determined to be cold or hot.
@@ -26,6 +27,7 @@ class SILBasicBlock;
2627
/// across passes.
2728
class ColdBlockInfo {
2829
DominanceAnalysis *DA;
30+
PostDominanceAnalysis *PDA;
2931

3032
/// Each block in this map has been determined to be either cold or hot.
3133
llvm::DenseMap<const SILBasicBlock*, bool> ColdBlockMap;
@@ -53,8 +55,11 @@ class ColdBlockInfo {
5355
bool isCold(const SILBasicBlock *BB,
5456
int recursionDepth);
5557

58+
bool reachesColdBlock(const SILBasicBlock *BB);
59+
5660
public:
57-
ColdBlockInfo(DominanceAnalysis *DA): DA(DA) {}
61+
ColdBlockInfo(DominanceAnalysis *DA, PostDominanceAnalysis *PDA)
62+
: DA(DA), PDA(PDA) {}
5863

5964
bool isSlowPath(const SILBasicBlock *FromBB, const SILBasicBlock *ToBB) {
6065
return isSlowPath(FromBB, ToBB, 0);

lib/DriverTool/sil_opt_main.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ struct SILOptOptions {
293293
EnableThrowsPrediction = llvm::cl::opt<bool>("enable-throws-prediction",
294294
llvm::cl::desc("Enables optimization assumption that functions rarely throw errors."));
295295

296+
llvm::cl::opt<bool>
297+
EnableNoReturnCold = llvm::cl::opt<bool>("enable-noreturn-prediction",
298+
llvm::cl::desc("Enables optimization assumption that calls to no-return functions are cold."));
299+
296300
llvm::cl::opt<bool>
297301
EnableMoveInoutStackProtection = llvm::cl::opt<bool>("enable-move-inout-stack-protector",
298302
llvm::cl::desc("Enable the stack protector by moving values to temporaries."));
@@ -852,6 +856,7 @@ int sil_opt_main(ArrayRef<const char *> argv, void *MainAddr) {
852856
SILOpts.EnableSpeculativeDevirtualization = options.EnableSpeculativeDevirtualization;
853857
SILOpts.EnableAsyncDemotion = options.EnableAsyncDemotion;
854858
SILOpts.EnableThrowsPrediction = options.EnableThrowsPrediction;
859+
SILOpts.EnableNoReturnCold = options.EnableNoReturnCold;
855860
SILOpts.IgnoreAlwaysInline = options.IgnoreAlwaysInline;
856861
SILOpts.EnableOSSAModules = options.EnableOSSAModules;
857862
SILOpts.EnableSILOpaqueValues = options.EnableSILOpaqueValues;

lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2661,6 +2661,9 @@ static bool ParseSILArgs(SILOptions &Opts, ArgList &Args,
26612661
Opts.EnableThrowsPrediction = Args.hasFlag(
26622662
OPT_enable_throws_prediction, OPT_disable_throws_prediction,
26632663
Opts.EnableThrowsPrediction);
2664+
Opts.EnableNoReturnCold = Args.hasFlag(
2665+
OPT_enable_noreturn_prediction, OPT_disable_noreturn_prediction,
2666+
Opts.EnableNoReturnCold);
26642667
Opts.EnableActorDataRaceChecks |= Args.hasFlag(
26652668
OPT_enable_actor_data_race_checks,
26662669
OPT_disable_actor_data_race_checks, /*default=*/false);

lib/SILOptimizer/Analysis/ColdBlockInfo.cpp

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
1515
#include "swift/SIL/SILArgument.h"
1616
#include "swift/AST/SemanticAttrs.h"
17+
#include "swift/SIL/SILModule.h"
1718

1819
#define DEBUG_TYPE "cold-block-info"
1920

@@ -29,6 +30,37 @@ static SILValue getCondition(SILValue C) {
2930
return C;
3031
}
3132

33+
/// A cold terminator is one where it's unlikely to be reached, which are
34+
/// function exits that are less-common. A cold terminator implies a cold block.
35+
/// - 'unreachable', as it's never executed.
36+
/// - 'throw', if throws prediction is enabled.
37+
static bool isColdTerminator(const TermInst *term) {
38+
switch (term->getTermKind()) {
39+
case TermKind::AwaitAsyncContinuationInst:
40+
case TermKind::BranchInst:
41+
case TermKind::CondBranchInst:
42+
case TermKind::SwitchValueInst:
43+
case TermKind::SwitchEnumInst:
44+
case TermKind::SwitchEnumAddrInst:
45+
case TermKind::DynamicMethodBranchInst:
46+
case TermKind::CheckedCastBranchInst:
47+
case TermKind::CheckedCastAddrBranchInst:
48+
case TermKind::TryApplyInst:
49+
case TermKind::YieldInst:
50+
case TermKind::ReturnInst:
51+
return false;
52+
case TermKind::UnreachableInst:
53+
return term->getModule().getOptions().EnableNoReturnCold;
54+
case TermKind::ThrowInst:
55+
case TermKind::ThrowAddrInst:
56+
case TermKind::UnwindInst:
57+
return term->getModule().getOptions().EnableThrowsPrediction;
58+
}
59+
}
60+
static bool hasColdTerminator(const SILBasicBlock *bb) {
61+
return isColdTerminator(bb->getTerminator());
62+
}
63+
3264
/// \return a BranchHint if this conditional's likely value can be inferred.
3365
ColdBlockInfo::BranchHint ColdBlockInfo::getBranchHint(SILValue Cond,
3466
int recursionDepth) {
@@ -135,6 +167,57 @@ static std::optional<bool> isColdProfiledEdge(const SILBasicBlock *FromBB,
135167
return takenProbability < WARM_EDGE_MINIMUM;
136168
}
137169

170+
// Does this block always reach a cold block?
171+
//
172+
// The `isCold` function is not used here, to avoid duplicated work and
173+
// keep this function fast and non-recursive.
174+
bool ColdBlockInfo::reachesColdBlock(const SILBasicBlock *BB) {
175+
assert(ColdBlockMap.find(BB) == ColdBlockMap.end() && "already known temp?");
176+
auto *fn = const_cast<SILFunction *>(BB->getParent());
177+
PostDominanceInfo *info = PDA->get(fn);
178+
auto *node = info->getNode(BB);
179+
180+
// Advance to the block closest to the root that post-dominates BB.
181+
// Such a block is the only kind that might exit the function.
182+
SmallVector<const SILBasicBlock *, 8> chain;
183+
std::optional<bool> lastBBColdness;
184+
while (node && !info->isVirtualRoot(node)) {
185+
auto postDomBB = node->getBlock();
186+
187+
// If the block has a known temperature, we can stop early.
188+
auto result = ColdBlockMap.find(postDomBB);
189+
if (result != ColdBlockMap.end()) {
190+
lastBBColdness = result->getSecond();
191+
break;
192+
}
193+
194+
chain.push_back(node->getBlock());
195+
node = node->getIDom();
196+
}
197+
198+
// If we don't already know the last block's temperature, do a fast check.
199+
if (!lastBBColdness && hasColdTerminator(chain.back()))
200+
lastBBColdness = true;
201+
202+
// We don't have a definitive answer, so say it does not reach a cold one.
203+
if (!lastBBColdness)
204+
return false;
205+
206+
// Mark all blocks in the dominance chain with the given temperature.
207+
for (auto chainBB : chain) {
208+
ColdBlockMap[chainBB] = *lastBBColdness;
209+
210+
if (*lastBBColdness) {
211+
LLVM_DEBUG(llvm::dbgs()
212+
<< "ColdBlockInfo(" << BB->getParent()->getName() << "): "
213+
<< "bb" << chainBB->getDebugID() << " is cold\n");
214+
}
215+
}
216+
217+
assert(ColdBlockMap.find(BB) != ColdBlockMap.end() && "didn't cache?");
218+
return *lastBBColdness;
219+
}
220+
138221
/// \return true if the CFG path FromBB->*->ToBB is either
139222
/// 1. directly gated by a _slowPath branch hint.
140223
/// 2. has a low probability of being taken according to profile information.
@@ -175,6 +258,10 @@ bool ColdBlockInfo::isSlowPath(const SILBasicBlock *FromBB,
175258
}
176259
}
177260

261+
// If the target BB is a cold function exit, then the path is cold.
262+
if (hasColdTerminator(ToBB))
263+
return true;
264+
178265
// Should we give-up here?
179266
if (recursionDepth > RecursionDepthLimit)
180267
return false;
@@ -208,6 +295,10 @@ bool ColdBlockInfo::isCold(const SILBasicBlock *BB, int recursionDepth) {
208295
return true;
209296
}
210297

298+
// If this block always reaches a cold exit, then it's cold.
299+
if (reachesColdBlock(BB))
300+
return true;
301+
211302
std::vector<const SILBasicBlock*> DomChain;
212303
DomChain.push_back(BB);
213304
bool IsCold = false;
@@ -220,7 +311,7 @@ bool ColdBlockInfo::isCold(const SILBasicBlock *BB, int recursionDepth) {
220311
break;
221312
}
222313

223-
// If a dominator of BB is cold, then BB is cold.
314+
// If a dominator of BB has a temperature, propagate it.
224315
auto I = ColdBlockMap.find(fromBB);
225316
if (I != ColdBlockMap.end()) {
226317
IsCold = I->second;

lib/SILOptimizer/IPO/CapturePropagation.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ bool CapturePropagation::optimizePartialApply(PartialApplyInst *PAI) {
590590

591591
void CapturePropagation::run() {
592592
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
593+
PostDominanceAnalysis *PDA = PM->getAnalysis<PostDominanceAnalysis>();
593594
auto *F = getFunction();
594595
bool HasChanged = false;
595596

@@ -598,7 +599,7 @@ void CapturePropagation::run() {
598599
return;
599600

600601
// Cache cold blocks per function.
601-
ColdBlockInfo ColdBlocks(DA);
602+
ColdBlockInfo ColdBlocks(DA, PDA);
602603
for (auto &BB : *F) {
603604
if (ColdBlocks.isCold(&BB))
604605
continue;

lib/SILOptimizer/LoopTransforms/COWArrayOpt.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,11 @@ class COWArrayOpt {
154154
// analyzing.
155155
SILValue CurrentArrayAddr;
156156
public:
157-
COWArrayOpt(RCIdentityFunctionInfo *RCIA, SILLoop *L, DominanceAnalysis *DA)
157+
COWArrayOpt(RCIdentityFunctionInfo *RCIA, SILLoop *L, DominanceAnalysis *DA,
158+
PostDominanceAnalysis *PDA)
158159
: RCIA(RCIA), Function(L->getHeader()->getParent()), Loop(L),
159160
Preheader(L->getLoopPreheader()), DomTree(DA->get(Function)),
160-
ColdBlocks(DA), CachedSafeLoop(false, false), ReachingBlocks(Function) {}
161+
ColdBlocks(DA, PDA), CachedSafeLoop(false, false), ReachingBlocks(Function) {}
161162

162163
bool run();
163164

@@ -1063,6 +1064,7 @@ class COWArrayOptPass : public SILFunctionTransform {
10631064
<< getFunction()->getName() << "\n");
10641065

10651066
auto *DA = PM->getAnalysis<DominanceAnalysis>();
1067+
auto *PDA = PM->getAnalysis<PostDominanceAnalysis>();
10661068
auto *LA = PM->getAnalysis<SILLoopAnalysis>();
10671069
auto *RCIA =
10681070
PM->getAnalysis<RCIdentityAnalysis>()->get(getFunction());
@@ -1084,7 +1086,7 @@ class COWArrayOptPass : public SILFunctionTransform {
10841086

10851087
bool HasChanged = false;
10861088
for (auto *L : Loops)
1087-
HasChanged |= COWArrayOpt(RCIA, L, DA).run();
1089+
HasChanged |= COWArrayOpt(RCIA, L, DA, PDA).run();
10881090

10891091
if (HasChanged)
10901092
invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);

lib/SILOptimizer/Transforms/PerformanceInliner.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,11 @@ class SILPerformanceInliner {
239239
SILPerformanceInliner(StringRef PassName, SILOptFunctionBuilder &FuncBuilder,
240240
InlineSelection WhatToInline,
241241
SILPassManager *pm, DominanceAnalysis *DA,
242+
PostDominanceAnalysis *PDA,
242243
SILLoopAnalysis *LA, BasicCalleeAnalysis *BCA,
243244
OptimizationMode OptMode, OptRemark::Emitter &ORE)
244245
: PassName(PassName), FuncBuilder(FuncBuilder),
245-
WhatToInline(WhatToInline), pm(pm), DA(DA), LA(LA), BCA(BCA), CBI(DA), ORE(ORE),
246+
WhatToInline(WhatToInline), pm(pm), DA(DA), LA(LA), BCA(BCA), CBI(DA, PDA), ORE(ORE),
246247
OptMode(OptMode) {}
247248

248249
bool inlineCallsIntoFunction(SILFunction *F);
@@ -525,9 +526,15 @@ bool SILPerformanceInliner::isProfitableToInline(
525526
return false;
526527
}
527528

528-
SILLoopInfo *LI = LA->get(Callee);
529-
ShortestPathAnalysis *SPA = getSPA(Callee, LI);
530-
assert(SPA->isValid());
529+
SILLoopInfo *CalleeLI = LA->get(Callee);
530+
ShortestPathAnalysis *CalleeSPA = getSPA(Callee, CalleeLI);
531+
if (!CalleeSPA->isValid()) {
532+
CalleeSPA->analyze(CBI, [](FullApplySite FAS) {
533+
// We don't compute SPA for another call-level. Functions called from
534+
// the callee are assumed to have DefaultApplyLength.
535+
return DefaultApplyLength;
536+
});
537+
}
531538

532539
ConstantTracker constTracker(Callee, &callerTracker, AI);
533540
DominanceInfo *DT = DA->get(Callee);
@@ -560,7 +567,7 @@ bool SILPerformanceInliner::isProfitableToInline(
560567
// benefits.
561568
while (SILBasicBlock *block = domOrder.getNext()) {
562569
constTracker.beginBlock();
563-
Weight BlockW = SPA->getWeight(block, CallerWeight);
570+
Weight BlockW = CalleeSPA->getWeight(block, CallerWeight);
564571

565572
for (SILInstruction &I : *block) {
566573
constTracker.trackInst(&I);
@@ -773,7 +780,7 @@ bool SILPerformanceInliner::isProfitableToInline(
773780
LLVM_DEBUG(dumpCaller(AI.getFunction());
774781
llvm::dbgs() << " decision {c=" << CalleeCost
775782
<< ", b=" << Benefit
776-
<< ", l=" << SPA->getScopeLength(CalleeEntry, 0)
783+
<< ", l=" << CalleeSPA->getScopeLength(CalleeEntry, 0)
777784
<< ", c-w=" << CallerWeight
778785
<< ", bb=" << Callee->size()
779786
<< ", c-bb=" << NumCallerBlocks
@@ -1331,6 +1338,7 @@ class SILPerformanceInlinerPass : public SILFunctionTransform {
13311338

13321339
void run() override {
13331340
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
1341+
PostDominanceAnalysis *PDA = PM->getAnalysis<PostDominanceAnalysis>();
13341342
SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();
13351343
BasicCalleeAnalysis *BCA = PM->getAnalysis<BasicCalleeAnalysis>();
13361344
OptRemark::Emitter ORE(DEBUG_TYPE, *getFunction());
@@ -1344,7 +1352,7 @@ class SILPerformanceInlinerPass : public SILFunctionTransform {
13441352
SILOptFunctionBuilder FuncBuilder(*this);
13451353

13461354
SILPerformanceInliner Inliner(getID(), FuncBuilder, WhatToInline,
1347-
getPassManager(), DA, LA, BCA, OptMode, ORE);
1355+
getPassManager(), DA, PDA, LA, BCA, OptMode, ORE);
13481356

13491357
assert(getFunction()->isDefinition() &&
13501358
"Expected only functions with bodies!");

0 commit comments

Comments
 (0)