Skip to content

Commit 65de025

Browse files
committed
[HotColdSplit] Split more aggressively before/after cold invokes
While a cold invoke itself and its unwind destination can't be extracted, code which unconditionally executes before/after the invoke may still be profitable to extract. With cost model changes from D57125 applied, this gives a 3.5% increase in split text across LNT+externals on arm64 at -Os. llvm-svn: 352160
1 parent 5cf6665 commit 65de025

File tree

2 files changed

+86
-39
lines changed

2 files changed

+86
-39
lines changed

llvm/lib/Transforms/IPO/HotColdSplitting.cpp

Lines changed: 55 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,11 @@ bool unlikelyExecuted(BasicBlock &BB) {
119119

120120
/// Check whether it's safe to outline \p BB.
121121
static bool mayExtractBlock(const BasicBlock &BB) {
122-
return !BB.hasAddressTaken() && !BB.isEHPad();
122+
// EH pads are unsafe to outline because doing so breaks EH type tables. It
123+
// follows that invoke instructions cannot be extracted, because CodeExtractor
124+
// requires unwind destinations to be within the extraction region.
125+
return !BB.hasAddressTaken() && !BB.isEHPad() &&
126+
!isa<InvokeInst>(BB.getTerminator());
123127
}
124128

125129
/// Check whether \p Region is profitable to outline.
@@ -283,6 +287,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
283287
namespace {
284288
/// A maximal outlining region. This contains all blocks post-dominated by a
285289
/// sink block, the sink block itself, and all blocks dominated by the sink.
290+
/// If sink-predecessors and sink-successors cannot be extracted in one region,
291+
/// the static constructor returns a list of suitable extraction regions.
286292
class OutliningRegion {
287293
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
288294
/// viable sub-region entry point. Blocks with higher scores are better entry
@@ -297,12 +303,9 @@ class OutliningRegion {
297303
/// Whether the entire function is cold.
298304
bool EntireFunctionCold = false;
299305

300-
/// Whether or not \p BB could be the entry point of an extracted region.
301-
static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
302-
303306
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
304307
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
305-
return isViableEntryPoint(BB) ? Score : 0;
308+
return mayExtractBlock(BB) ? Score : 0;
306309
}
307310

308311
/// These scores should be lower than the score for predecessor blocks,
@@ -318,21 +321,23 @@ class OutliningRegion {
318321
OutliningRegion(OutliningRegion &&) = default;
319322
OutliningRegion &operator=(OutliningRegion &&) = default;
320323

321-
static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
322-
const PostDominatorTree &PDT) {
323-
OutliningRegion ColdRegion;
324-
324+
static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
325+
const DominatorTree &DT,
326+
const PostDominatorTree &PDT) {
327+
std::vector<OutliningRegion> Regions;
325328
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
326329

330+
Regions.emplace_back();
331+
OutliningRegion *ColdRegion = &Regions.back();
332+
327333
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
328334
RegionBlocks.insert(BB);
329-
ColdRegion.Blocks.emplace_back(BB, Score);
330-
assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
335+
ColdRegion->Blocks.emplace_back(BB, Score);
331336
};
332337

333338
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
334339
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
335-
ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
340+
ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
336341
unsigned BestScore = SinkScore;
337342

338343
// Visit SinkBB's ancestors using inverse DFS.
@@ -345,8 +350,8 @@ class OutliningRegion {
345350
// If the predecessor is cold and has no predecessors, the entire
346351
// function must be cold.
347352
if (SinkPostDom && pred_empty(&PredBB)) {
348-
ColdRegion.EntireFunctionCold = true;
349-
return ColdRegion;
353+
ColdRegion->EntireFunctionCold = true;
354+
return Regions;
350355
}
351356

352357
// If SinkBB does not post-dominate a predecessor, do not mark the
@@ -361,18 +366,27 @@ class OutliningRegion {
361366
// considered as entry points before the sink block.
362367
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
363368
if (PredScore > BestScore) {
364-
ColdRegion.SuggestedEntryPoint = &PredBB;
369+
ColdRegion->SuggestedEntryPoint = &PredBB;
365370
BestScore = PredScore;
366371
}
367372

368373
addBlockToRegion(&PredBB, PredScore);
369374
++PredIt;
370375
}
371376

372-
// Add SinkBB to the cold region. It's considered as an entry point before
373-
// any sink-successor blocks.
374-
if (mayExtractBlock(SinkBB))
377+
// If the sink can be added to the cold region, do so. It's considered as
378+
// an entry point before any sink-successor blocks.
379+
//
380+
// Otherwise, split cold sink-successor blocks using a separate region.
381+
// This satisfies the requirement that all extraction blocks other than the
382+
// first have predecessors within the extraction region.
383+
if (mayExtractBlock(SinkBB)) {
375384
addBlockToRegion(&SinkBB, SinkScore);
385+
} else {
386+
Regions.emplace_back();
387+
ColdRegion = &Regions.back();
388+
BestScore = 0;
389+
}
376390

377391
// Find all successors of SinkBB dominated by SinkBB using DFS.
378392
auto SuccIt = ++df_begin(&SinkBB);
@@ -393,15 +407,15 @@ class OutliningRegion {
393407

394408
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
395409
if (SuccScore > BestScore) {
396-
ColdRegion.SuggestedEntryPoint = &SuccBB;
410+
ColdRegion->SuggestedEntryPoint = &SuccBB;
397411
BestScore = SuccScore;
398412
}
399413

400414
addBlockToRegion(&SuccBB, SuccScore);
401415
++SuccIt;
402416
}
403417

404-
return ColdRegion;
418+
return Regions;
405419
}
406420

407421
/// Whether this region has nothing to extract.
@@ -496,28 +510,30 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
496510
if (!PDT)
497511
PDT = make_unique<PostDominatorTree>(F);
498512

499-
auto Region = OutliningRegion::create(*BB, *DT, *PDT);
500-
if (Region.empty())
501-
continue;
513+
auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
514+
for (OutliningRegion &Region : Regions) {
515+
if (Region.empty())
516+
continue;
502517

503-
if (Region.isEntireFunctionCold()) {
504-
LLVM_DEBUG(dbgs() << "Entire function is cold\n");
505-
return markFunctionCold(F);
506-
}
518+
if (Region.isEntireFunctionCold()) {
519+
LLVM_DEBUG(dbgs() << "Entire function is cold\n");
520+
return markFunctionCold(F);
521+
}
507522

508-
// If this outlining region intersects with another, drop the new region.
509-
//
510-
// TODO: It's theoretically possible to outline more by only keeping the
511-
// largest region which contains a block, but the extra bookkeeping to do
512-
// this is tricky/expensive.
513-
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
514-
return !ColdBlocks.insert(Block.first).second;
515-
});
516-
if (RegionsOverlap)
517-
continue;
523+
// If this outlining region intersects with another, drop the new region.
524+
//
525+
// TODO: It's theoretically possible to outline more by only keeping the
526+
// largest region which contains a block, but the extra bookkeeping to do
527+
// this is tricky/expensive.
528+
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
529+
return !ColdBlocks.insert(Block.first).second;
530+
});
531+
if (RegionsOverlap)
532+
continue;
518533

519-
OutliningWorklist.emplace_back(std::move(Region));
520-
++NumColdRegionsFound;
534+
OutliningWorklist.emplace_back(std::move(Region));
535+
++NumColdRegionsFound;
536+
}
521537
}
522538

523539
// Outline single-entry cold regions, splitting up larger regions as needed.

llvm/test/Transforms/HotColdSplit/eh-pads.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,44 @@ normal:
5454
ret void
5555
}
5656

57+
define void @baz() personality i8 0 {
58+
entry:
59+
br i1 undef, label %exit, label %cold1
60+
61+
exit:
62+
ret void
63+
64+
cold1:
65+
; The predecessor of a cold invoke may still be extracted (see baz.cold.2).
66+
call void @sideeffect(i32 0)
67+
br label %cold2
68+
69+
cold2:
70+
invoke void @sink() to label %cold3 unwind label %cold4
71+
72+
cold3:
73+
; The successor of a cold invoke may still be extracted (see baz.cold.1).
74+
call void @sideeffect(i32 1)
75+
ret void
76+
77+
cold4:
78+
landingpad i8 cleanup
79+
ret void
80+
}
81+
5782
; CHECK-LABEL: define {{.*}}@foo.cold.1(
5883
; CHECK: sideeffect(i32 0)
5984
; CHECK: sink
6085

6186
; CHECK-LABEL: define {{.*}}@bar.cold.1(
6287
; CHECK: sideeffect(i32 1)
6388

89+
; CHECK-LABEL: define {{.*}}@baz.cold.1(
90+
; CHECK: sideeffect(i32 1)
91+
92+
; CHECK-LABEL: define {{.*}}@baz.cold.2(
93+
; CHECK: sideeffect(i32 0)
94+
6495
declare void @sideeffect(i32)
6596

6697
declare void @sink() cold

0 commit comments

Comments
 (0)