Skip to content

Commit 421728b

Browse files
committed
[NFC] Factor out computation of best unswitch cost candidate
Split out a major piece of this method to make the code more readable.
1 parent 56f94ed commit 421728b

File tree

1 file changed

+104
-81
lines changed

1 file changed

+104
-81
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 104 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2662,19 +2662,20 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
26622662
/// That requires knowing not just the number of "remaining" candidates but
26632663
/// also costs of unswitching for each of these candidates.
26642664
static int CalculateUnswitchCostMultiplier(
2665-
Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2666-
ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2665+
const Instruction &TI, const Loop &L, const LoopInfo &LI,
2666+
const DominatorTree &DT,
2667+
ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *> > >
26672668
UnswitchCandidates) {
26682669

26692670
// Guards and other exiting conditions do not contribute to exponential
26702671
// explosion as soon as they dominate the latch (otherwise there might be
26712672
// another path to the latch remaining that does not allow to eliminate the
26722673
// loop copy on unswitch).
2673-
BasicBlock *Latch = L.getLoopLatch();
2674-
BasicBlock *CondBlock = TI.getParent();
2674+
const BasicBlock *Latch = L.getLoopLatch();
2675+
const BasicBlock *CondBlock = TI.getParent();
26752676
if (DT.dominates(CondBlock, Latch) &&
26762677
(isGuard(&TI) ||
2677-
llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2678+
llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
26782679
return L.contains(SuccBB);
26792680
}) <= 1)) {
26802681
NumCostMultiplierSkipped++;
@@ -2688,16 +2689,17 @@ static int CalculateUnswitchCostMultiplier(
26882689
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
26892690
int UnswitchedClones = 0;
26902691
for (auto Candidate : UnswitchCandidates) {
2691-
Instruction *CI = Candidate.first;
2692-
BasicBlock *CondBlock = CI->getParent();
2692+
const Instruction *CI = Candidate.first;
2693+
const BasicBlock *CondBlock = CI->getParent();
26932694
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
26942695
if (isGuard(CI)) {
26952696
if (!SkipExitingSuccessors)
26962697
UnswitchedClones++;
26972698
continue;
26982699
}
2699-
int NonExitingSuccessors = llvm::count_if(
2700-
successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2700+
int NonExitingSuccessors =
2701+
llvm::count_if(successors(CondBlock),
2702+
[SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
27012703
return !SkipExitingSuccessors || L.contains(SuccBB);
27022704
});
27032705
UnswitchedClones += Log2_32(NonExitingSuccessors);
@@ -2817,54 +2819,20 @@ static bool collectUnswitchCandidates(
28172819
return !UnswitchCandidates.empty();
28182820
}
28192821

2820-
static bool unswitchBestCondition(
2821-
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2822-
AAResults &AA, TargetTransformInfo &TTI,
2823-
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2824-
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2825-
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2826-
// Collect all invariant conditions within this loop (as opposed to an inner
2827-
// loop which would be handled when visiting that inner loop).
2828-
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *> >, 4>
2829-
UnswitchCandidates;
2830-
IVConditionInfo PartialIVInfo;
2831-
Instruction *PartialIVCondBranch = nullptr;
2832-
// If we didn't find any candidates, we're done.
2833-
if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
2834-
PartialIVCondBranch, L, LI, AA, MSSAU))
2835-
return false;
2836-
2837-
// Check if there are irreducible CFG cycles in this loop. If so, we cannot
2838-
// easily unswitch non-trivial edges out of the loop. Doing so might turn the
2839-
// irreducible control flow into reducible control flow and introduce new
2840-
// loops "out of thin air". If we ever discover important use cases for doing
2841-
// this, we can add support to loop unswitch, but it is a lot of complexity
2842-
// for what seems little or no real world benefit.
2843-
LoopBlocksRPO RPOT(&L);
2844-
RPOT.perform(&LI);
2845-
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2846-
return false;
2847-
2848-
SmallVector<BasicBlock *, 4> ExitBlocks;
2849-
L.getUniqueExitBlocks(ExitBlocks);
2850-
2851-
// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2852-
// instruction as we don't know how to split those exit blocks.
2853-
// FIXME: We should teach SplitBlock to handle this and remove this
2854-
// restriction.
2855-
for (auto *ExitBB : ExitBlocks) {
2856-
auto *I = ExitBB->getFirstNonPHI();
2857-
if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2858-
LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
2859-
"in exit block\n");
2860-
return false;
2861-
}
2862-
}
2863-
2864-
LLVM_DEBUG(
2865-
dbgs() << "Considering " << UnswitchCandidates.size()
2866-
<< " non-trivial loop invariant conditions for unswitching.\n");
2867-
2822+
namespace {
2823+
struct NonTrivialUnswitchCandidate {
2824+
Instruction *TI = nullptr;
2825+
InstructionCost Cost = 0;
2826+
ArrayRef<Value *> Invariants;
2827+
};
2828+
} // end anonymous namespace.
2829+
2830+
static Optional<NonTrivialUnswitchCandidate>
2831+
findBestNonTrivialUnswitchCandidate(
2832+
ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *> > >
2833+
UnswitchCandidates, const Loop &L, const DominatorTree &DT,
2834+
const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI,
2835+
const IVConditionInfo &PartialIVInfo) {
28682836
// Unswitching these terminators will require duplicating parts of
28692837
// the loop, so we need to be able to model that cost. Compute the ephemeral
28702838
// values and set up a data structure to hold per-BB costs. We cache each
@@ -2891,10 +2859,10 @@ static bool unswitchBestCondition(
28912859
continue;
28922860

28932861
if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2894-
return false;
2862+
return None;
28952863
if (auto *CB = dyn_cast<CallBase>(&I))
28962864
if (CB->isConvergent() || CB->cannotDuplicate())
2897-
return false;
2865+
return None;
28982866

28992867
Cost += TTI.getInstructionCost(&I, CostKind);
29002868
}
@@ -2978,9 +2946,8 @@ static bool unswitchBestCondition(
29782946
"Cannot unswitch a condition without multiple distinct successors!");
29792947
return (LoopCost - Cost) * (SuccessorsCount - 1);
29802948
};
2981-
Instruction *BestUnswitchTI = nullptr;
2982-
InstructionCost BestUnswitchCost = 0;
2983-
ArrayRef<Value *> BestUnswitchInvariants;
2949+
2950+
NonTrivialUnswitchCandidate Best;
29842951
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
29852952
Instruction &TI = *TerminatorAndInvariants.first;
29862953
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
@@ -3006,34 +2973,90 @@ static bool unswitchBestCondition(
30062973
<< " for unswitch candidate: " << TI << "\n");
30072974
}
30082975

3009-
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
3010-
BestUnswitchTI = &TI;
3011-
BestUnswitchCost = CandidateCost;
3012-
BestUnswitchInvariants = Invariants;
2976+
if (!Best.TI || CandidateCost < Best.Cost) {
2977+
Best.TI = &TI;
2978+
Best.Cost = CandidateCost;
2979+
Best.Invariants = Invariants;
30132980
}
30142981
}
3015-
assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2982+
return Best;
2983+
}
30162984

3017-
if (BestUnswitchCost >= UnswitchThreshold) {
3018-
LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
3019-
<< BestUnswitchCost << "\n");
2985+
static bool unswitchBestCondition(
2986+
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2987+
AAResults &AA, TargetTransformInfo &TTI,
2988+
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2989+
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2990+
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2991+
// Collect all invariant conditions within this loop (as opposed to an inner
2992+
// loop which would be handled when visiting that inner loop).
2993+
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *> >, 4>
2994+
UnswitchCandidates;
2995+
IVConditionInfo PartialIVInfo;
2996+
Instruction *PartialIVCondBranch = nullptr;
2997+
// If we didn't find any candidates, we're done.
2998+
if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
2999+
PartialIVCondBranch, L, LI, AA, MSSAU))
3000+
return false;
3001+
3002+
// Check if there are irreducible CFG cycles in this loop. If so, we cannot
3003+
// easily unswitch non-trivial edges out of the loop. Doing so might turn the
3004+
// irreducible control flow into reducible control flow and introduce new
3005+
// loops "out of thin air". If we ever discover important use cases for doing
3006+
// this, we can add support to loop unswitch, but it is a lot of complexity
3007+
// for what seems little or no real world benefit.
3008+
LoopBlocksRPO RPOT(&L);
3009+
RPOT.perform(&LI);
3010+
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3011+
return false;
3012+
3013+
SmallVector<BasicBlock *, 4> ExitBlocks;
3014+
L.getUniqueExitBlocks(ExitBlocks);
3015+
3016+
// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3017+
// instruction as we don't know how to split those exit blocks.
3018+
// FIXME: We should teach SplitBlock to handle this and remove this
3019+
// restriction.
3020+
for (auto *ExitBB : ExitBlocks) {
3021+
auto *I = ExitBB->getFirstNonPHI();
3022+
if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3023+
LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3024+
"in exit block\n");
3025+
return false;
3026+
}
3027+
}
3028+
3029+
LLVM_DEBUG(
3030+
dbgs() << "Considering " << UnswitchCandidates.size()
3031+
<< " non-trivial loop invariant conditions for unswitching.\n");
3032+
3033+
Optional<NonTrivialUnswitchCandidate> Best =
3034+
findBestNonTrivialUnswitchCandidate(UnswitchCandidates, L, DT, LI, AC,
3035+
TTI, PartialIVInfo);
3036+
if (!Best)
3037+
return false;
3038+
3039+
assert(Best->TI && "Failed to find loop unswitch candidate");
3040+
3041+
if (Best->Cost >= UnswitchThreshold) {
3042+
LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << Best->Cost
3043+
<< "\n");
30203044
return false;
30213045
}
30223046

3023-
if (BestUnswitchTI != PartialIVCondBranch)
3047+
if (Best->TI != PartialIVCondBranch)
30243048
PartialIVInfo.InstToDuplicate.clear();
30253049

30263050
// If the best candidate is a guard, turn it into a branch.
3027-
if (isGuard(BestUnswitchTI))
3028-
BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
3029-
ExitBlocks, DT, LI, MSSAU);
3030-
3031-
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
3032-
<< BestUnswitchCost << ") terminator: " << *BestUnswitchTI
3033-
<< "\n");
3034-
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
3035-
ExitBlocks, PartialIVInfo, DT, LI, AC,
3036-
UnswitchCB, SE, MSSAU, DestroyLoopCB);
3051+
if (isGuard(Best->TI))
3052+
Best->TI = turnGuardIntoBranch(cast<IntrinsicInst>(Best->TI), L, ExitBlocks,
3053+
DT, LI, MSSAU);
3054+
3055+
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best->Cost
3056+
<< ") terminator: " << *Best->TI << "\n");
3057+
unswitchNontrivialInvariants(L, *Best->TI, Best->Invariants, ExitBlocks,
3058+
PartialIVInfo, DT, LI, AC, UnswitchCB, SE, MSSAU,
3059+
DestroyLoopCB);
30373060
return true;
30383061
}
30393062

0 commit comments

Comments
 (0)