Skip to content

Commit 5d10753

Browse files
committed
[SimpleLoopUnswitch] Inject loop-invariant conditions and unswitch them when it's profitable
Based on https://discourse.llvm.org/t/rfc-inject-invariant-conditions-to-loops-to-enable-unswitching-and-constraint-elimination This transform attempts to handle the following loop: ``` for (...) { x = <some variant> if (x <u C1) {} else break; if (x <u C2) {} else break; } ``` Here `x` is some loop-variant value, and `C1` and `C2` are loop invariants. As we see, this loop has no invariant checks we can unswitch on. However, there is an invariant condition that can make the second check redundant. Specifically, it is `C1 <=u C2`. We can modify this code in the following way: ``` for (...) { x = <some variant> if (x <u C1) {} else break; if (C1 <=u C2) { /* no check is required */ } else { // do the check normally if (x <u C2) {} else break; } } ``` Now we have an invariant condition `C1 <=u C2` and can unswitch on it. This patch introduces the basic version of this transform, with some limitations, all of which seem liftable (but need more work & testing): - All checks are `ult` conditions; - All branches in question stay in the loop if the said condition is true and leave it otherwise; - All in-loop branches are hot enough; There is also room for improvement in the cost model. So far we evaluate the cost of unswitching this newly injected invariant branch the same as if we were unswitching on the 2nd condition, which is not exactly precise (but also not grossly wrong). Differential Revision: https://reviews.llvm.org/D136233 Reviewed By: skatkov
1 parent 125e690 commit 5d10753

File tree

2 files changed

+786
-8
lines changed

2 files changed

+786
-8
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 305 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "llvm/IR/Instructions.h"
4343
#include "llvm/IR/IntrinsicInst.h"
4444
#include "llvm/IR/PatternMatch.h"
45+
#include "llvm/IR/ProfDataUtils.h"
4546
#include "llvm/IR/Use.h"
4647
#include "llvm/IR/Value.h"
4748
#include "llvm/InitializePasses.h"
@@ -78,6 +79,8 @@ STATISTIC(NumTrivial, "Number of unswitches that are trivial");
7879
STATISTIC(
7980
NumCostMultiplierSkipped,
8081
"Number of unswitch candidates that had their cost multiplier skipped");
82+
STATISTIC(NumInvariantConditionsInjected,
83+
"Number of invariant conditions injected and unswitched");
8184

8285
static cl::opt<bool> EnableNonTrivialUnswitch(
8386
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
@@ -118,15 +121,53 @@ static cl::opt<bool> FreezeLoopUnswitchCond(
118121
cl::desc("If enabled, the freeze instruction will be added to condition "
119122
"of loop unswitch to prevent miscompilation."));
120123

124+
// Master switch for the "inject a loop-invariant condition, then unswitch on
// it" transform. Enabled by default; when turned off,
// collectUnswitchCandidatesWithInjections() produces no candidates.
static cl::opt<bool> InjectInvariantConditions(
    "simple-loop-unswitch-inject-invariant-conditions", cl::init(true),
    cl::Hidden,
    cl::desc("Whether we should inject new invariants and unswitch them to "
             "eliminate some existing (non-invariant) conditions."));
129+
130+
// Hotness threshold T for invariant-condition injection. A branch is only
// considered when its profile metadata says the in-loop successor is taken
// with probability at least (T - 1) / T. Defaults to 16.
static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
    "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
    cl::init(16), cl::Hidden,
    cl::desc("Only try to inject loop invariant conditions and "
             "unswitch on them to eliminate branches that are "
             "not-taken 1/<this option> times or less."));
136+
121137
namespace {
138+
struct CompareDesc {
139+
BranchInst *Term;
140+
Value *Invariant;
141+
BasicBlock *InLoopSucc;
142+
143+
CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
144+
: Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
145+
};
146+
147+
struct InjectedInvariant {
148+
ICmpInst::Predicate Pred;
149+
Value *LHS;
150+
Value *RHS;
151+
BasicBlock *InLoopSucc;
152+
153+
InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
154+
BasicBlock *InLoopSucc)
155+
: Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
156+
};
157+
122158
struct NonTrivialUnswitchCandidate {
123159
Instruction *TI = nullptr;
124160
TinyPtrVector<Value *> Invariants;
125161
std::optional<InstructionCost> Cost;
162+
std::optional<InjectedInvariant> PendingInjection;
126163
NonTrivialUnswitchCandidate(
127164
Instruction *TI, ArrayRef<Value *> Invariants,
128-
std::optional<InstructionCost> Cost = std::nullopt)
129-
: TI(TI), Invariants(Invariants), Cost(Cost){};
165+
std::optional<InstructionCost> Cost = std::nullopt,
166+
std::optional<InjectedInvariant> PendingInjection = std::nullopt)
167+
: TI(TI), Invariants(Invariants), Cost(Cost),
168+
PendingInjection(PendingInjection) {};
169+
170+
bool hasPendingInjection() const { return PendingInjection.has_value(); }
130171
};
131172
} // end anonymous namespace.
132173

@@ -2844,6 +2885,252 @@ static bool collectUnswitchCandidates(
28442885
return !UnswitchCandidates.empty();
28452886
}
28462887

2888+
/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2889+
/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2890+
/// injecting a loop-invariant condition.
2891+
static bool shouldTryInjectInvariantCondition(
2892+
const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
2893+
const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
2894+
if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2895+
return false;
2896+
// TODO: Support other predicates.
2897+
if (Pred != ICmpInst::ICMP_ULT)
2898+
return false;
2899+
// TODO: Support non-loop-exiting branches?
2900+
if (!L.contains(IfTrue) || L.contains(IfFalse))
2901+
return false;
2902+
// FIXME: For some reason this causes problems with MSSA updates, need to
2903+
// investigate why. So far, just don't unswitch latch.
2904+
if (L.getHeader() == IfTrue)
2905+
return false;
2906+
return true;
2907+
}
2908+
2909+
/// Returns true, if metadata on \p BI allows us to optimize branching into \p
2910+
/// TakenSucc via injection of invariant conditions. The branch should be not
2911+
/// enough and not previously unswitched, the information about this comes from
2912+
/// the metadata.
2913+
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
2914+
const BasicBlock *TakenSucc) {
2915+
// Skip branches that have already been unswithed this way. After successful
2916+
// unswitching of injected condition, we will still have a copy of this loop
2917+
// which looks exactly the same as original one. To prevent the 2nd attempt
2918+
// of unswitching it in the same pass, mark this branch as "nothing to do
2919+
// here".
2920+
if (BI->hasMetadata("llvm.invariant.condition.injection.disabled"))
2921+
return false;
2922+
SmallVector<uint32_t> Weights;
2923+
if (!extractBranchWeights(*BI, Weights))
2924+
return false;
2925+
unsigned T = InjectInvariantConditionHotnesThreshold;
2926+
BranchProbability LikelyTaken(T - 1, T);
2927+
2928+
assert(Weights.size() == 2 && "Unexpected profile data!");
2929+
size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
2930+
auto Num = Weights[Idx];
2931+
auto Denom = Weights[0] + Weights[1];
2932+
// Degenerate metadata.
2933+
if (Denom == 0)
2934+
return false;
2935+
BranchProbability ActualTaken(Num, Denom);
2936+
if (LikelyTaken > ActualTaken)
2937+
return false;
2938+
return true;
2939+
}
2940+
2941+
/// Materialize pending invariant condition of the given candidate into IR. The
2942+
/// injected loop-invariant condition implies the original loop-variant branch
2943+
/// condition, so the materialization turns
2944+
///
2945+
/// loop_block:
2946+
/// ...
2947+
/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2948+
///
2949+
/// into
2950+
///
2951+
/// preheader:
2952+
/// %invariant_cond = LHS pred RHS
2953+
/// ...
2954+
/// loop_block:
2955+
/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
2956+
/// OriginalCheck:
2957+
/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2958+
/// ...
2959+
static NonTrivialUnswitchCandidate
2960+
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
2961+
DominatorTree &DT, LoopInfo &LI,
2962+
AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
2963+
assert(Candidate.hasPendingInjection() && "Nothing to inject!");
2964+
BasicBlock *Preheader = L.getLoopPreheader();
2965+
assert(Preheader && "Loop is not in simplified form?");
2966+
2967+
auto Pred = Candidate.PendingInjection->Pred;
2968+
auto *LHS = Candidate.PendingInjection->LHS;
2969+
auto *RHS = Candidate.PendingInjection->RHS;
2970+
auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
2971+
auto *TI = cast<BranchInst>(Candidate.TI);
2972+
auto *BB = Candidate.TI->getParent();
2973+
assert(InLoopSucc == TI->getSuccessor(0));
2974+
auto *OutOfLoopSucc = TI->getSuccessor(1);
2975+
// FIXME: Remove this once limitation on successors is lifted.
2976+
assert(L.contains(InLoopSucc) && "Not supported yet!");
2977+
assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
2978+
auto &Ctx = BB->getContext();
2979+
2980+
assert(LHS->getType() == RHS->getType() && "Type mismatch!");
2981+
// Do not use builder here: CreateICmp may simplify this intro a constant and
2982+
// unswitching will break. Better optimize it away later.
2983+
auto *InjectedCond =
2984+
ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
2985+
Preheader->getTerminator());
2986+
auto *OldCond = TI->getCondition();
2987+
2988+
BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
2989+
BB->getParent(), InLoopSucc);
2990+
IRBuilder<> Builder(TI);
2991+
auto *InvariantBr =
2992+
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
2993+
2994+
Builder.SetInsertPoint(CheckBlock);
2995+
auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
2996+
2997+
TI->eraseFromParent();
2998+
// Prevent infinite unswitching.
2999+
NewTerm->setMetadata("llvm.invariant.condition.injection.disabled",
3000+
MDNode::get(BB->getContext(), {}));
3001+
3002+
// Fixup phis.
3003+
for (auto &I : *InLoopSucc) {
3004+
auto *PN = dyn_cast<PHINode>(&I);
3005+
if (!PN)
3006+
break;
3007+
auto *Inc = PN->getIncomingValueForBlock(BB);
3008+
PN->addIncoming(Inc, CheckBlock);
3009+
}
3010+
OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3011+
3012+
SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
3013+
{ DominatorTree::Insert, BB, CheckBlock },
3014+
{ DominatorTree::Insert, CheckBlock, InLoopSucc },
3015+
{ DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3016+
{ DominatorTree::Delete, BB, OutOfLoopSucc }
3017+
};
3018+
3019+
DT.applyUpdates(DTUpdates);
3020+
if (MSSAU)
3021+
MSSAU->applyUpdates(DTUpdates, DT);
3022+
L.addBasicBlockToLoop(CheckBlock, LI);
3023+
3024+
#ifdef EXPENSIVE_CHECKS
3025+
DT.verify();
3026+
LI.verify(DT);
3027+
if (MSSAU && VerifyMemorySSA)
3028+
MSSAU->getMemorySSA()->verifyMemorySSA();
3029+
#endif
3030+
3031+
// TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3032+
// higher because we have just inserted a new block. Need to think how to
3033+
// adjust the cost of injected candidates when it was first computed.
3034+
LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3035+
<< " and considering it for unswitching.");
3036+
++NumInvariantConditionsInjected;
3037+
return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3038+
Candidate.Cost);
3039+
}
3040+
3041+
/// Given chain of loop branch conditions looking like:
3042+
/// br (Variant < Invariant1)
3043+
/// br (Variant < Invariant2)
3044+
/// br (Variant < Invariant3)
3045+
/// ...
3046+
/// collect set of invariant conditions on which we want to unswitch, which
3047+
/// look like:
3048+
/// Invariant1 <= Invariant2
3049+
/// Invariant2 <= Invariant3
3050+
/// ...
3051+
/// Though they might not immediately exist in the IR, we can still inject them.
3052+
static bool insertCandidatesWithPendingInjections(
3053+
SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3054+
ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3055+
const DominatorTree &DT) {
3056+
3057+
assert(ICmpInst::isRelational(Pred));
3058+
assert(ICmpInst::isStrictPredicate(Pred));
3059+
if (Compares.size() < 2)
3060+
return false;
3061+
ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3062+
for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3063+
Next != Compares.end(); ++Prev, ++Next) {
3064+
Value *LHS = Next->Invariant;
3065+
Value *RHS = Prev->Invariant;
3066+
BasicBlock *InLoopSucc = Prev->InLoopSucc;
3067+
InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3068+
NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3069+
std::nullopt, std::move(ToInject));
3070+
UnswitchCandidates.push_back(std::move(Candidate));
3071+
}
3072+
return true;
3073+
}
3074+
3075+
/// Collect unswitch candidates by invariant conditions that are not immediately
3076+
/// present in the loop. However, they can be injected into the code if we
3077+
/// decide it's profitable.
3078+
/// An example of such conditions is following:
3079+
///
3080+
/// for (...) {
3081+
/// x = load ...
3082+
/// if (! x <u C1) break;
3083+
/// if (! x <u C2) break;
3084+
/// <do something>
3085+
/// }
3086+
///
3087+
/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3088+
/// C2" automatically implies "x <u C2", so we can get rid of one of
3089+
/// loop-variant checks in unswitched loop version.
3090+
static bool collectUnswitchCandidatesWithInjections(
3091+
SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3092+
IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3093+
const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3094+
const MemorySSAUpdater *MSSAU) {
3095+
if (!InjectInvariantConditions)
3096+
return false;
3097+
3098+
if (!DT.isReachableFromEntry(L.getHeader()))
3099+
return false;
3100+
auto *Latch = L.getLoopLatch();
3101+
// Need to have a single latch and a preheader.
3102+
if (!Latch)
3103+
return false;
3104+
assert(L.getLoopPreheader() && "Must have a preheader!");
3105+
3106+
DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
3107+
// Traverse the conditions that dominate latch (and therefore dominate each
3108+
// other).
3109+
for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3110+
DTN = DTN->getIDom()) {
3111+
ICmpInst::Predicate Pred;
3112+
Value *LHS = nullptr, *RHS = nullptr;
3113+
BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3114+
auto *BB = DTN->getBlock();
3115+
auto *Term = BB->getTerminator();
3116+
if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3117+
m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3118+
continue;
3119+
if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3120+
continue;
3121+
if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3122+
continue;
3123+
CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3124+
CandidatesULT[LHS].push_back(Desc);
3125+
}
3126+
3127+
bool Found = false;
3128+
for (auto &It : CandidatesULT)
3129+
Found |= insertCandidatesWithPendingInjections(
3130+
UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3131+
return Found;
3132+
}
3133+
28473134
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
28483135
if (!L.isSafeToClone())
28493136
return false;
@@ -3003,10 +3290,11 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
30033290
Instruction &TI = *Candidate.TI;
30043291
ArrayRef<Value *> Invariants = Candidate.Invariants;
30053292
BranchInst *BI = dyn_cast<BranchInst>(&TI);
3006-
InstructionCost CandidateCost = ComputeUnswitchedCost(
3007-
TI, /*FullUnswitch*/ !BI ||
3008-
(Invariants.size() == 1 &&
3009-
Invariants[0] == skipTrivialSelect(BI->getCondition())));
3293+
bool FullUnswitch =
3294+
!BI || Candidate.hasPendingInjection() ||
3295+
(Invariants.size() == 1 &&
3296+
Invariants[0] == skipTrivialSelect(BI->getCondition()));
3297+
InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
30103298
// Calculate cost multiplier which is a tool to limit potentially
30113299
// exponential behavior of loop-unswitch.
30123300
if (EnableUnswitchCostMultiplier) {
@@ -3044,9 +3332,13 @@ static bool unswitchBestCondition(
30443332
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
30453333
IVConditionInfo PartialIVInfo;
30463334
Instruction *PartialIVCondBranch = nullptr;
3335+
collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3336+
PartialIVCondBranch, L, LI, AA, MSSAU);
3337+
collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3338+
PartialIVCondBranch, L, DT, LI, AA,
3339+
MSSAU);
30473340
// If we didn't find any candidates, we're done.
3048-
if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3049-
PartialIVCondBranch, L, LI, AA, MSSAU))
3341+
if (UnswitchCandidates.empty())
30503342
return false;
30513343

30523344
LLVM_DEBUG(
@@ -3065,6 +3357,11 @@ static bool unswitchBestCondition(
30653357
return false;
30663358
}
30673359

3360+
if (Best.hasPendingInjection())
3361+
Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3362+
assert(!Best.hasPendingInjection() &&
3363+
"All injections should have been done by now!");
3364+
30683365
if (Best.TI != PartialIVCondBranch)
30693366
PartialIVInfo.InstToDuplicate.clear();
30703367

0 commit comments

Comments
 (0)