42
42
#include " llvm/IR/Instructions.h"
43
43
#include " llvm/IR/IntrinsicInst.h"
44
44
#include " llvm/IR/PatternMatch.h"
45
+ #include " llvm/IR/ProfDataUtils.h"
45
46
#include " llvm/IR/Use.h"
46
47
#include " llvm/IR/Value.h"
47
48
#include " llvm/InitializePasses.h"
@@ -78,6 +79,8 @@ STATISTIC(NumTrivial, "Number of unswitches that are trivial");
78
79
// Pass statistic; per its description, tracks unswitch candidates whose cost
// multiplier was skipped.
STATISTIC(NumCostMultiplierSkipped,
          "Number of unswitch candidates that had their cost multiplier "
          "skipped");
// Pass statistic; bumped once per invariant condition that gets materialized
// in the IR for unswitching.
STATISTIC(NumInvariantConditionsInjected,
          "Number of invariant conditions injected and unswitched");
81
84
82
85
static cl::opt<bool > EnableNonTrivialUnswitch (
83
86
" enable-nontrivial-unswitch" , cl::init(false ), cl::Hidden,
@@ -118,15 +121,53 @@ static cl::opt<bool> FreezeLoopUnswitchCond(
118
121
cl::desc(" If enabled, the freeze instruction will be added to condition "
119
122
" of loop unswitch to prevent miscompilation." ));
120
123
124
+ static cl::opt<bool > InjectInvariantConditions (
125
+ " simple-loop-unswitch-inject-invariant-conditions" , cl::Hidden,
126
+ cl::desc (" Whether we should inject new invariants and unswitch them to "
127
+ " eliminate some existing (non-invariant) conditions." ),
128
+ cl::init(true ));
129
+
130
+ static cl::opt<unsigned > InjectInvariantConditionHotnesThreshold (
131
+ " simple-loop-unswitch-inject-invariant-condition-hotness-threshold" ,
132
+ cl::Hidden, cl::desc(" Only try to inject loop invariant conditions and "
133
+ " unswitch on them to eliminate branches that are "
134
+ " not-taken 1/<this option> times or less." ),
135
+ cl::init(16 ));
136
+
121
137
namespace {
138
+ struct CompareDesc {
139
+ BranchInst *Term;
140
+ Value *Invariant;
141
+ BasicBlock *InLoopSucc;
142
+
143
+ CompareDesc (BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
144
+ : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
145
+ };
146
+
147
+ struct InjectedInvariant {
148
+ ICmpInst::Predicate Pred;
149
+ Value *LHS;
150
+ Value *RHS;
151
+ BasicBlock *InLoopSucc;
152
+
153
+ InjectedInvariant (ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
154
+ BasicBlock *InLoopSucc)
155
+ : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
156
+ };
157
+
122
158
struct NonTrivialUnswitchCandidate {
123
159
Instruction *TI = nullptr ;
124
160
TinyPtrVector<Value *> Invariants;
125
161
std::optional<InstructionCost> Cost;
162
+ std::optional<InjectedInvariant> PendingInjection;
126
163
NonTrivialUnswitchCandidate (
127
164
Instruction *TI, ArrayRef<Value *> Invariants,
128
- std::optional<InstructionCost> Cost = std::nullopt)
129
- : TI(TI), Invariants(Invariants), Cost(Cost){};
165
+ std::optional<InstructionCost> Cost = std::nullopt,
166
+ std::optional<InjectedInvariant> PendingInjection = std::nullopt)
167
+ : TI(TI), Invariants(Invariants), Cost(Cost),
168
+ PendingInjection (PendingInjection) {};
169
+
170
+ bool hasPendingInjection () const { return PendingInjection.has_value (); }
130
171
};
131
172
} // end anonymous namespace.
132
173
@@ -2844,6 +2885,252 @@ static bool collectUnswitchCandidates(
2844
2885
return !UnswitchCandidates.empty ();
2845
2886
}
2846
2887
2888
+ // / Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2889
+ // / succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2890
+ // / injecting a loop-invariant condition.
2891
+ static bool shouldTryInjectInvariantCondition (
2892
+ const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
2893
+ const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
2894
+ if (L.isLoopInvariant (LHS) || !L.isLoopInvariant (RHS))
2895
+ return false ;
2896
+ // TODO: Support other predicates.
2897
+ if (Pred != ICmpInst::ICMP_ULT)
2898
+ return false ;
2899
+ // TODO: Support non-loop-exiting branches?
2900
+ if (!L.contains (IfTrue) || L.contains (IfFalse))
2901
+ return false ;
2902
+ // FIXME: For some reason this causes problems with MSSA updates, need to
2903
+ // investigate why. So far, just don't unswitch latch.
2904
+ if (L.getHeader () == IfTrue)
2905
+ return false ;
2906
+ return true ;
2907
+ }
2908
+
2909
+ // / Returns true, if metadata on \p BI allows us to optimize branching into \p
2910
+ // / TakenSucc via injection of invariant conditions. The branch should be not
2911
+ // / enough and not previously unswitched, the information about this comes from
2912
+ // / the metadata.
2913
+ bool shouldTryInjectBasingOnMetadata (const BranchInst *BI,
2914
+ const BasicBlock *TakenSucc) {
2915
+ // Skip branches that have already been unswithed this way. After successful
2916
+ // unswitching of injected condition, we will still have a copy of this loop
2917
+ // which looks exactly the same as original one. To prevent the 2nd attempt
2918
+ // of unswitching it in the same pass, mark this branch as "nothing to do
2919
+ // here".
2920
+ if (BI->hasMetadata (" llvm.invariant.condition.injection.disabled" ))
2921
+ return false ;
2922
+ SmallVector<uint32_t > Weights;
2923
+ if (!extractBranchWeights (*BI, Weights))
2924
+ return false ;
2925
+ unsigned T = InjectInvariantConditionHotnesThreshold;
2926
+ BranchProbability LikelyTaken (T - 1 , T);
2927
+
2928
+ assert (Weights.size () == 2 && " Unexpected profile data!" );
2929
+ size_t Idx = BI->getSuccessor (0 ) == TakenSucc ? 0 : 1 ;
2930
+ auto Num = Weights[Idx];
2931
+ auto Denom = Weights[0 ] + Weights[1 ];
2932
+ // Degenerate metadata.
2933
+ if (Denom == 0 )
2934
+ return false ;
2935
+ BranchProbability ActualTaken (Num, Denom);
2936
+ if (LikelyTaken > ActualTaken)
2937
+ return false ;
2938
+ return true ;
2939
+ }
2940
+
2941
+ // / Materialize pending invariant condition of the given candidate into IR. The
2942
+ // / injected loop-invariant condition implies the original loop-variant branch
2943
+ // / condition, so the materialization turns
2944
+ // /
2945
+ // / loop_block:
2946
+ // / ...
2947
+ // / br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2948
+ // /
2949
+ // / into
2950
+ // /
2951
+ // / preheader:
2952
+ // / %invariant_cond = LHS pred RHS
2953
+ // / ...
2954
+ // / loop_block:
2955
+ // / br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
2956
+ // / OriginalCheck:
2957
+ // / br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
2958
+ // / ...
2959
+ static NonTrivialUnswitchCandidate
2960
+ injectPendingInvariantConditions (NonTrivialUnswitchCandidate Candidate, Loop &L,
2961
+ DominatorTree &DT, LoopInfo &LI,
2962
+ AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
2963
+ assert (Candidate.hasPendingInjection () && " Nothing to inject!" );
2964
+ BasicBlock *Preheader = L.getLoopPreheader ();
2965
+ assert (Preheader && " Loop is not in simplified form?" );
2966
+
2967
+ auto Pred = Candidate.PendingInjection ->Pred ;
2968
+ auto *LHS = Candidate.PendingInjection ->LHS ;
2969
+ auto *RHS = Candidate.PendingInjection ->RHS ;
2970
+ auto *InLoopSucc = Candidate.PendingInjection ->InLoopSucc ;
2971
+ auto *TI = cast<BranchInst>(Candidate.TI );
2972
+ auto *BB = Candidate.TI ->getParent ();
2973
+ assert (InLoopSucc == TI->getSuccessor (0 ));
2974
+ auto *OutOfLoopSucc = TI->getSuccessor (1 );
2975
+ // FIXME: Remove this once limitation on successors is lifted.
2976
+ assert (L.contains (InLoopSucc) && " Not supported yet!" );
2977
+ assert (!L.contains (OutOfLoopSucc) && " Not supported yet!" );
2978
+ auto &Ctx = BB->getContext ();
2979
+
2980
+ assert (LHS->getType () == RHS->getType () && " Type mismatch!" );
2981
+ // Do not use builder here: CreateICmp may simplify this intro a constant and
2982
+ // unswitching will break. Better optimize it away later.
2983
+ auto *InjectedCond =
2984
+ ICmpInst::Create (Instruction::ICmp, Pred, LHS, RHS, " injected.cond" ,
2985
+ Preheader->getTerminator ());
2986
+ auto *OldCond = TI->getCondition ();
2987
+
2988
+ BasicBlock *CheckBlock = BasicBlock::Create (Ctx, BB->getName () + " .check" ,
2989
+ BB->getParent (), InLoopSucc);
2990
+ IRBuilder<> Builder (TI);
2991
+ auto *InvariantBr =
2992
+ Builder.CreateCondBr (InjectedCond, InLoopSucc, CheckBlock);
2993
+
2994
+ Builder.SetInsertPoint (CheckBlock);
2995
+ auto *NewTerm = Builder.CreateCondBr (OldCond, InLoopSucc, OutOfLoopSucc);
2996
+
2997
+ TI->eraseFromParent ();
2998
+ // Prevent infinite unswitching.
2999
+ NewTerm->setMetadata (" llvm.invariant.condition.injection.disabled" ,
3000
+ MDNode::get (BB->getContext (), {}));
3001
+
3002
+ // Fixup phis.
3003
+ for (auto &I : *InLoopSucc) {
3004
+ auto *PN = dyn_cast<PHINode>(&I);
3005
+ if (!PN)
3006
+ break ;
3007
+ auto *Inc = PN->getIncomingValueForBlock (BB);
3008
+ PN->addIncoming (Inc, CheckBlock);
3009
+ }
3010
+ OutOfLoopSucc->replacePhiUsesWith (BB, CheckBlock);
3011
+
3012
+ SmallVector<DominatorTree::UpdateType, 4 > DTUpdates = {
3013
+ { DominatorTree::Insert, BB, CheckBlock },
3014
+ { DominatorTree::Insert, CheckBlock, InLoopSucc },
3015
+ { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3016
+ { DominatorTree::Delete, BB, OutOfLoopSucc }
3017
+ };
3018
+
3019
+ DT.applyUpdates (DTUpdates);
3020
+ if (MSSAU)
3021
+ MSSAU->applyUpdates (DTUpdates, DT);
3022
+ L.addBasicBlockToLoop (CheckBlock, LI);
3023
+
3024
+ #ifdef EXPENSIVE_CHECKS
3025
+ DT.verify ();
3026
+ LI.verify (DT);
3027
+ if (MSSAU && VerifyMemorySSA)
3028
+ MSSAU->getMemorySSA ()->verifyMemorySSA ();
3029
+ #endif
3030
+
3031
+ // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3032
+ // higher because we have just inserted a new block. Need to think how to
3033
+ // adjust the cost of injected candidates when it was first computed.
3034
+ LLVM_DEBUG (dbgs () << " Injected a new loop-invariant branch " << *InvariantBr
3035
+ << " and considering it for unswitching." );
3036
+ ++NumInvariantConditionsInjected;
3037
+ return NonTrivialUnswitchCandidate (InvariantBr, { InjectedCond },
3038
+ Candidate.Cost );
3039
+ }
3040
+
3041
+ // / Given chain of loop branch conditions looking like:
3042
+ // / br (Variant < Invariant1)
3043
+ // / br (Variant < Invariant2)
3044
+ // / br (Variant < Invariant3)
3045
+ // / ...
3046
+ // / collect set of invariant conditions on which we want to unswitch, which
3047
+ // / look like:
3048
+ // / Invariant1 <= Invariant2
3049
+ // / Invariant2 <= Invariant3
3050
+ // / ...
3051
+ // / Though they might not immediately exist in the IR, we can still inject them.
3052
+ static bool insertCandidatesWithPendingInjections (
3053
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3054
+ ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3055
+ const DominatorTree &DT) {
3056
+
3057
+ assert (ICmpInst::isRelational (Pred));
3058
+ assert (ICmpInst::isStrictPredicate (Pred));
3059
+ if (Compares.size () < 2 )
3060
+ return false ;
3061
+ ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate (Pred);
3062
+ for (auto Prev = Compares.begin (), Next = Compares.begin () + 1 ;
3063
+ Next != Compares.end (); ++Prev, ++Next) {
3064
+ Value *LHS = Next->Invariant ;
3065
+ Value *RHS = Prev->Invariant ;
3066
+ BasicBlock *InLoopSucc = Prev->InLoopSucc ;
3067
+ InjectedInvariant ToInject (NonStrictPred, LHS, RHS, InLoopSucc);
3068
+ NonTrivialUnswitchCandidate Candidate (Prev->Term , { LHS, RHS },
3069
+ std::nullopt, std::move (ToInject));
3070
+ UnswitchCandidates.push_back (std::move (Candidate));
3071
+ }
3072
+ return true ;
3073
+ }
3074
+
3075
+ // / Collect unswitch candidates by invariant conditions that are not immediately
3076
+ // / present in the loop. However, they can be injected into the code if we
3077
+ // / decide it's profitable.
3078
+ // / An example of such conditions is following:
3079
+ // /
3080
+ // / for (...) {
3081
+ // / x = load ...
3082
+ // / if (! x <u C1) break;
3083
+ // / if (! x <u C2) break;
3084
+ // / <do something>
3085
+ // / }
3086
+ // /
3087
+ // / We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3088
+ // / C2" automatically implies "x <u C2", so we can get rid of one of
3089
+ // / loop-variant checks in unswitched loop version.
3090
+ static bool collectUnswitchCandidatesWithInjections (
3091
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3092
+ IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3093
+ const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3094
+ const MemorySSAUpdater *MSSAU) {
3095
+ if (!InjectInvariantConditions)
3096
+ return false ;
3097
+
3098
+ if (!DT.isReachableFromEntry (L.getHeader ()))
3099
+ return false ;
3100
+ auto *Latch = L.getLoopLatch ();
3101
+ // Need to have a single latch and a preheader.
3102
+ if (!Latch)
3103
+ return false ;
3104
+ assert (L.getLoopPreheader () && " Must have a preheader!" );
3105
+
3106
+ DenseMap<Value *, SmallVector<CompareDesc, 4 > > CandidatesULT;
3107
+ // Traverse the conditions that dominate latch (and therefore dominate each
3108
+ // other).
3109
+ for (auto *DTN = DT.getNode (Latch); L.contains (DTN->getBlock ());
3110
+ DTN = DTN->getIDom ()) {
3111
+ ICmpInst::Predicate Pred;
3112
+ Value *LHS = nullptr , *RHS = nullptr ;
3113
+ BasicBlock *IfTrue = nullptr , *IfFalse = nullptr ;
3114
+ auto *BB = DTN->getBlock ();
3115
+ auto *Term = BB->getTerminator ();
3116
+ if (!match (Term, m_Br (m_ICmp (Pred, m_Value (LHS), m_Value (RHS)),
3117
+ m_BasicBlock (IfTrue), m_BasicBlock (IfFalse))))
3118
+ continue ;
3119
+ if (!shouldTryInjectInvariantCondition (Pred, LHS, RHS, IfTrue, IfFalse, L))
3120
+ continue ;
3121
+ if (!shouldTryInjectBasingOnMetadata (cast<BranchInst>(Term), IfTrue))
3122
+ continue ;
3123
+ CompareDesc Desc (cast<BranchInst>(Term), RHS, IfTrue);
3124
+ CandidatesULT[LHS].push_back (Desc);
3125
+ }
3126
+
3127
+ bool Found = false ;
3128
+ for (auto &It : CandidatesULT)
3129
+ Found |= insertCandidatesWithPendingInjections (
3130
+ UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second , DT);
3131
+ return Found;
3132
+ }
3133
+
2847
3134
static bool isSafeForNoNTrivialUnswitching (Loop &L, LoopInfo &LI) {
2848
3135
if (!L.isSafeToClone ())
2849
3136
return false ;
@@ -3003,10 +3290,11 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3003
3290
Instruction &TI = *Candidate.TI ;
3004
3291
ArrayRef<Value *> Invariants = Candidate.Invariants ;
3005
3292
BranchInst *BI = dyn_cast<BranchInst>(&TI);
3006
- InstructionCost CandidateCost = ComputeUnswitchedCost (
3007
- TI, /* FullUnswitch*/ !BI ||
3008
- (Invariants.size () == 1 &&
3009
- Invariants[0 ] == skipTrivialSelect (BI->getCondition ())));
3293
+ bool FullUnswitch =
3294
+ !BI || Candidate.hasPendingInjection () ||
3295
+ (Invariants.size () == 1 &&
3296
+ Invariants[0 ] == skipTrivialSelect (BI->getCondition ()));
3297
+ InstructionCost CandidateCost = ComputeUnswitchedCost (TI, FullUnswitch);
3010
3298
// Calculate cost multiplier which is a tool to limit potentially
3011
3299
// exponential behavior of loop-unswitch.
3012
3300
if (EnableUnswitchCostMultiplier) {
@@ -3044,9 +3332,13 @@ static bool unswitchBestCondition(
3044
3332
SmallVector<NonTrivialUnswitchCandidate, 4 > UnswitchCandidates;
3045
3333
IVConditionInfo PartialIVInfo;
3046
3334
Instruction *PartialIVCondBranch = nullptr ;
3335
+ collectUnswitchCandidates (UnswitchCandidates, PartialIVInfo,
3336
+ PartialIVCondBranch, L, LI, AA, MSSAU);
3337
+ collectUnswitchCandidatesWithInjections (UnswitchCandidates, PartialIVInfo,
3338
+ PartialIVCondBranch, L, DT, LI, AA,
3339
+ MSSAU);
3047
3340
// If we didn't find any candidates, we're done.
3048
- if (!collectUnswitchCandidates (UnswitchCandidates, PartialIVInfo,
3049
- PartialIVCondBranch, L, LI, AA, MSSAU))
3341
+ if (UnswitchCandidates.empty ())
3050
3342
return false ;
3051
3343
3052
3344
LLVM_DEBUG (
@@ -3065,6 +3357,11 @@ static bool unswitchBestCondition(
3065
3357
return false ;
3066
3358
}
3067
3359
3360
+ if (Best.hasPendingInjection ())
3361
+ Best = injectPendingInvariantConditions (Best, L, DT, LI, AC, MSSAU);
3362
+ assert (!Best.hasPendingInjection () &&
3363
+ " All injections should have been done by now!" );
3364
+
3068
3365
if (Best.TI != PartialIVCondBranch)
3069
3366
PartialIVInfo.InstToDuplicate .clear ();
3070
3367
0 commit comments