Skip to content

Commit b8709a9

Browse files
committed
[LV] Support fixed order recurrences.
If the incoming previous value of a fixed-order recurrence is a phi in the header, go through incoming values from the latch until we find a non-phi value. Use this as the new Previous; all uses in the header will be dominated by the original phi, but need to be moved after the non-phi previous value. At the moment, fixed-order recurrences are modeled as a chain of first-order recurrences. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D119661
1 parent 1436ada commit b8709a9

File tree

9 files changed

+554
-65
lines changed

9 files changed

+554
-65
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,18 @@ class RecurrenceDescriptor {
180180
DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
181181
DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
182182

183-
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
183+
/// Returns true if Phi is a fixed-order recurrence. A fixed-order recurrence
184184
/// is a non-reduction recurrence relation in which the value of the
185-
/// recurrence in the current loop iteration equals a value defined in the
186-
/// previous iteration. \p SinkAfter includes pairs of instructions where the
187-
/// first will be rescheduled to appear after the second if/when the loop is
188-
/// vectorized. It may be augmented with additional pairs if needed in order
189-
/// to handle Phi as a first-order recurrence.
185+
/// recurrence in the current loop iteration equals a value defined in a
186+
/// previous iteration (e.g. if the value is defined in the previous
187+
/// iteration, we refer to it as first-order recurrence, if it is defined in
188+
/// the iteration before the previous, we refer to it as second-order
189+
/// recurrence and so on). \p SinkAfter includes pairs of instructions where
190+
/// the first will be rescheduled to appear after the second if/when the loop
191+
/// is vectorized. It may be augmented with additional pairs if needed in
192+
/// order to handle Phi as a fixed-order recurrence.
190193
static bool
191-
isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
194+
isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
192195
MapVector<Instruction *, Instruction *> &SinkAfter,
193196
DominatorTree *DT);
194197

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -291,10 +291,10 @@ class LoopVectorizationLegality {
291291
/// Returns the induction variables found in the loop.
292292
const InductionList &getInductionVars() const { return Inductions; }
293293

294-
/// Return the first-order recurrences found in the loop.
295-
RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
294+
/// Return the fixed-order recurrences found in the loop.
295+
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
296296

297-
/// Return the set of instructions to sink to handle first-order recurrences.
297+
/// Return the set of instructions to sink to handle fixed-order recurrences.
298298
MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
299299

300300
/// Returns the widest induction type.
@@ -332,8 +332,8 @@ class LoopVectorizationLegality {
332332
/// Returns True if PN is a reduction variable in this loop.
333333
bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); }
334334

335-
/// Returns True if Phi is a first-order recurrence in this loop.
336-
bool isFirstOrderRecurrence(const PHINode *Phi) const;
335+
/// Returns True if Phi is a fixed-order recurrence in this loop.
336+
bool isFixedOrderRecurrence(const PHINode *Phi) const;
337337

338338
/// Return true if the block BB needs to be predicated in order for the loop
339339
/// to be vectorized.
@@ -515,11 +515,11 @@ class LoopVectorizationLegality {
515515
/// loop body.
516516
SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
517517

518-
/// Holds the phi nodes that are first-order recurrences.
519-
RecurrenceSet FirstOrderRecurrences;
518+
/// Holds the phi nodes that are fixed-order recurrences.
519+
RecurrenceSet FixedOrderRecurrences;
520520

521521
/// Holds instructions that need to sink past other instructions to handle
522-
/// first-order recurrences.
522+
/// fixed-order recurrences.
523523
MapVector<Instruction *, Instruction *> SinkAfter;
524524

525525
/// Holds the widest induction type encountered.

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
921921
return false;
922922
}
923923

924-
bool RecurrenceDescriptor::isFirstOrderRecurrence(
924+
bool RecurrenceDescriptor::isFixedOrderRecurrence(
925925
PHINode *Phi, Loop *TheLoop,
926926
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
927927

@@ -945,6 +945,20 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
945945
// Get the previous value. The previous value comes from the latch edge while
946946
// the initial value comes from the preheader edge.
947947
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
948+
949+
// If Previous is a phi in the header, go through incoming values from the
950+
// latch until we find a non-phi value. Use this as the new Previous, all uses
951+
// in the header will be dominated by the original phi, but need to be moved
952+
// after the non-phi previous value.
953+
SmallPtrSet<PHINode *, 4> SeenPhis;
954+
while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
955+
if (PrevPhi->getParent() != Phi->getParent())
956+
return false;
957+
if (!SeenPhis.insert(PrevPhi).second)
958+
return false;
959+
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
960+
}
961+
948962
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
949963
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
950964
return false;
@@ -986,7 +1000,7 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
9861000
return false;
9871001

9881002
// Avoid sinking an instruction multiple times (if multiple operands are
989-
// first order recurrences) by sinking once - after the latest 'previous'
1003+
// fixed order recurrences) by sinking once - after the latest 'previous'
9901004
// instruction.
9911005
auto It = SinkAfter.find(SinkCandidate);
9921006
if (It != SinkAfter.end()) {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
102102
"\nsimple Use tail-folding for simple loops (not reductions or "
103103
"recurrences)"
104104
"\nreductions Use tail-folding for loops containing reductions"
105-
"\nrecurrences Use tail-folding for loops containing first order "
105+
"\nrecurrences Use tail-folding for loops containing fixed order "
106106
"recurrences"),
107107
cl::location(TailFoldingKindLoc));
108108

@@ -3044,7 +3044,7 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(
30443044
TailFoldingKind Required; // Defaults to 0.
30453045
if (LVL->getReductionVars().size())
30463046
Required.add(TailFoldingKind::TFReductions);
3047-
if (LVL->getFirstOrderRecurrences().size())
3047+
if (LVL->getFixedOrderRecurrences().size())
30483048
Required.add(TailFoldingKind::TFRecurrences);
30493049
if (!Required)
30503050
Required.add(TailFoldingKind::TFSimple);

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
677677
// Non-header phi nodes that have outside uses can be vectorized. Add
678678
// them to the list of allowed exits.
679679
// Unsafe cyclic dependencies with header phis are identified during
680-
// legalization for reduction, induction and first order
680+
// legalization for reduction, induction and fixed order
681681
// recurrences.
682682
AllowedExit.insert(&I);
683683
continue;
@@ -710,7 +710,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
710710
// 3. Non-Phis with outside uses when SCEV predicates cannot be used
711711
// outside the loop - see call to hasOutsideLoopUser in the non-phi
712712
// handling below
713-
// 4. FirstOrderRecurrence phis that can possibly be handled by
713+
// 4. FixedOrderRecurrence phis that can possibly be handled by
714714
// extraction.
715715
// By recording these, we can then reason about ways to vectorize each
716716
// of these NotAllowedExit.
@@ -721,10 +721,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
721721
continue;
722722
}
723723

724-
if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
724+
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
725725
SinkAfter, DT)) {
726726
AllowedExit.insert(Phi);
727-
FirstOrderRecurrences.insert(Phi);
727+
FixedOrderRecurrences.insert(Phi);
728728
continue;
729729
}
730730

@@ -894,12 +894,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
894894
}
895895
}
896896

897-
// For first order recurrences, we use the previous value (incoming value from
897+
// For fixed order recurrences, we use the previous value (incoming value from
898898
// the latch) to check if it dominates all users of the recurrence. Bail out
899899
// if we have to sink such an instruction for another recurrence, as the
900900
// dominance requirement may not hold after sinking.
901901
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
902-
if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
902+
if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
903903
Instruction *V =
904904
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
905905
return SinkAfter.find(V) != SinkAfter.end();
@@ -1080,9 +1080,9 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
10801080
return isInductionPhi(V) || isCastedInductionVariable(V);
10811081
}
10821082

1083-
bool LoopVectorizationLegality::isFirstOrderRecurrence(
1083+
bool LoopVectorizationLegality::isFixedOrderRecurrence(
10841084
const PHINode *Phi) const {
1085-
return FirstOrderRecurrences.count(Phi);
1085+
return FixedOrderRecurrences.count(Phi);
10861086
}
10871087

10881088
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ class InnerLoopVectorizer {
552552

553553
/// Create the exit value of first order recurrences in the middle block and
554554
/// update their users.
555-
void fixFirstOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
555+
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
556556
VPTransformState &State);
557557

558558
/// Create code for the loop exit value of the reduction.
@@ -3705,11 +3705,11 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
37053705
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
37063706
fixReduction(ReductionPhi, State);
37073707
else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
3708-
fixFirstOrderRecurrence(FOR, State);
3708+
fixFixedOrderRecurrence(FOR, State);
37093709
}
37103710
}
37113711

3712-
void InnerLoopVectorizer::fixFirstOrderRecurrence(
3712+
void InnerLoopVectorizer::fixFixedOrderRecurrence(
37133713
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
37143714
// This is the second phase of vectorizing first-order recurrences. An
37153715
// overview of the transformation is described below. Suppose we have the
@@ -4002,7 +4002,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
40024002

40034003
// We know that the loop is in LCSSA form. We need to update the PHI nodes
40044004
// in the exit blocks. See comment on analogous loop in
4005-
// fixFirstOrderRecurrence for a more complete explaination of the logic.
4005+
// fixFixedOrderRecurrence for a more complete explanation of the logic.
40064006
if (!Cost->requiresScalarEpilogue(VF))
40074007
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
40084008
if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) {
@@ -4738,7 +4738,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
47384738
// First order recurrence Phi's should typically be considered
47394739
// non-uniform.
47404740
auto *OP = dyn_cast<PHINode>(OV);
4741-
if (OP && Legal->isFirstOrderRecurrence(OP))
4741+
if (OP && Legal->isFixedOrderRecurrence(OP))
47424742
continue;
47434743
// If all the users of the operand are uniform, then add the
47444744
// operand into the uniform worklist.
@@ -5427,7 +5427,7 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
54275427
// Cross iteration phis such as reductions need special handling and are
54285428
// currently unsupported.
54295429
if (any_of(L.getHeader()->phis(),
5430-
[&](PHINode &Phi) { return Legal->isFirstOrderRecurrence(&Phi); }))
5430+
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
54315431
return false;
54325432

54335433
// Phis with uses outside of the loop require special handling and are
@@ -7032,7 +7032,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
70327032

70337033
// First-order recurrences are replaced by vector shuffles inside the loop.
70347034
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
7035-
if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
7035+
if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi))
70367036
return TTI.getShuffleCost(
70377037
TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
70387038
None, VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
@@ -8509,13 +8509,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85098509
if (auto Phi = dyn_cast<PHINode>(Instr)) {
85108510
if (Phi->getParent() != OrigLoop->getHeader())
85118511
return tryToBlend(Phi, Operands, Plan);
8512+
8513+
// Always record recipes for header phis. Later first-order recurrence phis
8514+
// can have earlier phis as incoming values.
8515+
recordRecipeOf(Phi);
8516+
85128517
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
85138518
return toVPRecipeResult(Recipe);
85148519

85158520
VPHeaderPHIRecipe *PhiRecipe = nullptr;
85168521
assert((Legal->isReductionVariable(Phi) ||
8517-
Legal->isFirstOrderRecurrence(Phi)) &&
8518-
"can only widen reductions and first-order recurrences here");
8522+
Legal->isFixedOrderRecurrence(Phi)) &&
8523+
"can only widen reductions and fixed-order recurrences here");
85198524
VPValue *StartV = Operands[0];
85208525
if (Legal->isReductionVariable(Phi)) {
85218526
const RecurrenceDescriptor &RdxDesc =
@@ -8526,13 +8531,21 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85268531
CM.isInLoopReduction(Phi),
85278532
CM.useOrderedReductions(RdxDesc));
85288533
} else {
8534+
// TODO: Currently fixed-order recurrences are modeled as chains of
8535+
// first-order recurrences. If there are no users of the intermediate
8536+
// recurrences in the chain, the fixed order recurrence should be modeled
8537+
// directly, enabling more efficient codegen.
85298538
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
85308539
}
85318540

85328541
// Record the incoming value from the backedge, so we can add the incoming
85338542
// value from the backedge after all recipes have been created.
8534-
recordRecipeOf(cast<Instruction>(
8535-
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
8543+
auto *Inc = cast<Instruction>(
8544+
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
8545+
auto RecipeIter = Ingredient2Recipe.find(Inc);
8546+
if (RecipeIter == Ingredient2Recipe.end())
8547+
recordRecipeOf(Inc);
8548+
85368549
PhisToFix.push_back(PhiRecipe);
85378550
return toVPRecipeResult(PhiRecipe);
85388551
}
@@ -8597,7 +8610,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
85978610
assert(
85988611
SinkTarget != FirstInst &&
85998612
"Must find a live instruction (at least the one feeding the "
8600-
"first-order recurrence PHI) before reaching beginning of the block");
8613+
"fixed-order recurrence PHI) before reaching beginning of the block");
86018614
SinkTarget = SinkTarget->getPrevNode();
86028615
assert(SinkTarget != P.first &&
86038616
"sink source equals target, no sinking required");
@@ -8982,14 +8995,19 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
89828995
RecipeBuilder, Range.Start);
89838996

89848997
// Introduce a recipe to combine the incoming and previous values of a
8985-
// first-order recurrence.
8998+
// fixed-order recurrence.
89868999
for (VPRecipeBase &R :
89879000
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
89889001
auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
89899002
if (!RecurPhi)
89909003
continue;
89919004

89929005
VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
9006+
// Fixed-order recurrences do not contain cycles, so this loop is guaranteed
9007+
// to terminate.
9008+
while (auto *PrevPhi =
9009+
dyn_cast<VPFirstOrderRecurrencePHIRecipe>(PrevRecipe))
9010+
PrevRecipe = PrevPhi->getBackedgeRecipe();
89939011
VPBasicBlock *InsertBlock = PrevRecipe->getParent();
89949012
auto *Region = GetReplicateRegion(PrevRecipe);
89959013
if (Region)

0 commit comments

Comments
 (0)