Skip to content

Commit ac00b90

Browse files
committed
[LV] Support fixed order recurrences.
If the incoming previous value of a fixed-order recurrence is a phi in the header, go through incoming values from the latch until we find a non-phi value. Use this as the new Previous; all uses in the header will be dominated by the original phi, but need to be moved after the non-phi previous value. At the moment, fixed-order recurrences are modeled as a chain of first-order recurrences. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D119661 (cherry-picked from b8709a9)
1 parent eb60cb0 commit ac00b90

File tree

8 files changed

+866
-258
lines changed

8 files changed

+866
-258
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,18 @@ class RecurrenceDescriptor {
180180
AssumptionCache *AC = nullptr,
181181
DominatorTree *DT = nullptr);
182182

183-
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
183+
/// Returns true if Phi is a fixed-order recurrence. A fixed-order recurrence
184184
/// is a non-reduction recurrence relation in which the value of the
185-
/// recurrence in the current loop iteration equals a value defined in the
186-
/// previous iteration. \p SinkAfter includes pairs of instructions where the
187-
/// first will be rescheduled to appear after the second if/when the loop is
188-
/// vectorized. It may be augmented with additional pairs if needed in order
189-
/// to handle Phi as a first-order recurrence.
185+
/// recurrence in the current loop iteration equals a value defined in a
186+
/// previous iteration (e.g. if the value is defined in the previous
187+
/// iteration, we refer to it as first-order recurrence, if it is defined in
188+
/// the iteration before the previous, we refer to it as second-order
189+
/// recurrence and so on). \p SinkAfter includes pairs of instructions where
190+
/// the first will be rescheduled to appear after the second if/when the loop
191+
/// is vectorized. It may be augmented with additional pairs if needed in
192+
/// order to handle Phi as a fixed-order recurrence.
190193
static bool
191-
isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
194+
isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
192195
MapVector<Instruction *, Instruction *> &SinkAfter,
193196
DominatorTree *DT);
194197

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,10 @@ class LoopVectorizationLegality {
299299
/// Returns the induction variables found in the loop.
300300
const InductionList &getInductionVars() const { return Inductions; }
301301

302-
/// Return the first-order recurrences found in the loop.
303-
RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
302+
/// Return the fixed-order recurrences found in the loop.
303+
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
304304

305-
/// Return the set of instructions to sink to handle first-order recurrences.
305+
/// Return the set of instructions to sink to handle fixed-order recurrences.
306306
MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
307307

308308
/// Returns the widest induction type.
@@ -332,8 +332,8 @@ class LoopVectorizationLegality {
332332
/// Returns True if PN is a reduction variable in this loop.
333333
bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); }
334334

335-
/// Returns True if Phi is a first-order recurrence in this loop.
336-
bool isFirstOrderRecurrence(const PHINode *Phi) const;
335+
/// Returns True if Phi is a fixed-order recurrence in this loop.
336+
bool isFixedOrderRecurrence(const PHINode *Phi) const;
337337

338338
/// Return true if the block BB needs to be predicated in order for the loop
339339
/// to be vectorized.
@@ -524,11 +524,11 @@ class LoopVectorizationLegality {
524524
/// loop body.
525525
SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
526526

527-
/// Holds the phi nodes that are first-order recurrences.
528-
RecurrenceSet FirstOrderRecurrences;
527+
/// Holds the phi nodes that are fixed-order recurrences.
528+
RecurrenceSet FixedOrderRecurrences;
529529

530530
/// Holds instructions that need to sink past other instructions to handle
531-
/// first-order recurrences.
531+
/// fixed-order recurrences.
532532
MapVector<Instruction *, Instruction *> SinkAfter;
533533

534534
/// Holds the widest induction type encountered.

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
843843
return false;
844844
}
845845

846-
bool RecurrenceDescriptor::isFirstOrderRecurrence(
846+
bool RecurrenceDescriptor::isFixedOrderRecurrence(
847847
PHINode *Phi, Loop *TheLoop,
848848
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
849849

@@ -867,6 +867,20 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
867867
// Get the previous value. The previous value comes from the latch edge while
868868
// the initial value comes from the preheader edge.
869869
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
870+
871+
// If Previous is a phi in the header, go through incoming values from the
872+
// latch until we find a non-phi value. Use this as the new Previous, all uses
873+
// in the header will be dominated by the original phi, but need to be moved
874+
// after the non-phi previous value.
875+
SmallPtrSet<PHINode *, 4> SeenPhis;
876+
while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
877+
if (PrevPhi->getParent() != Phi->getParent())
878+
return false;
879+
if (!SeenPhis.insert(PrevPhi).second)
880+
return false;
881+
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
882+
}
883+
870884
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
871885
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
872886
return false;
@@ -908,7 +922,7 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
908922
return false;
909923

910924
// Avoid sinking an instruction multiple times (if multiple operands are
911-
// first order recurrences) by sinking once - after the latest 'previous'
925+
// fixed order recurrences) by sinking once - after the latest 'previous'
912926
// instruction.
913927
auto It = SinkAfter.find(SinkCandidate);
914928
if (It != SinkAfter.end()) {

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
662662
// Non-header phi nodes that have outside uses can be vectorized. Add
663663
// them to the list of allowed exits.
664664
// Unsafe cyclic dependencies with header phis are identified during
665-
// legalization for reduction, induction and first order
665+
// legalization for reduction, induction and fixed order
666666
// recurrences.
667667
AllowedExit.insert(&I);
668668
continue;
@@ -695,7 +695,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
695695
// 3. Non-Phis with outside uses when SCEV predicates cannot be used
696696
// outside the loop - see call to hasOutsideLoopUser in the non-phi
697697
// handling below
698-
// 4. FirstOrderRecurrence phis that can possibly be handled by
698+
// 4. FixedOrderRecurrence phis that can possibly be handled by
699699
// extraction.
700700
// By recording these, we can then reason about ways to vectorize each
701701
// of these NotAllowedExit.
@@ -706,10 +706,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
706706
continue;
707707
}
708708

709-
if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
709+
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
710710
SinkAfter, DT)) {
711711
AllowedExit.insert(Phi);
712-
FirstOrderRecurrences.insert(Phi);
712+
FixedOrderRecurrences.insert(Phi);
713713
continue;
714714
}
715715

@@ -879,12 +879,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
879879
}
880880
}
881881

882-
// For first order recurrences, we use the previous value (incoming value from
882+
// For fixed order recurrences, we use the previous value (incoming value from
883883
// the latch) to check if it dominates all users of the recurrence. Bail out
884884
// if we have to sink such an instruction for another recurrence, as the
885885
// dominance requirement may not hold after sinking.
886886
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
887-
if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
887+
if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
888888
Instruction *V =
889889
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
890890
return SinkAfter.find(V) != SinkAfter.end();
@@ -991,9 +991,9 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
991991
return isInductionPhi(V) || isCastedInductionVariable(V);
992992
}
993993

994-
bool LoopVectorizationLegality::isFirstOrderRecurrence(
994+
bool LoopVectorizationLegality::isFixedOrderRecurrence(
995995
const PHINode *Phi) const {
996-
return FirstOrderRecurrences.count(Phi);
996+
return FixedOrderRecurrences.count(Phi);
997997
}
998998

999999
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ class InnerLoopVectorizer {
574574

575575
/// Create the exit value of first order recurrences in the middle block and
576576
/// update their users.
577-
void fixFirstOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
577+
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
578578
VPTransformState &State);
579579

580580
/// Create code for the loop exit value of the reduction.
@@ -3722,11 +3722,11 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
37223722
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
37233723
fixReduction(ReductionPhi, State);
37243724
else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
3725-
fixFirstOrderRecurrence(FOR, State);
3725+
fixFixedOrderRecurrence(FOR, State);
37263726
}
37273727
}
37283728

3729-
void InnerLoopVectorizer::fixFirstOrderRecurrence(
3729+
void InnerLoopVectorizer::fixFixedOrderRecurrence(
37303730
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
37313731
// This is the second phase of vectorizing first-order recurrences. An
37323732
// overview of the transformation is described below. Suppose we have the
@@ -4003,7 +4003,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
40034003

40044004
// We know that the loop is in LCSSA form. We need to update the PHI nodes
40054005
// in the exit blocks. See comment on analogous loop in
4006-
// fixFirstOrderRecurrence for a more complete explaination of the logic.
4006+
// fixFixedOrderRecurrence for a more complete explanation of the logic.
40074007
if (!Cost->requiresScalarEpilogue(VF))
40084008
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
40094009
if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst))
@@ -4752,7 +4752,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
47524752
// First order recurrence Phi's should typically be considered
47534753
// non-uniform.
47544754
auto *OP = dyn_cast<PHINode>(OV);
4755-
if (OP && Legal->isFirstOrderRecurrence(OP))
4755+
if (OP && Legal->isFixedOrderRecurrence(OP))
47564756
continue;
47574757
// If all the users of the operand are uniform, then add the
47584758
// operand into the uniform worklist.
@@ -5445,7 +5445,7 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
54455445
// Cross iteration phis such as reductions need special handling and are
54465446
// currently unsupported.
54475447
if (any_of(L.getHeader()->phis(),
5448-
[&](PHINode &Phi) { return Legal->isFirstOrderRecurrence(&Phi); }))
5448+
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
54495449
return false;
54505450

54515451
// Phis with uses outside of the loop require special handling and are
@@ -7028,7 +7028,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
70287028

70297029
// First-order recurrences are replaced by vector shuffles inside the loop.
70307030
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
7031-
if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
7031+
if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi))
70327032
return TTI.getShuffleCost(
70337033
TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
70347034
None, VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
@@ -8554,11 +8554,16 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85548554
if (auto Phi = dyn_cast<PHINode>(Instr)) {
85558555
if (Phi->getParent() != OrigLoop->getHeader())
85568556
return tryToBlend(Phi, Operands, Plan);
8557+
8558+
// Always record recipes for header phis. Later first-order recurrence phis
8559+
// can have earlier phis as incoming values.
8560+
recordRecipeOf(Phi);
8561+
85578562
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
85588563
return toVPRecipeResult(Recipe);
85598564

85608565
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8561-
if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) {
8566+
if (Legal->isReductionVariable(Phi) || Legal->isFixedOrderRecurrence(Phi)) {
85628567
VPValue *StartV = Operands[0];
85638568
if (Legal->isReductionVariable(Phi)) {
85648569
const RecurrenceDescriptor &RdxDesc =
@@ -8569,14 +8574,22 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85698574
CM.isInLoopReduction(Phi),
85708575
CM.useOrderedReductions(RdxDesc));
85718576
} else {
8572-
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8577+
// TODO: Currently fixed-order recurrences are modeled as chains of
8578+
// first-order recurrences. If there are no users of the intermediate
8579+
// recurrences in the chain, the fixed order recurrence should be modeled
8580+
// directly, enabling more efficient codegen.
8581+
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
85738582
}
85748583

8575-
// Record the incoming value from the backedge, so we can add the incoming
8576-
// value from the backedge after all recipes have been created.
8577-
recordRecipeOf(cast<Instruction>(
8578-
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
8579-
PhisToFix.push_back(PhiRecipe);
8584+
// Record the incoming value from the backedge, so we can add the incoming
8585+
// value from the backedge after all recipes have been created.
8586+
auto *Inc = cast<Instruction>(
8587+
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
8588+
auto RecipeIter = Ingredient2Recipe.find(Inc);
8589+
if (RecipeIter == Ingredient2Recipe.end())
8590+
recordRecipeOf(Inc);
8591+
8592+
PhisToFix.push_back(PhiRecipe);
85808593
} else {
85818594
// TODO: record backedge value for remaining pointer induction phis.
85828595
assert(Phi->getType()->isPointerTy() &&
@@ -8587,7 +8600,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85878600
VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
85888601
PhiRecipe = new VPWidenPHIRecipe(Phi, Start);
85898602
}
8590-
85918603
return toVPRecipeResult(PhiRecipe);
85928604
}
85938605

@@ -8648,7 +8660,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
86488660
assert(
86498661
SinkTarget != FirstInst &&
86508662
"Must find a live instruction (at least the one feeding the "
8651-
"first-order recurrence PHI) before reaching beginning of the block");
8663+
"fixed-order recurrence PHI) before reaching beginning of the block");
86528664
SinkTarget = SinkTarget->getPrevNode();
86538665
assert(SinkTarget != P.first &&
86548666
"sink source equals target, no sinking required");
@@ -8952,14 +8964,19 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
89528964
RecipeBuilder, Range.Start);
89538965

89548966
// Introduce a recipe to combine the incoming and previous values of a
8955-
// first-order recurrence.
8967+
// fixed-order recurrence.
89568968
for (VPRecipeBase &R :
89578969
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
89588970
auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
89598971
if (!RecurPhi)
89608972
continue;
89618973

89628974
VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
8975+
// Fixed-order recurrences do not contain cycles, so this loop is guaranteed
8976+
// to terminate.
8977+
while (auto *PrevPhi =
8978+
dyn_cast<VPFirstOrderRecurrencePHIRecipe>(PrevRecipe))
8979+
PrevRecipe = PrevPhi->getBackedgeRecipe();
89638980
VPBasicBlock *InsertBlock = PrevRecipe->getParent();
89648981
auto *Region = GetReplicateRegion(PrevRecipe);
89658982
if (Region)

0 commit comments

Comments
 (0)