Skip to content

Commit 9019ec3

Browse files
committed
[LV] Vectorize select min/max index.
Add support for vectorizing loops that select the index of the minimum or maximum element. The patch implements vectorizing those patterns by combining Min/Max and FindFirstIV reductions. It extends matching Min/Max reductions to allow in-loop users that are FindLastIV reductions, and records a flag indicating that the Min/Max reduction is used by another reduction. When creating reduction recipes, we process any reduction that has other reduction users. The reduction using the min/max reduction needs adjusting to compute the correct result: (1) find the first IV for which the condition based on the min/max reduction is true, (2) compare the partial min/max reduction result to its final value, and (3) select the lanes of the partial FindLastIV reductions which correspond to the lanes matching the min/max reduction result.
1 parent 81a7d17 commit 9019ec3

File tree

11 files changed

+1435
-242
lines changed

11 files changed

+1435
-242
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
798798
// For each block in the loop.
799799
for (BasicBlock *BB : TheLoop->blocks()) {
800800
// Scan the instructions in the block and look for hazards.
801+
PHINode *UnclassifiedPhi = nullptr;
801802
for (Instruction &I : *BB) {
802803
if (auto *Phi = dyn_cast<PHINode>(&I)) {
803804
Type *PhiTy = Phi->getType();
@@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
887888
addInductionPhi(Phi, ID, AllowedExit);
888889
continue;
889890
}
890-
891-
reportVectorizationFailure("Found an unidentified PHI",
892-
"value that could not be identified as "
893-
"reduction is used outside the loop",
894-
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
895-
return false;
891+
UnclassifiedPhi = Phi;
896892
} // end of PHI handling
897893

898894
// We handle calls that:
@@ -1045,6 +1041,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10451041
return false;
10461042
}
10471043
} // next instr.
1044+
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
1045+
auto I = Reductions.find(&P);
1046+
return I != Reductions.end() &&
1047+
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
1048+
I->second.getRecurrenceKind());
1049+
})) {
1050+
reportVectorizationFailure("Found an unidentified PHI",
1051+
"value that could not be identified as "
1052+
"reduction is used outside the loop",
1053+
"NonReductionValueUsedOutsideLoop", ORE,
1054+
TheLoop, UnclassifiedPhi);
1055+
return false;
1056+
}
10481057
}
10491058

10501059
if (!PrimaryInduction) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7261,6 +7261,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72617261
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind)) {
72627262
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
72637263
using namespace llvm::PatternMatch;
7264+
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
7265+
->getOperand(0)
7266+
->getUnderlyingValue();
72647267
Value *Cmp, *OrigResumeV, *CmpOp;
72657268
bool IsExpectedPattern =
72667269
match(MainResumeValue,
@@ -7274,7 +7277,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72747277
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
72757278
(void)IsExpectedPattern;
72767279
MainResumeValue = OrigResumeV;
7280+
} else {
7281+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
7282+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
72777283
}
7284+
72787285
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
72797286

72807287
// When fixing reductions in the epilogue loop we should already have
@@ -8178,7 +8185,7 @@ bool VPRecipeBuilder::getScaledReductions(
81788185
Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
81798186
SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains) {
81808187

8181-
if (!CM.TheLoop->contains(RdxExitInstr))
8188+
if (!RdxExitInstr || !CM.TheLoop->contains(RdxExitInstr))
81828189
return false;
81838190

81848191
auto *Update = dyn_cast<BinaryOperator>(RdxExitInstr);
@@ -8273,9 +8280,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82738280
return Recipe;
82748281

82758282
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8276-
assert((Legal->isReductionVariable(Phi) ||
8277-
Legal->isFixedOrderRecurrence(Phi)) &&
8278-
"can only widen reductions and fixed-order recurrences here");
82798283
VPValue *StartV = Operands[0];
82808284
if (Legal->isReductionVariable(Phi)) {
82818285
const RecurrenceDescriptor &RdxDesc =
@@ -8289,12 +8293,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82898293
PhiRecipe = new VPReductionPHIRecipe(
82908294
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
82918295
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8292-
} else {
8296+
} else if (Legal->isFixedOrderRecurrence(Phi)) {
82938297
// TODO: Currently fixed-order recurrences are modeled as chains of
82948298
// first-order recurrences. If there are no users of the intermediate
82958299
// recurrences in the chain, the fixed order recurrence should be modeled
82968300
// directly, enabling more efficient codegen.
82978301
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8302+
} else {
8303+
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
8304+
// original VPWidenPHIRecipe for now, to be legalized later if possible.
8305+
setRecipe(Phi, R);
8306+
return nullptr;
82988307
}
82998308
// Add backedge value.
83008309
PhiRecipe->addOperand(Operands[1]);
@@ -8479,7 +8488,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84798488
// TODO: Extract final value from induction recipe initially, optimize to
84808489
// pre-computed end value together in optimizeInductionExitUsers.
84818490
auto *VectorPhiR =
8482-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8491+
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
84838492
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
84848493
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
84858494
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -8501,7 +8510,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
85018510
// which for FORs is a vector whose last element needs to be extracted. The
85028511
// start value provides the value if the loop is bypassed.
85038512
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8504-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8513+
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
85058514
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
85068515
"Cannot handle loops with uncountable early exits");
85078516
if (IsFOR)
@@ -8510,7 +8519,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
85108519
"vector.recur.extract");
85118520
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
85128521
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8513-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8522+
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
85148523
ScalarPhiIRI->addOperand(ResumePhiR);
85158524
}
85168525
}
@@ -8813,6 +8822,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88138822
VPRecipeBase *Recipe =
88148823
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
88158824
if (!Recipe) {
8825+
if (isa<VPWidenPHIRecipe>(SingleDef))
8826+
continue;
88168827
SmallVector<VPValue *, 4> Operands(R.operands());
88178828
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
88188829
}
@@ -8878,6 +8889,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88788889
// Adjust the recipes for any inloop reductions.
88798890
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
88808891

8892+
// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
8893+
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
8894+
*Plan))
8895+
return nullptr;
8896+
88818897
// Transform recipes to abstract recipes if it is legal and beneficial and
88828898
// clamp the range for better cost estimation.
88838899
// TODO: Enable following transform when the EVL-version of extended-reduction
@@ -9340,6 +9356,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93409356
PhiR->setOperand(0, StartV);
93419357
}
93429358
}
9359+
93439360
for (VPRecipeBase *R : ToDelete)
93449361
R->eraseFromParent();
93459362

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,8 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
18261826
~VPHeaderPHIRecipe() override = default;
18271827

18281828
/// Method to support type inquiry through isa, cast, and dyn_cast.
1829-
static inline bool classof(const VPRecipeBase *B) {
1829+
static inline bool classof(const VPUser *U) {
1830+
auto *B = cast<VPRecipeBase>(U);
18301831
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
18311832
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
18321833
}
@@ -1835,6 +1836,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
18351836
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
18361837
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
18371838
}
1839+
static inline bool classof(const VPSingleDefRecipe *B) {
1840+
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1841+
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1842+
}
18381843

18391844
/// Generate the phi nodes.
18401845
void execute(VPTransformState &State) override = 0;
@@ -1896,7 +1901,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
18961901
return R && classof(R);
18971902
}
18981903

1899-
static inline bool classof(const VPHeaderPHIRecipe *R) {
1904+
static inline bool classof(const VPSingleDefRecipe *R) {
19001905
return classof(static_cast<const VPRecipeBase *>(R));
19011906
}
19021907

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,3 +589,148 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
589589
TopRegion->setName("vector loop");
590590
TopRegion->getEntryBasicBlock()->setName("vector.body");
591591
}
592+
593+
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
594+
using namespace VPlanPatternMatch;
595+
for (auto &PhiR : make_early_inc_range(
596+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
597+
if (!isa<VPWidenPHIRecipe>(&PhiR))
598+
continue;
599+
600+
// Check if PhiR is a min/max reduction that has a user inside the loop
601+
// outside the min/max reduction chain. The other user must be the compare
602+
// of a FindLastIV reduction chain.
603+
auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
604+
auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
605+
MinMaxPhiR->getOperand(1)->getDefiningRecipe());
606+
if (!MinMaxOp)
607+
return false;
608+
609+
// The incoming value must be a min/max intrinsic.
610+
// TODO: Also handle the select variant.
611+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
612+
if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp))
613+
ID = WideInt->getVectorIntrinsicID();
614+
else {
615+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
616+
if (!RepR || !isa<IntrinsicInst>(RepR->getUnderlyingInstr()))
617+
return false;
618+
ID = cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID();
619+
}
620+
RecurKind RdxKind = RecurKind::None;
621+
switch (ID) {
622+
case Intrinsic::umax:
623+
RdxKind = RecurKind::UMax;
624+
break;
625+
case Intrinsic::umin:
626+
RdxKind = RecurKind::UMin;
627+
break;
628+
case Intrinsic::smax:
629+
RdxKind = RecurKind::SMax;
630+
break;
631+
case Intrinsic::smin:
632+
RdxKind = RecurKind::SMin;
633+
break;
634+
default:
635+
return false;
636+
}
637+
638+
// The min/max intrinsic must use the phi and itself must only be used by
639+
// the phi and a resume-phi in the scalar preheader.
640+
if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
641+
MinMaxOp->getOperand(1) != MinMaxPhiR)
642+
return false;
643+
if (MinMaxPhiR->getNumUsers() != 2 ||
644+
any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
645+
auto *Phi = dyn_cast<VPPhi>(U);
646+
return MinMaxPhiR != U &&
647+
(!Phi || Phi->getParent() != Plan.getScalarPreheader());
648+
}))
649+
return false;
650+
651+
// One user of MinMaxPhiR is MinMaxOp, the other user must be a compare
652+
// that's part of a FindLastIV chain.
653+
auto MinMaxUsers = to_vector(MinMaxPhiR->users());
654+
auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
655+
MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
656+
VPValue *CmpOpA;
657+
VPValue *CmpOpB;
658+
if (!Cmp || Cmp->getNumUsers() != 1 ||
659+
!match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
660+
m_VPValue(CmpOpB))))
661+
return false;
662+
663+
// Normalize the predicate so MinMaxPhiR is on the right side.
664+
CmpInst::Predicate Pred = Cmp->getPredicate();
665+
if (CmpOpA == MinMaxPhiR)
666+
Pred = CmpInst::getSwappedPredicate(Pred);
667+
668+
// Determine if the predicate is not strict.
669+
bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
670+
// Account for a mismatch between RdxKind and the predicate.
671+
switch (RdxKind) {
672+
case RecurKind::UMin:
673+
case RecurKind::SMin:
674+
IsNonStrictPred |= ICmpInst::isGT(Pred);
675+
break;
676+
case RecurKind::UMax:
677+
case RecurKind::SMax:
678+
IsNonStrictPred |= ICmpInst::isLT(Pred);
679+
break;
680+
default:
681+
llvm_unreachable("unsupported kind");
682+
}
683+
684+
// TODO: Strict predicates need to find the first IV value for which the
685+
// predicate holds, not the last.
686+
if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
687+
return false;
688+
689+
// Cmp must be used by the select of a FindLastIV chain.
690+
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
691+
VPValue *IVOp, *FindIV;
692+
if (!Sel ||
693+
!match(Sel,
694+
m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
695+
Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
696+
return false;
697+
auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
698+
if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
699+
FindIVPhiR->getRecurrenceKind()))
700+
return false;
701+
702+
assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
703+
"cannot handle inloop/ordered reductions yet");
704+
705+
auto NewPhiR = new VPReductionPHIRecipe(
706+
cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
707+
*MinMaxPhiR->getOperand(0), false, false, 1);
708+
NewPhiR->insertBefore(MinMaxPhiR);
709+
MinMaxPhiR->replaceAllUsesWith(NewPhiR);
710+
NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
711+
MinMaxPhiR->eraseFromParent();
712+
713+
// The reduction using MinMaxPhiR needs adjusting to compute the correct
714+
// result:
715+
// 1. We need to find the last IV for which the condition based on the
716+
// min/max recurrence is true,
717+
// 2. Compare the partial min/max reduction result to its final value and,
718+
// 3. Select the lanes of the partial FindLastIV reductions which
719+
// correspond to the lanes matching the min/max reduction result.
720+
VPInstruction *FindIVResult = cast<VPInstruction>(
721+
*(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
722+
VPBuilder B(FindIVResult);
723+
VPInstruction *MinMaxResult =
724+
B.createNaryOp(VPInstruction::ComputeReductionResult,
725+
{NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
726+
NewPhiR->getBackedgeValue()->replaceUsesWithIf(
727+
MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
728+
auto *FinalMinMaxCmp = B.createICmp(
729+
CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
730+
auto *FinalIVSelect =
731+
B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
732+
FindIVResult->getOperand(2));
733+
FindIVResult->setOperand(3, FinalIVSelect);
734+
}
735+
return true;
736+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ struct VPlanTransforms {
8383
GetIntOrFpInductionDescriptor,
8484
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
8585

86+
/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
87+
/// min-max reductions used by FindLastIV reductions if possible. Returns
88+
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
89+
static bool legalizeUnclassifiedPhis(VPlan &Plan);
90+
8691
/// Try to have all users of fixed-order recurrences appear after the recipe
8792
/// defining their previous value, by either sinking users or hoisting recipes
8893
/// defining their previous value (and its operands). Then introduce

0 commit comments

Comments
 (0)