Skip to content

[LV] Vectorize selecting last IV of min/max element. #141431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
PHINode *UnclassifiedPhi = nullptr;
for (Instruction &I : *BB) {
if (auto *Phi = dyn_cast<PHINode>(&I)) {
Type *PhiTy = Phi->getType();
Expand Down Expand Up @@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}

reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
UnclassifiedPhi = Phi;
} // end of PHI handling

// We handle calls that:
Expand Down Expand Up @@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
} // next instr.
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
auto I = Reductions.find(&P);
return I != Reductions.end() &&
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
I->second.getRecurrenceKind());
})) {
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE,
TheLoop, UnclassifiedPhi);
return false;
}
}

if (!PrimaryInduction) {
Expand Down
112 changes: 63 additions & 49 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7260,7 +7260,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// Get the VPInstruction computing the reduction result in the middle block.
// The first operand may not be from the middle block if it is not connected
// to the scalar preheader. In that case, there's nothing to fix.
auto *EpiRedResult = dyn_cast<VPInstruction>(EpiResumePhiR->getOperand(0));
VPValue *Incoming = EpiResumePhiR->getOperand(0);
match(Incoming, VPlanPatternMatch::m_ZExtOrSExt(
VPlanPatternMatch::m_VPValue(Incoming)));
auto *EpiRedResult = dyn_cast<VPInstruction>(Incoming);
if (!EpiRedResult ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
Expand All @@ -7269,8 +7272,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(

auto *EpiRedHeaderPhi =
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc =
EpiRedHeaderPhi->getRecurrenceDescriptor();
RecurKind Kind = EpiRedHeaderPhi->getRecurrenceKind();
Value *MainResumeValue;
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
Expand All @@ -7279,8 +7281,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
} else
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
(void)StartV;
auto *Cmp = cast<ICmpInst>(MainResumeValue);
Expand All @@ -7290,11 +7291,13 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind)) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
->getOperand(0)
->getUnderlyingValue();
Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
match(MainResumeValue,
Expand All @@ -7306,7 +7309,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
} else {
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
}

PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);

// When fixing reductions in the epilogue loop we should already have
Expand Down Expand Up @@ -8174,7 +8181,7 @@ bool VPRecipeBuilder::getScaledReductions(
Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains) {

if (!CM.TheLoop->contains(RdxExitInstr))
if (!RdxExitInstr || !CM.TheLoop->contains(RdxExitInstr))
return false;

auto *Update = dyn_cast<BinaryOperator>(RdxExitInstr);
Expand Down Expand Up @@ -8268,9 +8275,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc =
Expand All @@ -8284,12 +8288,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
} else {
} else if (Legal->isFixedOrderRecurrence(Phi)) {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be modeled
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
} else {
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
// original VPWidenPHIRecipe for now, to be legalized later if possible.
setRecipe(Phi, R);
return nullptr;
}
// Add backedge value.
PhiRecipe->addOperand(Operands[1]);
Expand Down Expand Up @@ -8474,7 +8483,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR =
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
Expand All @@ -8496,7 +8505,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// which for FORs is a vector whose last element needs to be extracted. The
// start value provides the value if the loop is bypassed.
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
"Cannot handle loops with uncountable early exits");
if (IsFOR)
Expand All @@ -8505,7 +8514,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
}
Expand Down Expand Up @@ -8820,6 +8829,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
if (!Recipe) {
if (isa<VPWidenPHIRecipe>(SingleDef))
continue;
SmallVector<VPValue *, 4> Operands(R.operands());
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
}
Expand Down Expand Up @@ -8885,6 +8896,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
*Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
// TODO: Enable following transform when the EVL-version of extended-reduction
Expand Down Expand Up @@ -9042,8 +9058,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
Expand Down Expand Up @@ -9149,6 +9164,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());

const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
cast<PHINode>(PhiR->getUnderlyingInstr()));
// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
Expand Down Expand Up @@ -9179,7 +9196,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR)
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
cast<PHINode>(PhiR->getUnderlyingInstr()));
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
Expand Down Expand Up @@ -9211,28 +9229,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(1, NewExitingVPV);
}

// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
auto *Extnd =
RdxDesc.isSigned()
? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
: new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);

Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
Extnd->insertAfter(Trunc);
if (PhiR->getOperand(1) == NewExitingVPV)
PhiR->setOperand(1, Extnd->getVPSingleValue());
NewExitingVPV = Extnd;
}

// We want code in the middle block to appear to execute on the location of
// the scalar loop's latch terminator because: (a) it is all compiler
// generated, (b) these instructions are always executed after evaluating
Expand Down Expand Up @@ -9271,6 +9267,31 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createNaryOp(VPInstruction::ComputeReductionResult,
{PhiR, NewExitingVPV}, Flags, ExitDL);
}
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
Instruction::CastOps ExtendOpc =
RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
Extnd->insertAfter(Trunc);
if (PhiR->getOperand(1) == NewExitingVPV)
PhiR->setOperand(1, Extnd->getVPSingleValue());

// Update ComputeReductionResult with the truncated exiting value and
// extend its result.
FinalReductionResult->setOperand(1, Trunc);
FinalReductionResult =
Builder.createScalarCast(ExtendOpc, FinalReductionResult, PhiTy, {});
}

// Update all users outside the vector region. Also replace redundant
// ExtractLastElement.
for (auto *U : to_vector(OrigExitingVPV->users())) {
Expand Down Expand Up @@ -9346,6 +9367,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(0, StartV);
}
}

for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();

Expand Down Expand Up @@ -9819,14 +9841,9 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
RecurKind RK = ReductionPhi->getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeAnyOfResult must match");

// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
Expand All @@ -9835,9 +9852,6 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
Value *StartV = getStartValueFromReductionResult(RdxResult);
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeFindLastIVResult must match");

ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);

Expand Down
30 changes: 20 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1830,7 +1830,8 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
~VPHeaderPHIRecipe() override = default;

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
static inline bool classof(const VPUser *U) {
auto *B = cast<VPRecipeBase>(U);
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
}
Expand All @@ -1839,6 +1840,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
}
/// Type-inquiry overload (isa/cast/dyn_cast) for VPSingleDefRecipe pointers:
/// returns true when \p B's VPDef ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
static inline bool classof(const VPSingleDefRecipe *B) {
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
}

/// Generate the phi nodes.
void execute(VPTransformState &State) override = 0;
Expand Down Expand Up @@ -1900,7 +1905,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
return R && classof(R);
}

static inline bool classof(const VPHeaderPHIRecipe *R) {
static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

Expand Down Expand Up @@ -2174,7 +2179,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
public VPUnrollPartAccessor<2> {
/// Descriptor for the reduction.
const RecurrenceDescriptor &RdxDesc;
const RecurKind Kind;

/// The phi is part of an in-loop reduction.
bool IsInLoop;
Expand All @@ -2193,17 +2198,24 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
VFScaleFactor(VFScaleFactor) {
Kind(RdxDesc.getRecurrenceKind()), IsInLoop(IsInLoop),
IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
/// Create a reduction phi recipe for \p Phi from a recurrence kind \p Kind
/// directly (no RecurrenceDescriptor required), with \p Start as the first
/// operand. \p IsInLoop marks the phi as part of an in-loop reduction and
/// \p IsOrdered as part of an ordered reduction; \p VFScaleFactor is stored
/// as-is. An ordered reduction must also be in-loop (asserted below).
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
bool IsInLoop = false, bool IsOrdered = false,
unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}

~VPReductionPHIRecipe() override = default;

VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
RdxDesc, *getOperand(0), IsInLoop,
IsOrdered, VFScaleFactor);
getRecurrenceKind(), *getOperand(0),
IsInLoop, IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
Expand All @@ -2222,9 +2234,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPSlotTracker &SlotTracker) const override;
#endif

const RecurrenceDescriptor &getRecurrenceDescriptor() const {
return RdxDesc;
}
/// Returns the recurrence kind this reduction phi was constructed with.
RecurKind getRecurrenceKind() const { return Kind; }

/// Returns true if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
Expand Down
Loading
Loading