Skip to content

Commit 9dd1c66

Browse files
authored
[VPlan] Expand VPWidenIntOrFpInductionRecipe into separate recipes (#118638)
The motivation of this PR is to make #115274 easier to implement, and it should allow us to add EVL support by just passing EVL to the VF operand. The current difficulty with widening IVs with EVL is that VPWidenIntOrFpInductionRecipe generates its own backedge value. Since it's a VPHeaderPHIRecipe the VF operand must be in the preheader, which means we can't use the EVL since it's defined in the loop body. The gist of this PR is to take the approach in #114305 and expand VPWidenIntOrFpInductionRecipe into several recipes for the initial value, phi and backedge value just before execution. I.e. this example: ``` vector.ph: Successor(s): vector loop <x1> vector loop: { vector.body: WIDEN-INDUCTION %i = phi %start, %step, %vf ... EMIT branch-on-count ... No successors } ``` gets expanded to: ``` vector.ph: ... vp<%induction.start> = ... vp<%induction.increment> = ... Successor(s): vector loop <x1> vector loop: { vector.body: ir<%i> = WIDEN-PHI vp<%induction.start>, vp<%vec.ind.next> ... vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment> EMIT branch-on-count ... No successors } ``` This allows us to use a value defined in the loop in the backedge value, and also means we can just reuse the existing backedge fixups in VPlan::execute without having to specially handle it ourselves. After this, #115274 should just become a matter of setting the VF operand to EVL (and building the increment step in the loop body, not the preheader).
1 parent 8513066 commit 9dd1c66

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1069
-597
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,8 +2764,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
27642764

27652765
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27662766
// Fix widened non-induction PHIs by setting up the PHI operands.
2767-
if (EnableVPlanNativePath)
2768-
fixNonInductionPHIs(State);
2767+
fixNonInductionPHIs(State);
27692768

27702769
// After vectorization, the exit blocks of the original loop will have
27712770
// additional predecessors. Invalidate SCEVs for the exit phis in case SE
@@ -7324,7 +7323,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73247323
"Trying to execute plan with unsupported VF");
73257324
assert(BestVPlan.hasUF(BestUF) &&
73267325
"Trying to execute plan with unsupported UF");
7327-
VPlanTransforms::runPass(VPlanTransforms::materializeStepVectors, BestVPlan);
73287326
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
73297327
// cost model is complete for better cost estimates.
73307328
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,28 +1029,18 @@ void VPlan::execute(VPTransformState *State) {
10291029
if (isa<VPWidenPHIRecipe>(&R))
10301030
continue;
10311031

1032-
if (isa<VPWidenInductionRecipe>(&R)) {
1033-
PHINode *Phi = nullptr;
1034-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1035-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1036-
} else {
1037-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1038-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1039-
"recipe generating only scalars should have been replaced");
1040-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1041-
Phi = cast<PHINode>(GEP->getPointerOperand());
1042-
}
1032+
if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1033+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1034+
"recipe generating only scalars should have been replaced");
1035+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1036+
PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
10431037

10441038
Phi->setIncomingBlock(1, VectorLatchBB);
10451039

10461040
// Move the last step to the end of the latch block. This ensures
10471041
// consistent placement of all induction updates.
10481042
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
10491043
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
1050-
1051-
// Use the steps for the last part as backedge value for the induction.
1052-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1053-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
10541044
continue;
10551045
}
10561046

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,12 +1951,13 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
19511951
};
19521952

19531953
/// A recipe for handling phi nodes of integer and floating-point inductions,
1954-
/// producing their vector values.
1954+
/// producing their vector values. This is an abstract recipe and must be
1955+
/// converted to concrete recipes before executing.
19551956
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19561957
TruncInst *Trunc;
19571958

19581959
// If this recipe is unrolled it will have 2 additional operands.
1959-
bool isUnrolled() const { return getNumOperands() == 6; }
1960+
bool isUnrolled() const { return getNumOperands() == 5; }
19601961

19611962
public:
19621963
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
@@ -1992,9 +1993,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19921993

19931994
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
19941995

1995-
/// Generate the vectorized and scalarized versions of the phi node as
1996-
/// needed by their users.
1997-
void execute(VPTransformState &State) override;
1996+
void execute(VPTransformState &State) override {
1997+
llvm_unreachable("cannot execute this recipe, should be expanded via "
1998+
"expandVPWidenIntOrFpInductionRecipe");
1999+
}
19982000

19992001
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
20002002
/// Print the recipe.
@@ -2005,16 +2007,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
20052007
VPValue *getVFValue() { return getOperand(2); }
20062008
const VPValue *getVFValue() const { return getOperand(2); }
20072009

2008-
// TODO: Remove once VPWidenIntOrFpInduction is fully expanded in
2009-
// convertToConcreteRecipes.
2010-
VPInstructionWithType *getStepVector() {
2011-
auto *StepVector =
2012-
cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe());
2013-
assert(StepVector->getOpcode() == VPInstruction::StepVector &&
2014-
"step vector operand must be a VPInstruction::StepVector");
2015-
return StepVector;
2016-
}
2017-
20182010
VPValue *getSplatVFValue() {
20192011
// If the recipe has been unrolled return the VPValue for the induction
20202012
// increment.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
952952
case VPInstruction::CanonicalIVIncrementForPart:
953953
case VPInstruction::BranchOnCount:
954954
case VPInstruction::BranchOnCond:
955+
case VPInstruction::Broadcast:
955956
case VPInstruction::ReductionStartVector:
956957
return true;
957958
case VPInstruction::PtrAdd:
@@ -1077,15 +1078,14 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10771078

10781079
void VPInstructionWithType::execute(VPTransformState &State) {
10791080
State.setDebugLocFrom(getDebugLoc());
1080-
switch (getOpcode()) {
1081-
case Instruction::ZExt:
1082-
case Instruction::Trunc: {
1081+
if (isScalarCast()) {
10831082
Value *Op = State.get(getOperand(0), VPLane(0));
10841083
Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
10851084
Op, ResultTy);
10861085
State.set(this, Cast, VPLane(0));
1087-
break;
1086+
return;
10881087
}
1088+
switch (getOpcode()) {
10891089
case VPInstruction::StepVector: {
10901090
Value *StepVector =
10911091
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
@@ -1965,149 +1965,13 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
19651965
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
19661966
}
19671967

1968-
/// This function adds
1969-
/// (0 * Step, 1 * Step, 2 * Step, ...)
1970-
/// to each vector element of Val.
1971-
/// \p Opcode is relevant for FP induction variable.
1972-
/// \p InitVec is an integer step vector from 0 with a step of 1.
1973-
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
1974-
Instruction::BinaryOps BinOp, ElementCount VF,
1975-
IRBuilderBase &Builder) {
1976-
assert(VF.isVector() && "only vector VFs are supported");
1977-
1978-
// Create and check the types.
1979-
auto *ValVTy = cast<VectorType>(Val->getType());
1980-
ElementCount VLen = ValVTy->getElementCount();
1981-
1982-
Type *STy = Val->getType()->getScalarType();
1983-
assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1984-
"Induction Step must be an integer or FP");
1985-
assert(Step->getType() == STy && "Step has wrong type");
1986-
1987-
if (STy->isIntegerTy()) {
1988-
Step = Builder.CreateVectorSplat(VLen, Step);
1989-
assert(Step->getType() == Val->getType() && "Invalid step vec");
1990-
// FIXME: The newly created binary instructions should contain nsw/nuw
1991-
// flags, which can be found from the original scalar operations.
1992-
Step = Builder.CreateMul(InitVec, Step);
1993-
return Builder.CreateAdd(Val, Step, "induction");
1994-
}
1995-
1996-
// Floating point induction.
1997-
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1998-
"Binary Opcode should be specified for FP induction");
1999-
InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
2000-
2001-
Step = Builder.CreateVectorSplat(VLen, Step);
2002-
Value *MulOp = Builder.CreateFMul(InitVec, Step);
2003-
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
2004-
}
2005-
20061968
/// A helper function that returns an integer or floating-point constant with
20071969
/// value C.
20081970
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
20091971
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
20101972
: ConstantFP::get(Ty, C);
20111973
}
20121974

2013-
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
2014-
assert(!State.Lane && "Int or FP induction being replicated.");
2015-
2016-
Value *Start = getStartValue()->getLiveInIRValue();
2017-
const InductionDescriptor &ID = getInductionDescriptor();
2018-
TruncInst *Trunc = getTruncInst();
2019-
IRBuilderBase &Builder = State.Builder;
2020-
assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
2021-
"Types must match");
2022-
assert(State.VF.isVector() && "must have vector VF");
2023-
2024-
// The value from the original loop to which we are mapping the new induction
2025-
// variable.
2026-
Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
2027-
2028-
// Fast-math-flags propagate from the original induction instruction.
2029-
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
2030-
if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
2031-
Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
2032-
2033-
// Now do the actual transformations, and start with fetching the step value.
2034-
Value *Step = State.get(getStepValue(), VPLane(0));
2035-
2036-
assert((isa<PHINode, TruncInst>(EntryVal)) &&
2037-
"Expected either an induction phi-node or a truncate of it!");
2038-
2039-
// Construct the initial value of the vector IV in the vector loop preheader
2040-
auto CurrIP = Builder.saveIP();
2041-
BasicBlock *VectorPH =
2042-
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
2043-
Builder.SetInsertPoint(VectorPH->getTerminator());
2044-
if (isa<TruncInst>(EntryVal)) {
2045-
assert(Start->getType()->isIntegerTy() &&
2046-
"Truncation requires an integer type");
2047-
auto *TruncType = cast<IntegerType>(EntryVal->getType());
2048-
Step = Builder.CreateTrunc(Step, TruncType);
2049-
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
2050-
}
2051-
2052-
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
2053-
Value *SteppedStart =
2054-
::getStepVector(SplatStart, Step, State.get(getStepVector()),
2055-
ID.getInductionOpcode(), State.VF, State.Builder);
2056-
2057-
// We create vector phi nodes for both integer and floating-point induction
2058-
// variables. Here, we determine the kind of arithmetic we will perform.
2059-
Instruction::BinaryOps AddOp;
2060-
Instruction::BinaryOps MulOp;
2061-
if (Step->getType()->isIntegerTy()) {
2062-
AddOp = Instruction::Add;
2063-
MulOp = Instruction::Mul;
2064-
} else {
2065-
AddOp = ID.getInductionOpcode();
2066-
MulOp = Instruction::FMul;
2067-
}
2068-
2069-
Value *SplatVF;
2070-
if (VPValue *SplatVFOperand = getSplatVFValue()) {
2071-
// The recipe has been unrolled. In that case, fetch the splat value for the
2072-
// induction increment.
2073-
SplatVF = State.get(SplatVFOperand);
2074-
} else {
2075-
// Multiply the vectorization factor by the step using integer or
2076-
// floating-point arithmetic as appropriate.
2077-
Type *StepType = Step->getType();
2078-
Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
2079-
if (Step->getType()->isFloatingPointTy())
2080-
RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
2081-
else
2082-
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
2083-
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
2084-
2085-
// Create a vector splat to use in the induction update.
2086-
SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
2087-
}
2088-
2089-
Builder.restoreIP(CurrIP);
2090-
2091-
// We may need to add the step a number of times, depending on the unroll
2092-
// factor. The last of those goes into the PHI.
2093-
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
2094-
VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
2095-
VecInd->setDebugLoc(getDebugLoc());
2096-
State.set(this, VecInd);
2097-
2098-
Instruction *LastInduction = cast<Instruction>(
2099-
Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
2100-
LastInduction->setDebugLoc(getDebugLoc());
2101-
2102-
VecInd->addIncoming(SteppedStart, VectorPH);
2103-
// Add induction update using an incorrect block temporarily. The phi node
2104-
// will be fixed after VPlan execution. Note that at this point the latch
2105-
// block cannot be used, as it does not exist yet.
2106-
// TODO: Model increment value in VPlan, by turning the recipe into a
2107-
// multi-def and a subclass of VPHeaderPHIRecipe.
2108-
VecInd->addIncoming(LastInduction, VectorPH);
2109-
}
2110-
21111975
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
21121976
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
21131977
VPSlotTracker &SlotTracker) const {
@@ -3871,12 +3735,14 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
38713735
#endif
38723736

38733737
void VPWidenPHIRecipe::execute(VPTransformState &State) {
3874-
assert(EnableVPlanNativePath &&
3875-
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
3876-
38773738
Value *Op0 = State.get(getOperand(0));
38783739
Type *VecTy = Op0->getType();
3879-
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
3740+
Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
3741+
// Manually move it with the other PHIs in case PHI recipes above this one
3742+
// also inserted non-phi instructions.
3743+
// TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
3744+
// convertToConcreteRecipes.
3745+
VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
38803746
State.set(this, VecPhi);
38813747
}
38823748

0 commit comments

Comments
 (0)