Skip to content

Commit ec472b9

Browse files
committed
[VPlan] Add support for VPWidenIntOrFpInductionRecipe in predicated DataWithEVL vectorization mode.
As an alternative approach to #82021, this patch lowers VPWidenIntOrFpInductionRecipe into a widened phi recipe plus step recipes that are computed from EVL during the EVL transformation phase.
1 parent 574f64c commit ec472b9

11 files changed

+739
-77
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
132132
case Intrinsic::umul_fix:
133133
case Intrinsic::umul_fix_sat:
134134
return (ScalarOpdIdx == 2);
135+
case Intrinsic::experimental_vp_splat:
136+
return (ScalarOpdIdx == 0);
135137
default:
136138
return false;
137139
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2932,8 +2932,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
29322932

29332933
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29342934
// Fix widened non-induction PHIs by setting up the PHI operands.
2935-
if (EnableVPlanNativePath)
2936-
fixNonInductionPHIs(State);
2935+
fixNonInductionPHIs(State);
29372936

29382937
// Forget the original basic block.
29392938
PSE.getSE()->forgetLoop(OrigLoop);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,15 +286,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
286286
return Shuf;
287287
};
288288

289-
if (!hasScalarValue(Def, {0})) {
290-
assert(Def->isLiveIn() && "expected a live-in");
291-
Value *IRV = Def->getLiveInIRValue();
292-
Value *B = GetBroadcastInstrs(IRV);
289+
Value *ScalarValue = hasScalarValue(Def, {0}) ? get(Def, VPLane(0)) : nullptr;
290+
if (!ScalarValue || isa<Constant>(ScalarValue)) {
291+
assert((ScalarValue || Def->isLiveIn()) && "expected a live-in");
292+
Value *B = ScalarValue ? GetBroadcastInstrs(ScalarValue)
293+
: GetBroadcastInstrs(Def->getLiveInIRValue());
293294
set(Def, B);
294295
return B;
295296
}
296297

297-
Value *ScalarValue = get(Def, VPLane(0));
298298
// If we aren't vectorizing, we can just copy the scalar map values over
299299
// to the vector map.
300300
if (VF.isScalar()) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,8 @@ bool VPInstruction::isVectorToScalar() const {
648648
}
649649

650650
bool VPInstruction::isSingleScalar() const {
651-
return getOpcode() == VPInstruction::ResumePhi;
651+
return getOpcode() == VPInstruction::ResumePhi ||
652+
getOpcode() == VPInstruction::ExplicitVectorLength;
652653
}
653654

654655
#if !defined(NDEBUG)
@@ -1034,6 +1035,8 @@ bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
10341035
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
10351036
// Vector predication intrinsics only demand the first lane of the last
10361037
// operand (the EVL operand).
1038+
if (VectorIntrinsicID == Intrinsic::experimental_vp_splat)
1039+
return Op == getOperand(0) || Op == getOperand(2);
10371040
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
10381041
Op == getOperand(getNumOperands() - 1);
10391042
}
@@ -2317,9 +2320,8 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
23172320
#endif
23182321

23192322
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2320-
assert(vputils::onlyFirstLaneUsed(this) &&
2321-
"Codegen only implemented for first lane.");
23222323
switch (Opcode) {
2324+
case Instruction::UIToFP:
23232325
case Instruction::SExt:
23242326
case Instruction::ZExt:
23252327
case Instruction::Trunc: {
@@ -3425,9 +3427,6 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
34253427
#endif
34263428

34273429
void VPWidenPHIRecipe::execute(VPTransformState &State) {
3428-
assert(EnableVPlanNativePath &&
3429-
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
3430-
34313430
Value *Op0 = State.get(getOperand(0));
34323431
Type *VecTy = Op0->getType();
34333432
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 132 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,6 +1547,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
15471547
}
15481548
}
15491549

1550+
/// This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
1551+
/// an induction variable at the preheader.
///
/// Every recipe created here is appended to \p VectorPHVPBB and carries the
/// debug location \p DL where the recipe constructor accepts one.
///
/// \param StartValue  start value of the induction.
/// \param Step        step of the induction.
/// \param InductionTy scalar type of the induction; selects the integer or
///                    the floating-point lowering below.
/// \param ID          induction descriptor; only queried on the
///                    floating-point path (fast-math flags and opcode).
/// \param VectorPHVPBB block that receives the new recipes.
/// \param DL          debug location attached to the new recipes.
/// \returns the recipe producing the stepped start value.
1552+
static VPSingleDefRecipe *createStepVector(VPValue *StartValue, VPValue *Step,
1553+
Type *InductionTy,
1554+
const InductionDescriptor &ID,
1555+
VPBasicBlock *VectorPHVPBB,
1556+
DebugLoc DL) {
// The step vector is always built with integer elements; for a
// floating-point induction use an integer type of the same bit width.
1557+
Type *IntTy = InductionTy->isIntegerTy()
1558+
? InductionTy
1559+
: IntegerType::get(InductionTy->getContext(),
1560+
InductionTy->getScalarSizeInBits());
1561+
// Create a vector of consecutive numbers starting from zero.
1562+
VPSingleDefRecipe *InitVec =
1563+
new VPWidenIntrinsicRecipe(Intrinsic::stepvector, {}, IntTy, DL);
1564+
VectorPHVPBB->appendRecipe(InitVec);
1565+
1566+
if (InductionTy->isIntegerTy()) {
1567+
auto *Mul = new VPInstruction(Instruction::Mul, {InitVec, Step}, DL);
1568+
VectorPHVPBB->appendRecipe(Mul);
// Attach DL here as well so the final add is not left without a debug
// location, consistent with the floating-point path below.
1569+
auto *SteppedStart =
1570+
new VPInstruction(Instruction::Add, {StartValue, Mul}, DL, "induction");
1571+
VectorPHVPBB->appendRecipe(SteppedStart);
1572+
return SteppedStart;
1573+
} else {
// Floating-point induction: convert the integer step vector to the
// induction type, then scale and combine using the fast-math flags and
// the opcode recorded in the induction descriptor.
1574+
FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
1575+
InitVec = new VPWidenCastRecipe(Instruction::UIToFP, InitVec, InductionTy);
1576+
VectorPHVPBB->appendRecipe(InitVec);
1577+
auto *Mul = new VPInstruction(Instruction::FMul, {InitVec, Step}, FMF, DL);
1578+
VectorPHVPBB->appendRecipe(Mul);
1579+
Instruction::BinaryOps BinOp = ID.getInductionOpcode();
1580+
auto *SteppedStart =
1581+
new VPInstruction(BinOp, {StartValue, Mul}, FMF, DL, "induction");
1582+
VectorPHVPBB->appendRecipe(SteppedStart);
1583+
return SteppedStart;
1584+
}
1585+
}
1586+
1587+
/// Lower widen iv recipes into recipes with EVL.
///
/// All uses of \p WidenIV are redirected to a new VPWidenPHIRecipe inserted
/// before it. The phi's incoming values are the stepped start vector built in
/// the vector preheader and the phi advanced by splat(Step * EVL) through a
/// VP add/fadd/fsub placed before the canonical IV increment. \p WidenIV is
/// not erased by this function.
///
/// \param WidenIV the widened int/fp induction recipe to lower.
/// \param Plan    the plan that owns \p WidenIV.
/// \param EVL     the explicit vector length value used for the update.
1588+
static void
1589+
transformWidenIVRecipestoEVLRecipes(VPWidenIntOrFpInductionRecipe *WidenIV,
1590+
VPlan &Plan, VPValue *EVL) {
1591+
DebugLoc DL = WidenIV->getDebugLoc();
1592+
const InductionDescriptor &ID = WidenIV->getInductionDescriptor();
// The recipes that update the induction are inserted right before the
// canonical IV increment.
1593+
auto *CanonicalIVIncrement =
1594+
cast<VPInstruction>(Plan.getCanonicalIV()->getBackedgeValue());
1595+
VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion()->getPreheaderVPBB();
1596+
VPBasicBlock *ExitingVPBB =
1597+
Plan.getVectorLoopRegion()->getExitingBasicBlock();
1598+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
1599+
VPValue *StartValue = WidenIV->getStartValue();
1600+
VPValue *Step = WidenIV->getStepValue();
// If the induction has an associated trunc, truncate both the start value
// and the step in the preheader; the induction type below is then inferred
// from the truncated start value.
1601+
if (TruncInst *I = WidenIV->getTruncInst()) {
1602+
Type *TruncTy = I->getType();
1603+
auto *R = new VPScalarCastRecipe(Instruction::Trunc, StartValue, TruncTy);
1604+
VectorPHVPBB->appendRecipe(R);
1605+
StartValue = R;
1606+
R = new VPScalarCastRecipe(Instruction::Trunc, Step, TruncTy);
1607+
VectorPHVPBB->appendRecipe(R);
1608+
Step = R;
1609+
}
1610+
Type *InductionTy = TypeInfo.inferScalarType(StartValue);
1611+
LLVMContext &Ctx = InductionTy->getContext();
// Constant-true live-in passed as the mask operand of the VP intrinsics
// created below.
1612+
VPValue *TrueMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1613+
1614+
// Construct the initial value of the vector IV in the vector loop preheader
1615+
VPSingleDefRecipe *SteppedStart =
1616+
createStepVector(StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
1617+
1618+
// Create the vector phi node for both int. and fp. induction variables
1619+
// and determine the kind of arithmetic we will perform
1620+
auto *VecInd = new VPWidenPHIRecipe(WidenIV->getPHINode());
1621+
VecInd->insertBefore(WidenIV);
1622+
WidenIV->replaceAllUsesWith(VecInd);
1623+
Intrinsic::ID VPArithOp;
1624+
Instruction::BinaryOps MulOp;
1625+
if (InductionTy->isIntegerTy()) {
1626+
VPArithOp = Intrinsic::vp_add;
1627+
MulOp = Instruction::Mul;
1628+
} else {
1629+
VPArithOp = ID.getInductionOpcode() == Instruction::FAdd
1630+
? Intrinsic::vp_fadd
1631+
: Intrinsic::vp_fsub;
1632+
MulOp = Instruction::FMul;
1633+
}
1634+
1635+
// Multiply the step by EVL (the explicit vector length), after converting
// EVL to the induction type.
1636+
VPSingleDefRecipe *ScalarMul;
1637+
if (InductionTy->isFloatingPointTy()) {
1638+
FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
1639+
auto *CastEVL =
1640+
new VPScalarCastRecipe(Instruction::UIToFP, EVL, InductionTy);
1641+
CastEVL->insertBefore(CanonicalIVIncrement);
1642+
ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, FMF, DL);
1643+
} else {
1644+
unsigned InductionSz = InductionTy->getScalarSizeInBits();
1645+
unsigned EVLSz = TypeInfo.inferScalarType(EVL)->getScalarSizeInBits();
1646+
VPValue *CastEVL = EVL;
// Only cast when the bit widths differ: truncate a wider EVL, zero-extend
// a narrower one.
1647+
if (InductionSz != EVLSz) {
1648+
auto *R = new VPScalarCastRecipe(EVLSz > InductionSz ? Instruction::Trunc
1649+
: Instruction::ZExt,
1650+
EVL, InductionTy);
1651+
R->insertBefore(CanonicalIVIncrement);
1652+
CastEVL = R;
1653+
}
1654+
ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, DL);
1655+
}
1656+
ScalarMul->insertBefore(CanonicalIVIncrement);
1657+
// Create a vector splat to use in the induction update.
1658+
auto *SplatVF =
1659+
new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splat,
1660+
{ScalarMul, TrueMask, EVL}, InductionTy, DL);
1661+
SplatVF->insertBefore(CanonicalIVIncrement);
1662+
// TODO: We may need to add the step a number of times if UF > 1
1663+
auto *LastInduction = new VPWidenIntrinsicRecipe(
1664+
VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
1665+
LastInduction->insertBefore(CanonicalIVIncrement);
// Wire up the phi: stepped start from the preheader, updated induction from
// the exiting block.
1666+
VecInd->addIncoming(SteppedStart, VectorPHVPBB);
1667+
VecInd->addIncoming(LastInduction, ExitingVPBB);
1668+
}
1669+
15501670
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
15511671
/// replaces all uses except the canonical IV increment of
15521672
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1592,9 +1712,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
15921712
// The transform updates all users of inductions to work based on EVL, instead
15931713
// of the VF directly. At the moment, widened pointer inductions cannot be
15941714
// updated, so bail out if the plan contains any.
1595-
bool ContainsWidenInductions = any_of(
1596-
Header->phis(),
1597-
IsaPred<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>);
1715+
bool ContainsWidenInductions =
1716+
any_of(Header->phis(), IsaPred<VPWidenPointerInductionRecipe>);
15981717
if (ContainsWidenInductions)
15991718
return false;
16001719

@@ -1638,6 +1757,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
16381757

16391758
transformRecipestoEVLRecipes(Plan, *VPEVL);
16401759

1760+
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1761+
SmallVector<VPRecipeBase *> ToRemove;
1762+
for (VPRecipeBase &Phi : HeaderVPBB->phis())
1763+
if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
1764+
transformWidenIVRecipestoEVLRecipes(WidenIV, Plan, VPEVL);
1765+
ToRemove.push_back(WidenIV);
1766+
}
1767+
for (VPRecipeBase *R : ToRemove)
1768+
R->eraseFromParent();
1769+
16411770
// Replace all uses of VPCanonicalIVPHIRecipe by
16421771
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
16431772
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
150150
.Case<VPScalarCastRecipe>(
151151
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
152152
.Case<VPInstruction>([&](const VPInstruction *I) {
153-
if (I->getOpcode() != Instruction::Add) {
153+
if ((I->getOpcode() != Instruction::Add) &&
154+
(I->getOpcode() != Instruction::Mul)) {
154155
errs() << "EVL is used as an operand in non-VPInstruction::Add\n";
155156
return false;
156157
}
@@ -159,11 +160,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
159160
"users\n";
160161
return false;
161162
}
162-
if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
163-
errs() << "Result of VPInstruction::Add with EVL operand is "
164-
"not used by VPEVLBasedIVPHIRecipe\n";
165-
return false;
166-
}
167163
return true;
168164
})
169165
.Default([&](const VPUser *U) {

0 commit comments

Comments
 (0)