@@ -1547,6 +1547,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1547
1547
}
1548
1548
}
1549
1549
1550
+ // / This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
1551
+ // / an induction variable at the preheader.
1552
+ static VPSingleDefRecipe *createStepVector (VPValue *StartValue, VPValue *Step,
1553
+ Type *InductionTy,
1554
+ const InductionDescriptor &ID,
1555
+ VPBasicBlock *VectorPHVPBB,
1556
+ DebugLoc DL) {
1557
+ Type *IntTy = InductionTy->isIntegerTy ()
1558
+ ? InductionTy
1559
+ : IntegerType::get (InductionTy->getContext (),
1560
+ InductionTy->getScalarSizeInBits ());
1561
+ // Create a vector of consecutive numbers from zero to VF.
1562
+ VPSingleDefRecipe *InitVec =
1563
+ new VPWidenIntrinsicRecipe (Intrinsic::stepvector, {}, IntTy, DL);
1564
+ VectorPHVPBB->appendRecipe (InitVec);
1565
+
1566
+ if (InductionTy->isIntegerTy ()) {
1567
+ auto *Mul = new VPInstruction (Instruction::Mul, {InitVec, Step}, DL);
1568
+ VectorPHVPBB->appendRecipe (Mul);
1569
+ auto *SteppedStart =
1570
+ new VPInstruction (Instruction::Add, {StartValue, Mul}, {}, " induction" );
1571
+ VectorPHVPBB->appendRecipe (SteppedStart);
1572
+ return SteppedStart;
1573
+ } else {
1574
+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1575
+ InitVec = new VPWidenCastRecipe (Instruction::UIToFP, InitVec, InductionTy);
1576
+ VectorPHVPBB->appendRecipe (InitVec);
1577
+ auto *Mul = new VPInstruction (Instruction::FMul, {InitVec, Step}, FMF, DL);
1578
+ VectorPHVPBB->appendRecipe (Mul);
1579
+ Instruction::BinaryOps BinOp = ID.getInductionOpcode ();
1580
+ auto *SteppedStart =
1581
+ new VPInstruction (BinOp, {StartValue, Mul}, FMF, DL, " induction" );
1582
+ VectorPHVPBB->appendRecipe (SteppedStart);
1583
+ return SteppedStart;
1584
+ }
1585
+ }
1586
+
1587
+ // / Lower widen iv recipes into recipes with EVL.
1588
+ static void
1589
+ transformWidenIVRecipestoEVLRecipes (VPWidenIntOrFpInductionRecipe *WidenIV,
1590
+ VPlan &Plan, VPValue *EVL) {
1591
+ DebugLoc DL = WidenIV->getDebugLoc ();
1592
+ const InductionDescriptor &ID = WidenIV->getInductionDescriptor ();
1593
+ auto *CanonicalIVIncrement =
1594
+ cast<VPInstruction>(Plan.getCanonicalIV ()->getBackedgeValue ());
1595
+ VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion ()->getPreheaderVPBB ();
1596
+ VPBasicBlock *ExitingVPBB =
1597
+ Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1598
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
1599
+ VPValue *StartValue = WidenIV->getStartValue ();
1600
+ VPValue *Step = WidenIV->getStepValue ();
1601
+ if (TruncInst *I = WidenIV->getTruncInst ()) {
1602
+ Type *TruncTy = I->getType ();
1603
+ auto *R = new VPScalarCastRecipe (Instruction::Trunc, StartValue, TruncTy);
1604
+ VectorPHVPBB->appendRecipe (R);
1605
+ StartValue = R;
1606
+ R = new VPScalarCastRecipe (Instruction::Trunc, Step, TruncTy);
1607
+ VectorPHVPBB->appendRecipe (R);
1608
+ Step = R;
1609
+ }
1610
+ Type *InductionTy = TypeInfo.inferScalarType (StartValue);
1611
+ LLVMContext &Ctx = InductionTy->getContext ();
1612
+ VPValue *TrueMask = Plan.getOrAddLiveIn (ConstantInt::getTrue (Ctx));
1613
+
1614
+ // Construct the initial value of the vector IV in the vector loop preheader
1615
+ VPSingleDefRecipe *SteppedStart =
1616
+ createStepVector (StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
1617
+
1618
+ // Create the vector phi node for both int. and fp. induction variables
1619
+ // and determine the kind of arithmetic we will perform
1620
+ auto *VecInd = new VPWidenPHIRecipe (WidenIV->getPHINode ());
1621
+ VecInd->insertBefore (WidenIV);
1622
+ WidenIV->replaceAllUsesWith (VecInd);
1623
+ Intrinsic::ID VPArithOp;
1624
+ Instruction::BinaryOps MulOp;
1625
+ if (InductionTy->isIntegerTy ()) {
1626
+ VPArithOp = Intrinsic::vp_add;
1627
+ MulOp = Instruction::Mul;
1628
+ } else {
1629
+ VPArithOp = ID.getInductionOpcode () == Instruction::FAdd
1630
+ ? Intrinsic::vp_fadd
1631
+ : Intrinsic::vp_fsub;
1632
+ MulOp = Instruction::FMul;
1633
+ }
1634
+
1635
+ // Multiply the runtime VF by the step
1636
+ VPSingleDefRecipe *ScalarMul;
1637
+ if (InductionTy->isFloatingPointTy ()) {
1638
+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1639
+ auto *CastEVL =
1640
+ new VPScalarCastRecipe (Instruction::UIToFP, EVL, InductionTy);
1641
+ CastEVL->insertBefore (CanonicalIVIncrement);
1642
+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, FMF, DL);
1643
+ } else {
1644
+ unsigned InductionSz = InductionTy->getScalarSizeInBits ();
1645
+ unsigned EVLSz = TypeInfo.inferScalarType (EVL)->getScalarSizeInBits ();
1646
+ VPValue *CastEVL = EVL;
1647
+ if (InductionSz != EVLSz) {
1648
+ auto *R = new VPScalarCastRecipe (EVLSz > InductionSz ? Instruction::Trunc
1649
+ : Instruction::ZExt,
1650
+ EVL, InductionTy);
1651
+ R->insertBefore (CanonicalIVIncrement);
1652
+ CastEVL = R;
1653
+ }
1654
+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, DL);
1655
+ }
1656
+ ScalarMul->insertBefore (CanonicalIVIncrement);
1657
+ // Create a vector splat to use in the induction update.
1658
+ auto *SplatVF =
1659
+ new VPWidenIntrinsicRecipe (Intrinsic::experimental_vp_splat,
1660
+ {ScalarMul, TrueMask, EVL}, InductionTy, DL);
1661
+ SplatVF->insertBefore (CanonicalIVIncrement);
1662
+ // TODO: We may need to add the step a number of times if UF > 1
1663
+ auto *LastInduction = new VPWidenIntrinsicRecipe (
1664
+ VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
1665
+ LastInduction->insertBefore (CanonicalIVIncrement);
1666
+ VecInd->addIncoming (SteppedStart, VectorPHVPBB);
1667
+ VecInd->addIncoming (LastInduction, ExitingVPBB);
1668
+ }
1669
+
1550
1670
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1551
1671
/// replaces all uses except the canonical IV increment of
1552
1672
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1592,9 +1712,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
1592
1712
// The transform updates all users of inductions to work based on EVL, instead
1593
1713
// of the VF directly. At the moment, widened pointer inductions cannot be
// updated, so bail out if the plan contains any.
1595
- bool ContainsWidenInductions = any_of (
1596
- Header->phis (),
1597
- IsaPred<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>);
1715
+ bool ContainsWidenInductions =
1716
+ any_of (Header->phis (), IsaPred<VPWidenPointerInductionRecipe>);
1598
1717
if (ContainsWidenInductions)
1599
1718
return false ;
1600
1719
@@ -1638,6 +1757,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
1638
1757
1639
1758
transformRecipestoEVLRecipes (Plan, *VPEVL);
1640
1759
1760
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1761
+ SmallVector<VPRecipeBase *> ToRemove;
1762
+ for (VPRecipeBase &Phi : HeaderVPBB->phis ())
1763
+ if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
1764
+ transformWidenIVRecipestoEVLRecipes (WidenIV, Plan, VPEVL);
1765
+ ToRemove.push_back (WidenIV);
1766
+ }
1767
+ for (VPRecipeBase *R : ToRemove)
1768
+ R->eraseFromParent ();
1769
+
1641
1770
// Replace all uses of VPCanonicalIVPHIRecipe by
1642
1771
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1643
1772
CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
0 commit comments