@@ -9163,6 +9163,31 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
9163
9163
}
9164
9164
}
9165
9165
9166
+ // Add the necessary canonical IV and branch recipes required to control the
9167
+ // loop.
9168
+ static void addCanonicalIVRecipes (VPlan &Plan, Type *IdxTy, bool HasNUW,
9169
+ DebugLoc DL) {
9170
+ Value *StartIdx = ConstantInt::get (IdxTy, 0 );
9171
+ auto *StartV = Plan.getOrAddLiveIn (StartIdx);
9172
+
9173
+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
9174
+ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe (StartV, DL);
9175
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion ();
9176
+ VPBasicBlock *Header = TopRegion->getEntryBasicBlock ();
9177
+ Header->insert (CanonicalIVPHI, Header->begin ());
9178
+
9179
+ VPBuilder Builder (TopRegion->getExitingBasicBlock ());
9180
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
9181
+ auto *CanonicalIVIncrement = Builder.createOverflowingOp (
9182
+ Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF ()}, {HasNUW, false }, DL,
9183
+ " index.next" );
9184
+ CanonicalIVPHI->addOperand (CanonicalIVIncrement);
9185
+
9186
+ // Add the BranchOnCount VPInstruction to the latch.
9187
+ Builder.createNaryOp (VPInstruction::BranchOnCount,
9188
+ {CanonicalIVIncrement, &Plan.getVectorTripCount ()}, DL);
9189
+ }
9190
+
9166
9191
// / Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
9167
9192
// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
9168
9193
// / the end value of the induction.
@@ -9434,8 +9459,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9434
9459
auto Plan = VPlanTransforms::buildPlainCFG (OrigLoop, *LI, VPB2IRBB);
9435
9460
VPlanTransforms::prepareForVectorization (
9436
9461
*Plan, Legal->getWidestInductionType (), PSE, RequiresScalarEpilogueCheck,
9437
- CM.foldTailByMasking (), OrigLoop,
9438
- getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9462
+ CM.foldTailByMasking (), OrigLoop);
9439
9463
VPlanTransforms::createLoopRegions (*Plan);
9440
9464
9441
9465
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9446,22 +9470,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9446
9470
for (ElementCount VF : Range)
9447
9471
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse (&CM, VF);
9448
9472
9473
+ DebugLoc DL = getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ());
9449
9474
TailFoldingStyle Style = CM.getTailFoldingStyle (IVUpdateMayOverflow);
9450
9475
// Use NUW for the induction increment if we proved that it won't overflow in
9451
9476
// the vector loop or when not folding the tail. In the latter case, we know
9452
9477
// that the canonical induction increment will not overflow as the vector trip
9453
9478
// count is >= increment and a multiple of the increment.
9454
9479
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
9455
- if (!HasNUW) {
9456
- auto *IVInc = Plan->getVectorLoopRegion ()
9457
- ->getExitingBasicBlock ()
9458
- ->getTerminator ()
9459
- ->getOperand (0 );
9460
- assert (match (IVInc, m_VPInstruction<Instruction::Add>(
9461
- m_Specific (Plan->getCanonicalIV ()), m_VPValue ())) &&
9462
- " Did not find the canonical IV increment" );
9463
- cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags ();
9464
- }
9480
+ addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW, DL);
9465
9481
9466
9482
VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9467
9483
Builder);
@@ -9735,13 +9751,19 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
9735
9751
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
9736
9752
auto Plan = VPlanTransforms::buildPlainCFG (OrigLoop, *LI, VPB2IRBB);
9737
9753
VPlanTransforms::prepareForVectorization (
9738
- *Plan, Legal->getWidestInductionType (), PSE, true , false , OrigLoop,
9739
- getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9754
+ *Plan, Legal->getWidestInductionType (), PSE, true , false , OrigLoop);
9740
9755
VPlanTransforms::createLoopRegions (*Plan);
9741
9756
9742
9757
for (ElementCount VF : Range)
9743
9758
Plan->addVF (VF);
9744
9759
9760
+ // Tail folding is not supported for outer loops, so the induction increment
9761
+ // is guaranteed to not wrap.
9762
+ bool HasNUW = true ;
9763
+ addCanonicalIVRecipes (
9764
+ *Plan, Legal->getWidestInductionType (), HasNUW,
9765
+ getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9766
+
9745
9767
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes (
9746
9768
Plan,
9747
9769
[this ](PHINode *P) {
0 commit comments