@@ -2652,6 +2652,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2652
2652
return I->second ;
2653
2653
}
2654
2654
2655
+ // / Knowing that loop \p L would be fully unrolled after vectorisation, add
2656
+ // / instructions that will get simplified and thus should not have any cost to
2657
+ // / \p InstsToIgnore
2658
+ static void AddFullyUnrolledInstructionsToIgnore (
2659
+ Loop *L, const LoopVectorizationLegality::InductionList &IL,
2660
+ SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
2661
+ auto *Cmp = L->getLatchCmpInst ();
2662
+ if (!Cmp)
2663
+ return ;
2664
+ InstsToIgnore.insert (Cmp);
2665
+ for (const auto &[IV, IndDesc] : IL) {
2666
+ // Get next iteration value of the induction variable
2667
+ Instruction *IVInst =
2668
+ cast<Instruction>(IV->getIncomingValueForBlock (L->getLoopLatch ()));
2669
+ bool IsSimplifiedAway = true ;
2670
+ // Check that this value used only to exit the loop
2671
+ for (auto *UIV : IVInst->users ()) {
2672
+ if (UIV != IV && UIV != Cmp) {
2673
+ IsSimplifiedAway = false ;
2674
+ break ;
2675
+ }
2676
+ }
2677
+ if (IsSimplifiedAway)
2678
+ InstsToIgnore.insert (IVInst);
2679
+ }
2680
+ }
2681
+
2655
2682
void InnerLoopVectorizer::createInductionResumeValues (
2656
2683
const SCEV2ValueTy &ExpandedSCEVs,
2657
2684
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -5557,19 +5584,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5557
5584
InstructionCost LoopVectorizationCostModel::expectedCost (ElementCount VF) {
5558
5585
InstructionCost Cost;
5559
5586
5560
- // If with the given VF loop gets fully unrolled, ignore the costs of
5561
- // comparison and induction instructions, as they'll get simplified away
5562
- SmallPtrSet<const Value *, 16 > ValuesToIgnoreForVF;
5587
+ // If with the given fixed width VF loop gets fully unrolled, ignore the costs
5588
+ // of comparison and induction instructions, as they'll get simplified away
5589
+ SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5563
5590
auto TC = PSE.getSE ()->getSmallConstantTripCount (TheLoop);
5564
- auto *Cmp = TheLoop->getLatchCmpInst ();
5565
- if (Cmp && TC == VF.getKnownMinValue ()) {
5566
- ValuesToIgnoreForVF.insert (Cmp);
5567
- for (const auto &[IV, IndDesc] : Legal->getInductionVars ()) {
5568
- Instruction *IVInc = cast<Instruction>(
5569
- IV->getIncomingValueForBlock (TheLoop->getLoopLatch ()));
5570
- ValuesToIgnoreForVF.insert (IVInc);
5571
- }
5572
- }
5591
+ if (VF.isFixed () && TC == VF.getFixedValue ())
5592
+ AddFullyUnrolledInstructionsToIgnore (TheLoop, Legal->getInductionVars (),
5593
+ ValuesToIgnoreForVF);
5573
5594
5574
5595
// For each block.
5575
5596
for (BasicBlock *BB : TheLoop->blocks ()) {
@@ -7263,16 +7284,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7263
7284
7264
7285
// If with the given VF loop gets fully unrolled, ignore the costs of
7265
7286
// comparison and induction instructions, as they'll get simplified away
7266
- auto TC = CM.PSE .getSE ()->getSmallConstantTripCount (OrigLoop);
7267
- auto *Cmp = OrigLoop->getLatchCmpInst ();
7268
- if (Cmp && TC == VF.getKnownMinValue ()) {
7269
- CostCtx.SkipCostComputation .insert (Cmp);
7270
- for (const auto &[IV, IndDesc] : Legal->getInductionVars ()) {
7271
- Instruction *IVInc = cast<Instruction>(
7272
- IV->getIncomingValueForBlock (OrigLoop->getLoopLatch ()));
7273
- CostCtx.SkipCostComputation .insert (IVInc);
7274
- }
7275
- }
7287
+ auto TC = PSE.getSE ()->getSmallConstantTripCount (OrigLoop);
7288
+ if (VF.isFixed () && TC == VF.getFixedValue ())
7289
+ AddFullyUnrolledInstructionsToIgnore (OrigLoop, Legal->getInductionVars (),
7290
+ CostCtx.SkipCostComputation );
7276
7291
7277
7292
for (Instruction *IVInst : IVInsts) {
7278
7293
if (CostCtx.skipCostComputation (IVInst, VF.isVector ()))
0 commit comments