@@ -1535,10 +1535,7 @@ class LoopVectorizationCostModel {
1535
1535
// / Returns true if epilogue vectorization is considered profitable, and
1536
1536
// / false otherwise.
1537
1537
// / \p VF is the vectorization factor chosen for the original loop.
1538
- // / \p Multiplier is an additional scaling factor applied to VF before
1539
- // / comparing to EpilogueVectorizationMinVF.
1540
- bool isEpilogueVectorizationProfitable (const ElementCount VF,
1541
- const unsigned IC) const ;
1538
+ bool isEpilogueVectorizationProfitable (const ElementCount VF) const ;
1542
1539
1543
1540
// / Returns the execution time cost of an instruction for a given vector
1544
1541
// / width. Vector width of one means scalar.
@@ -4264,11 +4261,12 @@ static unsigned getEstimatedRuntimeVF(const Loop *L,
4264
4261
}
4265
4262
4266
4263
bool LoopVectorizationPlanner::isMoreProfitable (
4267
- const VectorizationFactor &A, const VectorizationFactor &B,
4268
- const unsigned MaxTripCount) const {
4264
+ const VectorizationFactor &A, const VectorizationFactor &B) const {
4269
4265
InstructionCost CostA = A.Cost ;
4270
4266
InstructionCost CostB = B.Cost ;
4271
4267
4268
+ unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4269
+
4272
4270
// Improve estimate for the vector width if it is scalable.
4273
4271
unsigned EstimatedWidthA = A.Width .getKnownMinValue ();
4274
4272
unsigned EstimatedWidthB = B.Width .getKnownMinValue ();
@@ -4317,12 +4315,6 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4317
4315
return CmpFn (RTCostA, RTCostB);
4318
4316
}
4319
4317
4320
- bool LoopVectorizationPlanner::isMoreProfitable (
4321
- const VectorizationFactor &A, const VectorizationFactor &B) const {
4322
- const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4323
- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4324
- }
4325
-
4326
4318
void LoopVectorizationPlanner::emitInvalidCostRemarks (
4327
4319
OptimizationRemarkEmitter *ORE) {
4328
4320
using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
@@ -4637,7 +4629,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
4637
4629
}
4638
4630
4639
4631
bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable (
4640
- const ElementCount VF, const unsigned IC ) const {
4632
+ const ElementCount VF) const {
4641
4633
// FIXME: We need a much better cost-model to take different parameters such
4642
4634
// as register pressure, code size increase and cost of extra branches into
4643
4635
// account. For now we apply a very crude heuristic and only consider loops
@@ -4652,15 +4644,12 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
4652
4644
if (TTI.getMaxInterleaveFactor (VF) <= 1 )
4653
4645
return false ;
4654
4646
4655
- // TODO: PR #108190 introduced a discrepancy between fixed-width and scalable
4656
- // VFs when deciding profitability.
4657
- // See related "TODO: extend to support scalable VFs." in
4658
- // selectEpilogueVectorizationFactor.
4659
- unsigned Multiplier = VF.isFixed () ? IC : 1 ;
4660
- unsigned MinVFThreshold = EpilogueVectorizationMinVF.getNumOccurrences () > 0
4661
- ? EpilogueVectorizationMinVF
4662
- : TTI.getEpilogueVectorizationMinVF ();
4663
- return getEstimatedRuntimeVF (TheLoop, TTI, VF * Multiplier) >= MinVFThreshold;
4647
+ unsigned Multiplier = 1 ;
4648
+ if (VF.isScalable ())
4649
+ Multiplier = getVScaleForTuning (TheLoop, TTI).value_or (1 );
4650
+ if ((Multiplier * VF.getKnownMinValue ()) >= EpilogueVectorizationMinVF)
4651
+ return true ;
4652
+ return false ;
4664
4653
}
4665
4654
4666
4655
VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor (
@@ -4703,7 +4692,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4703
4692
return Result;
4704
4693
}
4705
4694
4706
- if (!CM.isEpilogueVectorizationProfitable (MainLoopVF, IC )) {
4695
+ if (!CM.isEpilogueVectorizationProfitable (MainLoopVF)) {
4707
4696
LLVM_DEBUG (dbgs () << " LEV: Epilogue vectorization is not profitable for "
4708
4697
" this loop\n " );
4709
4698
return Result;
@@ -4718,20 +4707,16 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4718
4707
ScalarEvolution &SE = *PSE.getSE ();
4719
4708
Type *TCType = Legal->getWidestInductionType ();
4720
4709
const SCEV *RemainingIterations = nullptr ;
4721
- unsigned MaxTripCount = 0 ;
4722
4710
for (auto &NextVF : ProfitableVFs) {
4723
4711
// Skip candidate VFs without a corresponding VPlan.
4724
4712
if (!hasPlanWithVF (NextVF.Width ))
4725
4713
continue ;
4726
4714
4727
- // Skip candidate VFs with widths >= the (estimated) runtime VF (scalable
4728
- // vectors) or > the VF of the main loop (fixed vectors).
4715
+ // Skip candidate VFs with widths >= the estimated runtime VF (scalable
4716
+ // vectors) or the VF of the main loop (fixed vectors).
4729
4717
if ((!NextVF.Width .isScalable () && MainLoopVF.isScalable () &&
4730
4718
ElementCount::isKnownGE (NextVF.Width , EstimatedRuntimeVF)) ||
4731
- (NextVF.Width .isScalable () &&
4732
- ElementCount::isKnownGE (NextVF.Width , MainLoopVF)) ||
4733
- (!NextVF.Width .isScalable () && !MainLoopVF.isScalable () &&
4734
- ElementCount::isKnownGT (NextVF.Width , MainLoopVF)))
4719
+ ElementCount::isKnownGE (NextVF.Width , MainLoopVF))
4735
4720
continue ;
4736
4721
4737
4722
// If NextVF is greater than the number of remaining iterations, the
@@ -4745,14 +4730,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4745
4730
" Trip count SCEV must be computable" );
4746
4731
RemainingIterations = SE.getURemExpr (
4747
4732
TC, SE.getConstant (TCType, MainLoopVF.getKnownMinValue () * IC));
4748
- MaxTripCount = MainLoopVF.getKnownMinValue () * IC - 1 ;
4749
- if (SE.isKnownPredicate (CmpInst::ICMP_ULT, RemainingIterations,
4750
- SE.getConstant (TCType, MaxTripCount))) {
4751
- MaxTripCount =
4752
- SE.getUnsignedRangeMax (RemainingIterations).getZExtValue ();
4753
- }
4754
- LLVM_DEBUG (dbgs () << " LEV: Maximum Trip Count for Epilogue: "
4755
- << MaxTripCount << " \n " );
4756
4733
}
4757
4734
if (SE.isKnownPredicate (
4758
4735
CmpInst::ICMP_UGT,
@@ -4761,8 +4738,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4761
4738
continue ;
4762
4739
}
4763
4740
4764
- if (Result.Width .isScalar () ||
4765
- isMoreProfitable (NextVF, Result, MaxTripCount))
4741
+ if (Result.Width .isScalar () || isMoreProfitable (NextVF, Result))
4766
4742
Result = NextVF;
4767
4743
}
4768
4744
0 commit comments