@@ -1555,10 +1555,7 @@ class LoopVectorizationCostModel {
1555
1555
/// Returns true if epilogue vectorization is considered profitable, and
1556
1556
/// false otherwise.
1557
1557
/// \p VF is the vectorization factor chosen for the original loop.
1558
- // / \p Multiplier is an aditional scaling factor applied to VF before
1559
- // / comparing to EpilogueVectorizationMinVF.
1560
- bool isEpilogueVectorizationProfitable (const ElementCount VF,
1561
- const unsigned IC) const ;
1558
+ bool isEpilogueVectorizationProfitable (const ElementCount VF) const ;
1562
1559
1563
1560
/// Returns the execution time cost of an instruction for a given vector
1564
1561
/// width. Vector width of one means scalar.
@@ -4387,11 +4384,12 @@ static unsigned getEstimatedRuntimeVF(const Loop *L,
4387
4384
}
4388
4385
4389
4386
bool LoopVectorizationPlanner::isMoreProfitable (
4390
- const VectorizationFactor &A, const VectorizationFactor &B,
4391
- const unsigned MaxTripCount) const {
4387
+ const VectorizationFactor &A, const VectorizationFactor &B) const {
4392
4388
InstructionCost CostA = A.Cost ;
4393
4389
InstructionCost CostB = B.Cost ;
4394
4390
4391
+ unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4392
+
4395
4393
// Improve estimate for the vector width if it is scalable.
4396
4394
unsigned EstimatedWidthA = A.Width .getKnownMinValue ();
4397
4395
unsigned EstimatedWidthB = B.Width .getKnownMinValue ();
@@ -4440,12 +4438,6 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4440
4438
return CmpFn (RTCostA, RTCostB);
4441
4439
}
4442
4440
4443
- bool LoopVectorizationPlanner::isMoreProfitable (
4444
- const VectorizationFactor &A, const VectorizationFactor &B) const {
4445
- const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4446
- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4447
- }
4448
-
4449
4441
void LoopVectorizationPlanner::emitInvalidCostRemarks (
4450
4442
OptimizationRemarkEmitter *ORE) {
4451
4443
using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
@@ -4760,7 +4752,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
4760
4752
}
4761
4753
4762
4754
bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable (
4763
- const ElementCount VF, const unsigned IC ) const {
4755
+ const ElementCount VF) const {
4764
4756
// FIXME: We need a much better cost-model to take different parameters such
4765
4757
// as register pressure, code size increase and cost of extra branches into
4766
4758
// account. For now we apply a very crude heuristic and only consider loops
@@ -4775,15 +4767,12 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
4775
4767
if (TTI.getMaxInterleaveFactor (VF) <= 1 )
4776
4768
return false ;
4777
4769
4778
- // TODO: PR #108190 introduced a discrepancy between fixed-width and scalable
4779
- // VFs when deciding profitability.
4780
- // See related "TODO: extend to support scalable VFs." in
4781
- // selectEpilogueVectorizationFactor.
4782
- unsigned Multiplier = VF.isFixed () ? IC : 1 ;
4783
- unsigned MinVFThreshold = EpilogueVectorizationMinVF.getNumOccurrences () > 0
4784
- ? EpilogueVectorizationMinVF
4785
- : TTI.getEpilogueVectorizationMinVF ();
4786
- return getEstimatedRuntimeVF (TheLoop, TTI, VF * Multiplier) >= MinVFThreshold;
4770
+ unsigned Multiplier = 1 ;
4771
+ if (VF.isScalable ())
4772
+ Multiplier = getVScaleForTuning (TheLoop, TTI).value_or (1 );
4773
+ if ((Multiplier * VF.getKnownMinValue ()) >= EpilogueVectorizationMinVF)
4774
+ return true ;
4775
+ return false ;
4787
4776
}
4788
4777
4789
4778
VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor (
@@ -4826,7 +4815,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4826
4815
return Result;
4827
4816
}
4828
4817
4829
- if (!CM.isEpilogueVectorizationProfitable (MainLoopVF, IC )) {
4818
+ if (!CM.isEpilogueVectorizationProfitable (MainLoopVF)) {
4830
4819
LLVM_DEBUG (dbgs () << " LEV: Epilogue vectorization is not profitable for "
4831
4820
" this loop\n " );
4832
4821
return Result;
@@ -4841,20 +4830,16 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4841
4830
ScalarEvolution &SE = *PSE.getSE ();
4842
4831
Type *TCType = Legal->getWidestInductionType ();
4843
4832
const SCEV *RemainingIterations = nullptr ;
4844
- unsigned MaxTripCount = 0 ;
4845
4833
for (auto &NextVF : ProfitableVFs) {
4846
4834
// Skip candidate VFs without a corresponding VPlan.
4847
4835
if (!hasPlanWithVF (NextVF.Width ))
4848
4836
continue ;
4849
4837
4850
- // Skip candidate VFs with widths >= the (estimated) runtime VF (scalable
4851
- // vectors) or > the VF of the main loop (fixed vectors).
4838
+ // Skip candidate VFs with widths >= the estimated runtime VF (scalable
4839
+ // vectors) or the VF of the main loop (fixed vectors).
4852
4840
if ((!NextVF.Width .isScalable () && MainLoopVF.isScalable () &&
4853
4841
ElementCount::isKnownGE (NextVF.Width , EstimatedRuntimeVF)) ||
4854
- (NextVF.Width .isScalable () &&
4855
- ElementCount::isKnownGE (NextVF.Width , MainLoopVF)) ||
4856
- (!NextVF.Width .isScalable () && !MainLoopVF.isScalable () &&
4857
- ElementCount::isKnownGT (NextVF.Width , MainLoopVF)))
4842
+ ElementCount::isKnownGE (NextVF.Width , MainLoopVF))
4858
4843
continue ;
4859
4844
4860
4845
// If NextVF is greater than the number of remaining iterations, the
@@ -4868,14 +4853,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4868
4853
" Trip count SCEV must be computable" );
4869
4854
RemainingIterations = SE.getURemExpr (
4870
4855
TC, SE.getConstant (TCType, MainLoopVF.getKnownMinValue () * IC));
4871
- MaxTripCount = MainLoopVF.getKnownMinValue () * IC - 1 ;
4872
- if (SE.isKnownPredicate (CmpInst::ICMP_ULT, RemainingIterations,
4873
- SE.getConstant (TCType, MaxTripCount))) {
4874
- MaxTripCount =
4875
- SE.getUnsignedRangeMax (RemainingIterations).getZExtValue ();
4876
- }
4877
- LLVM_DEBUG (dbgs () << " LEV: Maximum Trip Count for Epilogue: "
4878
- << MaxTripCount << " \n " );
4879
4856
}
4880
4857
if (SE.isKnownPredicate (
4881
4858
CmpInst::ICMP_UGT,
@@ -4884,8 +4861,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4884
4861
continue ;
4885
4862
}
4886
4863
4887
- if (Result.Width .isScalar () ||
4888
- isMoreProfitable (NextVF, Result, MaxTripCount))
4864
+ if (Result.Width .isScalar () || isMoreProfitable (NextVF, Result))
4889
4865
Result = NextVF;
4890
4866
}
4891
4867
0 commit comments