@@ -4833,8 +4833,10 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4833
4833
sort (InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
4834
4834
if (Numbering[A.first ] != Numbering[B.first ])
4835
4835
return Numbering[A.first ] < Numbering[B.first ];
4836
- ElementCountComparator ECC;
4837
- return ECC (A.second , B.second );
4836
+ const auto &LHS = A.second ;
4837
+ const auto &RHS = B.second ;
4838
+ return std::make_tuple (LHS.isScalable (), LHS.getKnownMinValue ()) <
4839
+ std::make_tuple (RHS.isScalable (), RHS.getKnownMinValue ());
4838
4840
});
4839
4841
4840
4842
// For a list of ordered instruction-vf pairs:
@@ -4877,65 +4879,71 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4877
4879
} while (!Tail.empty ());
4878
4880
}
4879
4881
4880
- VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor (
4881
- const ElementCountSet &VFCandidates) {
4882
+ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor () {
4882
4883
InstructionCost ExpectedCost =
4883
4884
CM.expectedCost (ElementCount::getFixed (1 )).first ;
4884
4885
LLVM_DEBUG (dbgs () << " LV: Scalar loop costs: " << ExpectedCost << " .\n " );
4885
4886
assert (ExpectedCost.isValid () && " Unexpected invalid cost for scalar loop" );
4886
- assert (VFCandidates.count (ElementCount::getFixed (1 )) &&
4887
+ assert (any_of (VPlans,
4888
+ [](std::unique_ptr<VPlan> &P) {
4889
+ return P->hasVF (ElementCount::getFixed (1 ));
4890
+ }) &&
4887
4891
" Expected Scalar VF to be a candidate" );
4888
4892
4889
4893
const VectorizationFactor ScalarCost (ElementCount::getFixed (1 ), ExpectedCost,
4890
4894
ExpectedCost);
4891
4895
VectorizationFactor ChosenFactor = ScalarCost;
4892
4896
4893
4897
bool ForceVectorization = Hints.getForce () == LoopVectorizeHints::FK_Enabled;
4894
- if (ForceVectorization && VFCandidates.size () > 1 ) {
4898
+ if (ForceVectorization &&
4899
+ (VPlans.size () > 1 || !VPlans[0 ]->hasScalarVFOnly ())) {
4895
4900
// Ignore scalar width, because the user explicitly wants vectorization.
4896
4901
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
4897
4902
// evaluation.
4898
4903
ChosenFactor.Cost = InstructionCost::getMax ();
4899
4904
}
4900
4905
4901
4906
SmallVector<InstructionVFPair> InvalidCosts;
4902
- for (const auto &i : VFCandidates) {
4903
- // The cost for scalar VF=1 is already calculated, so ignore it.
4904
- if (i.isScalar ())
4905
- continue ;
4907
+ for (auto &P : VPlans) {
4908
+ for (ElementCount VF : P->vectorFactors ()) {
4909
+ // The cost for scalar VF=1 is already calculated, so ignore it.
4910
+ if (VF.isScalar ())
4911
+ continue ;
4906
4912
4907
- LoopVectorizationCostModel::VectorizationCostTy C =
4908
- CM.expectedCost (i , &InvalidCosts);
4909
- VectorizationFactor Candidate (i , C.first , ScalarCost.ScalarCost );
4913
+ LoopVectorizationCostModel::VectorizationCostTy C =
4914
+ CM.expectedCost (VF , &InvalidCosts);
4915
+ VectorizationFactor Candidate (VF , C.first , ScalarCost.ScalarCost );
4910
4916
4911
4917
#ifndef NDEBUG
4912
- unsigned AssumedMinimumVscale =
4913
- getVScaleForTuning (OrigLoop, TTI).value_or (1 );
4914
- unsigned Width =
4915
- Candidate.Width .isScalable ()
4916
- ? Candidate.Width .getKnownMinValue () * AssumedMinimumVscale
4917
- : Candidate.Width .getFixedValue ();
4918
- LLVM_DEBUG (dbgs () << " LV: Vector loop of width " << i
4919
- << " costs: " << (Candidate.Cost / Width));
4920
- if (i .isScalable ())
4921
- LLVM_DEBUG (dbgs () << " (assuming a minimum vscale of "
4922
- << AssumedMinimumVscale << " )" );
4923
- LLVM_DEBUG (dbgs () << " .\n " );
4918
+ unsigned AssumedMinimumVscale =
4919
+ getVScaleForTuning (OrigLoop, TTI).value_or (1 );
4920
+ unsigned Width =
4921
+ Candidate.Width .isScalable ()
4922
+ ? Candidate.Width .getKnownMinValue () * AssumedMinimumVscale
4923
+ : Candidate.Width .getFixedValue ();
4924
+ LLVM_DEBUG (dbgs () << " LV: Vector loop of width " << VF
4925
+ << " costs: " << (Candidate.Cost / Width));
4926
+ if (VF .isScalable ())
4927
+ LLVM_DEBUG (dbgs () << " (assuming a minimum vscale of "
4928
+ << AssumedMinimumVscale << " )" );
4929
+ LLVM_DEBUG (dbgs () << " .\n " );
4924
4930
#endif
4925
4931
4926
- if (!C.second && !ForceVectorization) {
4927
- LLVM_DEBUG (
4928
- dbgs () << " LV: Not considering vector loop of width " << i
4929
- << " because it will not generate any vector instructions.\n " );
4930
- continue ;
4931
- }
4932
+ if (!C.second && !ForceVectorization) {
4933
+ LLVM_DEBUG (
4934
+ dbgs ()
4935
+ << " LV: Not considering vector loop of width " << VF
4936
+ << " because it will not generate any vector instructions.\n " );
4937
+ continue ;
4938
+ }
4932
4939
4933
- // If profitable add it to ProfitableVF list.
4934
- if (isMoreProfitable (Candidate, ScalarCost))
4935
- ProfitableVFs.push_back (Candidate);
4940
+ // If profitable add it to ProfitableVF list.
4941
+ if (isMoreProfitable (Candidate, ScalarCost))
4942
+ ProfitableVFs.push_back (Candidate);
4936
4943
4937
- if (isMoreProfitable (Candidate, ChosenFactor))
4938
- ChosenFactor = Candidate;
4944
+ if (isMoreProfitable (Candidate, ChosenFactor))
4945
+ ChosenFactor = Candidate;
4946
+ }
4939
4947
}
4940
4948
4941
4949
emitInvalidCostRemarks (InvalidCosts, ORE, OrigLoop);
@@ -7270,14 +7278,14 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
7270
7278
" InvalidCost" , ORE, OrigLoop);
7271
7279
}
7272
7280
7273
- // Populate the set of Vectorization Factor Candidates.
7274
- ElementCountSet VFCandidates;
7281
+ // Collect the Vectorization Factor Candidates.
7282
+ SmallVector<ElementCount> VFCandidates;
7275
7283
for (auto VF = ElementCount::getFixed (1 );
7276
7284
ElementCount::isKnownLE (VF, MaxFactors.FixedVF ); VF *= 2 )
7277
- VFCandidates.insert (VF);
7285
+ VFCandidates.push_back (VF);
7278
7286
for (auto VF = ElementCount::getScalable (1 );
7279
7287
ElementCount::isKnownLE (VF, MaxFactors.ScalableVF ); VF *= 2 )
7280
- VFCandidates.insert (VF);
7288
+ VFCandidates.push_back (VF);
7281
7289
7282
7290
CM.collectInLoopReductions ();
7283
7291
for (const auto &VF : VFCandidates) {
@@ -7299,7 +7307,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
7299
7307
return VectorizationFactor::Disabled ();
7300
7308
7301
7309
// Select the optimal vectorization factor.
7302
- VectorizationFactor VF = selectVectorizationFactor (VFCandidates );
7310
+ VectorizationFactor VF = selectVectorizationFactor ();
7303
7311
assert ((VF.Width .isScalar () || VF.ScalarCost > 0 ) && " when vectorizing, the scalar cost must be non-zero." );
7304
7312
if (!hasPlanWithVF (VF.Width )) {
7305
7313
LLVM_DEBUG (dbgs () << " LV: No VPlan could be built for " << VF.Width
0 commit comments