Skip to content

Commit 9d45077

Browse files
committed
[VPlan] Iterate over VPlans to get VFs to compute cost for (NFCI).
Instead of iterating over all VFs when computing costs, simply iterate over the VFs available in the created VPlans. Split off from #92555. This also prepares for basing the check of whether any vector instructions will be generated on VPlan, to unblock recommitting #92555.
1 parent f09b024 commit 9d45077

File tree

3 files changed

+58
-55
lines changed

3 files changed

+58
-55
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -262,16 +262,6 @@ struct VectorizationFactor {
262262
}
263263
};
264264

265-
/// ElementCountComparator creates a total ordering for ElementCount
266-
/// for the purposes of using it in a set structure.
267-
struct ElementCountComparator {
268-
bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
269-
return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
270-
std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
271-
}
272-
};
273-
using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
274-
275265
/// A class that represents two vectorization factors (initialized with 0 by
276266
/// default). One for fixed-width vectorization and one for scalable
277267
/// vectorization. This can be used by the vectorizer to choose from a range of
@@ -442,10 +432,9 @@ class LoopVectorizationPlanner {
442432
VPRecipeBuilder &RecipeBuilder,
443433
ElementCount MinVF);
444434

445-
/// \return The most profitable vectorization factor and the cost of that VF.
446-
/// This method checks every VF in \p CandidateVFs.
447-
VectorizationFactor
448-
selectVectorizationFactor(const ElementCountSet &CandidateVFs);
435+
/// \return The most profitable vectorization factor for the available VPlans
436+
/// and the cost of that VF.
437+
VectorizationFactor selectVectorizationFactor();
449438

450439
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
451440
/// that of B.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 49 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4833,8 +4833,10 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
48334833
sort(InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
48344834
if (Numbering[A.first] != Numbering[B.first])
48354835
return Numbering[A.first] < Numbering[B.first];
4836-
ElementCountComparator ECC;
4837-
return ECC(A.second, B.second);
4836+
const auto &LHS = A.second;
4837+
const auto &RHS = B.second;
4838+
return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
4839+
std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
48384840
});
48394841

48404842
// For a list of ordered instruction-vf pairs:
@@ -4877,65 +4879,71 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
48774879
} while (!Tail.empty());
48784880
}
48794881

4880-
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor(
4881-
const ElementCountSet &VFCandidates) {
4882+
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
48824883
InstructionCost ExpectedCost =
48834884
CM.expectedCost(ElementCount::getFixed(1)).first;
48844885
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
48854886
assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
4886-
assert(VFCandidates.count(ElementCount::getFixed(1)) &&
4887+
assert(any_of(VPlans,
4888+
[](std::unique_ptr<VPlan> &P) {
4889+
return P->hasVF(ElementCount::getFixed(1));
4890+
}) &&
48874891
"Expected Scalar VF to be a candidate");
48884892

48894893
const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
48904894
ExpectedCost);
48914895
VectorizationFactor ChosenFactor = ScalarCost;
48924896

48934897
bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
4894-
if (ForceVectorization && VFCandidates.size() > 1) {
4898+
if (ForceVectorization &&
4899+
(VPlans.size() > 1 || !VPlans[0]->hasScalarVFOnly())) {
48954900
// Ignore scalar width, because the user explicitly wants vectorization.
48964901
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
48974902
// evaluation.
48984903
ChosenFactor.Cost = InstructionCost::getMax();
48994904
}
49004905

49014906
SmallVector<InstructionVFPair> InvalidCosts;
4902-
for (const auto &i : VFCandidates) {
4903-
// The cost for scalar VF=1 is already calculated, so ignore it.
4904-
if (i.isScalar())
4905-
continue;
4907+
for (auto &P : VPlans) {
4908+
for (ElementCount VF : P->vectorFactors()) {
4909+
// The cost for scalar VF=1 is already calculated, so ignore it.
4910+
if (VF.isScalar())
4911+
continue;
49064912

4907-
LoopVectorizationCostModel::VectorizationCostTy C =
4908-
CM.expectedCost(i, &InvalidCosts);
4909-
VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
4913+
LoopVectorizationCostModel::VectorizationCostTy C =
4914+
CM.expectedCost(VF, &InvalidCosts);
4915+
VectorizationFactor Candidate(VF, C.first, ScalarCost.ScalarCost);
49104916

49114917
#ifndef NDEBUG
4912-
unsigned AssumedMinimumVscale =
4913-
getVScaleForTuning(OrigLoop, TTI).value_or(1);
4914-
unsigned Width =
4915-
Candidate.Width.isScalable()
4916-
? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
4917-
: Candidate.Width.getFixedValue();
4918-
LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
4919-
<< " costs: " << (Candidate.Cost / Width));
4920-
if (i.isScalable())
4921-
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
4922-
<< AssumedMinimumVscale << ")");
4923-
LLVM_DEBUG(dbgs() << ".\n");
4918+
unsigned AssumedMinimumVscale =
4919+
getVScaleForTuning(OrigLoop, TTI).value_or(1);
4920+
unsigned Width =
4921+
Candidate.Width.isScalable()
4922+
? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
4923+
: Candidate.Width.getFixedValue();
4924+
LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << VF
4925+
<< " costs: " << (Candidate.Cost / Width));
4926+
if (VF.isScalable())
4927+
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
4928+
<< AssumedMinimumVscale << ")");
4929+
LLVM_DEBUG(dbgs() << ".\n");
49244930
#endif
49254931

4926-
if (!C.second && !ForceVectorization) {
4927-
LLVM_DEBUG(
4928-
dbgs() << "LV: Not considering vector loop of width " << i
4929-
<< " because it will not generate any vector instructions.\n");
4930-
continue;
4931-
}
4932+
if (!C.second && !ForceVectorization) {
4933+
LLVM_DEBUG(
4934+
dbgs()
4935+
<< "LV: Not considering vector loop of width " << VF
4936+
<< " because it will not generate any vector instructions.\n");
4937+
continue;
4938+
}
49324939

4933-
// If profitable add it to ProfitableVF list.
4934-
if (isMoreProfitable(Candidate, ScalarCost))
4935-
ProfitableVFs.push_back(Candidate);
4940+
// If profitable add it to ProfitableVF list.
4941+
if (isMoreProfitable(Candidate, ScalarCost))
4942+
ProfitableVFs.push_back(Candidate);
49364943

4937-
if (isMoreProfitable(Candidate, ChosenFactor))
4938-
ChosenFactor = Candidate;
4944+
if (isMoreProfitable(Candidate, ChosenFactor))
4945+
ChosenFactor = Candidate;
4946+
}
49394947
}
49404948

49414949
emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
@@ -7270,14 +7278,14 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72707278
"InvalidCost", ORE, OrigLoop);
72717279
}
72727280

7273-
// Populate the set of Vectorization Factor Candidates.
7274-
ElementCountSet VFCandidates;
7281+
// Collect the Vectorization Factor Candidates.
7282+
SmallVector<ElementCount> VFCandidates;
72757283
for (auto VF = ElementCount::getFixed(1);
72767284
ElementCount::isKnownLE(VF, MaxFactors.FixedVF); VF *= 2)
7277-
VFCandidates.insert(VF);
7285+
VFCandidates.push_back(VF);
72787286
for (auto VF = ElementCount::getScalable(1);
72797287
ElementCount::isKnownLE(VF, MaxFactors.ScalableVF); VF *= 2)
7280-
VFCandidates.insert(VF);
7288+
VFCandidates.push_back(VF);
72817289

72827290
CM.collectInLoopReductions();
72837291
for (const auto &VF : VFCandidates) {
@@ -7299,7 +7307,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72997307
return VectorizationFactor::Disabled();
73007308

73017309
// Select the optimal vectorization factor.
7302-
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
7310+
VectorizationFactor VF = selectVectorizationFactor();
73037311
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
73047312
if (!hasPlanWithVF(VF.Width)) {
73057313
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3255,6 +3255,12 @@ class VPlan {
32553255
return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
32563256
}
32573257

3258+
/// Returns an iterator range over all VFs of the plan.
3259+
iterator_range<SmallSetVector<ElementCount, 2>::iterator>
3260+
vectorFactors() const {
3261+
return {VFs.begin(), VFs.end()};
3262+
}
3263+
32583264
bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
32593265

32603266
bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

0 commit comments

Comments
 (0)