Skip to content

Commit 67a55e0

Browse files
authored
[VPlan] Replace getBestPlan by getBestVF, use also for epilogue vec. (#98821)
Replace getBestPlan by getBestVF, which simply finds the best VF out of the VFs for the available VPlans. Then use getBestPlanFor to retrieve the corresponding VPlan. This allows using getBestVF & getBestPlanFor for epilogue vectorization as well. As the same plan may be used to vectorize both the main and epilogue loop, restricting the VF of the best plan would cause issues. PR: #98821
1 parent 9d22095 commit 67a55e0

File tree

3 files changed

+22
-29
lines changed

3 files changed

+22
-29
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,8 @@ class LoopVectorizationPlanner {
365365
/// Return the best VPlan for \p VF.
366366
VPlan &getBestPlanFor(ElementCount VF) const;
367367

368-
/// Return the most profitable plan and fix its VF to the most profitable one.
369-
VPlan &getBestPlan() const;
368+
/// Return the most profitable vectorization factor.
369+
ElementCount getBestVF() const;
370370

371371
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
372372
/// according to the best selected \p VF and \p UF.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7162,13 +7162,12 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
71627162
return Cost;
71637163
}
71647164

7165-
VPlan &LoopVectorizationPlanner::getBestPlan() const {
7165+
ElementCount LoopVectorizationPlanner::getBestVF() const {
71667166
// If there is a single VPlan with a single VF, return it directly.
71677167
VPlan &FirstPlan = *VPlans[0];
71687168
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
7169-
return FirstPlan;
7169+
return *FirstPlan.vectorFactors().begin();
71707170

7171-
VPlan *BestPlan = &FirstPlan;
71727171
ElementCount ScalarVF = ElementCount::getFixed(1);
71737172
assert(hasPlanWithVF(ScalarVF) &&
71747173
"More than a single plan/VF w/o any plan having scalar VF");
@@ -7199,14 +7198,11 @@ VPlan &LoopVectorizationPlanner::getBestPlan() const {
71997198

72007199
InstructionCost Cost = cost(*P, VF);
72017200
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7202-
if (isMoreProfitable(CurrentFactor, BestFactor)) {
7201+
if (isMoreProfitable(CurrentFactor, BestFactor))
72037202
BestFactor = CurrentFactor;
7204-
BestPlan = &*P;
7205-
}
72067203
}
72077204
}
7208-
BestPlan->setVF(BestFactor.Width);
7209-
return *BestPlan;
7205+
return BestFactor.Width;
72107206
}
72117207

72127208
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -10001,10 +9997,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
100019997
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
100029998
&CM, BFI, PSI, Checks);
100039999

10004-
VPlan &BestPlan = LVP.getBestPlan();
10005-
assert(BestPlan.hasScalarVFOnly() &&
10000+
ElementCount BestVF = LVP.getBestVF();
10001+
assert(BestVF.isScalar() &&
1000610002
"VPlan cost model and legacy cost model disagreed");
10007-
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
10003+
VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
10004+
LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
1000810005

1000910006
ORE->emit([&]() {
1001010007
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10015,21 +10012,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1001510012
} else {
1001610013
// If we decided that it is *legal* to vectorize the loop, then do it.
1001710014

10015+
ElementCount BestVF = LVP.getBestVF();
10016+
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
10017+
assert(VF.Width == BestVF &&
10018+
"VPlan cost model and legacy cost model disagreed");
10019+
VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
1001810020
// Consider vectorizing the epilogue too if it's profitable.
1001910021
VectorizationFactor EpilogueVF =
10020-
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
10022+
LVP.selectEpilogueVectorizationFactor(BestVF, IC);
1002110023
if (EpilogueVF.Width.isVector()) {
1002210024

1002310025
// The first pass vectorizes the main loop and creates a scalar epilogue
1002410026
// to be vectorized by executing the plan (potentially with a different
1002510027
// factor) again shortly afterwards.
10026-
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
10028+
EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
1002710029
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
1002810030
EPI, &LVL, &CM, BFI, PSI, Checks);
1002910031

1003010032
assert(EPI.MainLoopVF == VF.Width && "VFs must match");
10031-
std::unique_ptr<VPlan> BestMainPlan(
10032-
LVP.getBestPlanFor(VF.Width).duplicate());
10033+
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
1003310034
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
1003410035
EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, true);
1003510036
++LoopsVectorized;
@@ -10120,18 +10121,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1012010121
if (!MainILV.areSafetyChecksAdded())
1012110122
DisableRuntimeUnroll = true;
1012210123
} else {
10123-
VPlan &BestPlan = LVP.getBestPlan();
10124-
assert(size(BestPlan.vectorFactors()) == 1 &&
10125-
"Plan should have a single VF");
10126-
ElementCount Width = *BestPlan.vectorFactors().begin();
10127-
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
10128-
<< "\n");
10129-
assert(VF.Width == Width &&
10130-
"VPlan cost model and legacy cost model disagreed");
10131-
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, Width,
10124+
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
1013210125
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
1013310126
PSI, Checks);
10134-
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
10127+
LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
1013510128
++LoopsVectorized;
1013610129

1013710130
// Add metadata to disable runtime unrolling a scalar loop when there

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
135135
; CHECK-NEXT: LV: Not Interleaving.
136136
; CHECK-NEXT: LV: Interleaving is not beneficial.
137137
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
138-
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
139138
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
139+
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
140140
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
141141
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
142142
; CHECK-NEXT: Live-in vp<%0> = VF * UF
@@ -340,8 +340,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
340340
; CHECK-NEXT: LV: Not Interleaving.
341341
; CHECK-NEXT: LV: Interleaving is not beneficial.
342342
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
343-
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
344343
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
344+
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
345345
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
346346
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
347347
; CHECK-NEXT: Live-in vp<%0> = VF * UF

0 commit comments

Comments
 (0)