Skip to content

Commit ca01f2a

Browse files
committed
[LV] Enforce order of reductions with intermediate stores in VPlan (NFC)
Reductions with intermediate stores currently need to be fixed up in the order of their intermediate stores. Instead of doing this at fixup time, after code has been generated, sort the reductions in adjustRecipesForReductions. This makes the order explicit in VPlan and will enable removing fixReduction by modeling the computation of the final reduction result in VPlan, followed by also modeling the intermediate stores explicitly.
1 parent 241c290 commit ca01f2a

File tree

2 files changed

+57
-45
lines changed

2 files changed

+57
-45
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
namespace llvm {
3232

3333
class LoopInfo;
34+
class DominatorTree;
3435
class LoopVectorizationLegality;
3536
class LoopVectorizationCostModel;
3637
class PredicatedScalarEvolution;
@@ -287,6 +288,9 @@ class LoopVectorizationPlanner {
287288
/// Loop Info analysis.
288289
LoopInfo *LI;
289290

291+
/// The dominator tree.
292+
DominatorTree *DT;
293+
290294
/// Target Library Info.
291295
const TargetLibraryInfo *TLI;
292296

@@ -317,16 +321,14 @@ class LoopVectorizationPlanner {
317321
VPBuilder Builder;
318322

319323
public:
320-
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
321-
const TargetTransformInfo &TTI,
322-
LoopVectorizationLegality *Legal,
323-
LoopVectorizationCostModel &CM,
324-
InterleavedAccessInfo &IAI,
325-
PredicatedScalarEvolution &PSE,
326-
const LoopVectorizeHints &Hints,
327-
OptimizationRemarkEmitter *ORE)
328-
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
329-
PSE(PSE), Hints(Hints), ORE(ORE) {}
324+
LoopVectorizationPlanner(
325+
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
326+
const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
327+
LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
328+
PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
329+
OptimizationRemarkEmitter *ORE)
330+
: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
331+
IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
330332

331333
/// Plan how to best vectorize, return the best VF and its cost, or
332334
/// std::nullopt if vectorization and interleaving should be avoided up front.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3617,40 +3617,10 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
36173617
VPBasicBlock *Header =
36183618
State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
36193619

3620-
// Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
3621-
// sank outside of the loop would keep the same order as they had in the
3622-
// original loop.
3623-
SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
36243620
for (VPRecipeBase &R : Header->phis()) {
36253621
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
3626-
ReductionPHIList.emplace_back(ReductionPhi);
3622+
fixReduction(ReductionPhi, State);
36273623
}
3628-
stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1,
3629-
const VPReductionPHIRecipe *R2) {
3630-
auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
3631-
auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
3632-
3633-
// If neither of the recipes has an intermediate store, keep the order the
3634-
// same.
3635-
if (!IS1 && !IS2)
3636-
return false;
3637-
3638-
// If only one of the recipes has an intermediate store, then move it
3639-
// towards the beginning of the list.
3640-
if (IS1 && !IS2)
3641-
return true;
3642-
3643-
if (!IS1 && IS2)
3644-
return false;
3645-
3646-
// If both recipes have an intermediate store, then the recipe with the
3647-
// later store should be processed earlier. So it should go to the beginning
3648-
// of the list.
3649-
return DT->dominates(IS2, IS1);
3650-
});
3651-
3652-
for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
3653-
fixReduction(ReductionPhi, State);
36543624

36553625
for (VPRecipeBase &R : Header->phis()) {
36563626
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
@@ -9041,9 +9011,48 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
90419011
void LoopVectorizationPlanner::adjustRecipesForReductions(
90429012
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
90439013
ElementCount MinVF) {
9014+
VPBasicBlock *Header = Plan->getVectorLoopRegion()->getEntryBasicBlock();
9015+
// Gather all VPReductionPHIRecipes and sort them so that intermediate stores
9016+
// sunk outside of the loop keep the same order as they had in the
9017+
// original loop.
9018+
SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9019+
for (VPRecipeBase &R : Header->phis()) {
9020+
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9021+
ReductionPHIList.emplace_back(ReductionPhi);
9022+
}
9023+
bool HasIntermediateStore = false;
9024+
stable_sort(ReductionPHIList,
9025+
[this, &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9026+
const VPReductionPHIRecipe *R2) {
9027+
auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
9028+
auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
9029+
HasIntermediateStore |= IS1 || IS2;
9030+
9031+
// If neither of the recipes has an intermediate store, keep the
9032+
// order the same.
9033+
if (!IS1 && !IS2)
9034+
return false;
9035+
9036+
// If only one of the recipes has an intermediate store, then
9037+
// move it towards the beginning of the list.
9038+
if (IS1 && !IS2)
9039+
return true;
9040+
9041+
if (!IS1 && IS2)
9042+
return false;
9043+
9044+
// If both recipes have an intermediate store, then the recipe
9045+
// with the later store should be processed earlier. So it
9046+
// should go to the beginning of the list.
9047+
return DT->dominates(IS2, IS1);
9048+
});
9049+
9050+
if (HasIntermediateStore && ReductionPHIList.size() > 1)
9051+
for (VPRecipeBase *R : ReductionPHIList)
9052+
R->moveBefore(*Header, Header->getFirstNonPhi());
9053+
90449054
SmallVector<VPReductionPHIRecipe *> InLoopReductionPhis;
9045-
for (VPRecipeBase &R :
9046-
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9055+
for (VPRecipeBase &R : Header->phis()) {
90479056
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
90489057
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
90499058
continue;
@@ -9682,7 +9691,8 @@ static bool processLoopInVPlanNativePath(
96829691
// Use the planner for outer loop vectorization.
96839692
// TODO: CM is not used at this point inside the planner. Turn CM into an
96849693
// optional argument if we don't need it in the future.
9685-
LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, LVL, CM, IAI, PSE, Hints, ORE);
9694+
LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, LVL, CM, IAI, PSE, Hints,
9695+
ORE);
96869696

96879697
// Get user vectorization factor.
96889698
ElementCount UserVF = Hints.getWidth();
@@ -10024,7 +10034,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1002410034
LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
1002510035
F, &Hints, IAI);
1002610036
// Use the planner for vectorization.
10027-
LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
10037+
LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
1002810038
ORE);
1002910039

1003010040
// Get user vectorization factor and interleave count.

0 commit comments

Comments
 (0)