@@ -3617,40 +3617,10 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
3617
3617
VPBasicBlock *Header =
3618
3618
State.Plan ->getVectorLoopRegion ()->getEntryBasicBlock ();
3619
3619
3620
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
3621
- // sank outside of the loop would keep the same order as they had in the
3622
- // original loop.
3623
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
3624
3620
for (VPRecipeBase &R : Header->phis ()) {
3625
3621
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
3626
- ReductionPHIList. emplace_back (ReductionPhi);
3622
+ fixReduction (ReductionPhi, State );
3627
3623
}
3628
- stable_sort (ReductionPHIList, [this ](const VPReductionPHIRecipe *R1,
3629
- const VPReductionPHIRecipe *R2) {
3630
- auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
3631
- auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
3632
-
3633
- // If neither of the recipes has an intermediate store, keep the order the
3634
- // same.
3635
- if (!IS1 && !IS2)
3636
- return false ;
3637
-
3638
- // If only one of the recipes has an intermediate store, then move it
3639
- // towards the beginning of the list.
3640
- if (IS1 && !IS2)
3641
- return true ;
3642
-
3643
- if (!IS1 && IS2)
3644
- return false ;
3645
-
3646
- // If both recipes have an intermediate store, then the recipe with the
3647
- // later store should be processed earlier. So it should go to the beginning
3648
- // of the list.
3649
- return DT->dominates (IS2, IS1);
3650
- });
3651
-
3652
- for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
3653
- fixReduction (ReductionPhi, State);
3654
3624
3655
3625
for (VPRecipeBase &R : Header->phis ()) {
3656
3626
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
@@ -9041,9 +9011,48 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9041
9011
void LoopVectorizationPlanner::adjustRecipesForReductions (
9042
9012
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
9043
9013
ElementCount MinVF) {
9014
+ VPBasicBlock *Header = Plan->getVectorLoopRegion ()->getEntryBasicBlock ();
9015
+ // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
9016
+ // sank outside of the loop would keep the same order as they had in the
9017
+ // original loop.
9018
+ SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9019
+ for (VPRecipeBase &R : Header->phis ()) {
9020
+ if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9021
+ ReductionPHIList.emplace_back (ReductionPhi);
9022
+ }
9023
+ bool HasIntermediateStore = false ;
9024
+ stable_sort (ReductionPHIList,
9025
+ [this , &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9026
+ const VPReductionPHIRecipe *R2) {
9027
+ auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
9028
+ auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
9029
+ HasIntermediateStore |= IS1 || IS2;
9030
+
9031
+ // If neither of the recipes has an intermediate store, keep the
9032
+ // order the same.
9033
+ if (!IS1 && !IS2)
9034
+ return false ;
9035
+
9036
+ // If only one of the recipes has an intermediate store, then
9037
+ // move it towards the beginning of the list.
9038
+ if (IS1 && !IS2)
9039
+ return true ;
9040
+
9041
+ if (!IS1 && IS2)
9042
+ return false ;
9043
+
9044
+ // If both recipes have an intermediate store, then the recipe
9045
+ // with the later store should be processed earlier. So it
9046
+ // should go to the beginning of the list.
9047
+ return DT->dominates (IS2, IS1);
9048
+ });
9049
+
9050
+ if (HasIntermediateStore && ReductionPHIList.size () > 1 )
9051
+ for (VPRecipeBase *R : ReductionPHIList)
9052
+ R->moveBefore (*Header, Header->getFirstNonPhi ());
9053
+
9044
9054
SmallVector<VPReductionPHIRecipe *> InLoopReductionPhis;
9045
- for (VPRecipeBase &R :
9046
- Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9055
+ for (VPRecipeBase &R : Header->phis ()) {
9047
9056
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9048
9057
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
9049
9058
continue ;
@@ -9682,7 +9691,8 @@ static bool processLoopInVPlanNativePath(
9682
9691
// Use the planner for outer loop vectorization.
9683
9692
// TODO: CM is not used at this point inside the planner. Turn CM into an
9684
9693
// optional argument if we don't need it in the future.
9685
- LoopVectorizationPlanner LVP (L, LI, TLI, *TTI, LVL, CM, IAI, PSE, Hints, ORE);
9694
+ LoopVectorizationPlanner LVP (L, LI, DT, TLI, *TTI, LVL, CM, IAI, PSE, Hints,
9695
+ ORE);
9686
9696
9687
9697
// Get user vectorization factor.
9688
9698
ElementCount UserVF = Hints.getWidth ();
@@ -10024,7 +10034,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10024
10034
LoopVectorizationCostModel CM (SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
10025
10035
F, &Hints, IAI);
10026
10036
// Use the planner for vectorization.
10027
- LoopVectorizationPlanner LVP (L, LI, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
10037
+ LoopVectorizationPlanner LVP (L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
10028
10038
ORE);
10029
10039
10030
10040
// Get user vectorization factor and interleave count.
0 commit comments