@@ -8926,6 +8926,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8926
8926
bool NeedsBlends = BB != HeaderBB && !BB->phis ().empty ();
8927
8927
return Legal->blockNeedsPredication (BB) || NeedsBlends;
8928
8928
});
8929
+ auto *MiddleVPBB =
8930
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
8931
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8929
8932
for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
8930
8933
// Relevant instructions from basic block BB will be grouped into VPRecipe
8931
8934
// ingredients and fill a new VPBasicBlock.
@@ -8956,8 +8959,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8956
8959
// with the final reduction value will be added to the exit block
8957
8960
StoreInst *SI;
8958
8961
if ((SI = dyn_cast<StoreInst>(&I)) &&
8959
- Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8962
+ Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
8963
+ // Only create recipe for the last intermediate store of the reduction.
8964
+ if (!Legal->isInvariantStoreOfReduction (SI))
8965
+ continue ;
8966
+ auto *Recipe = new VPReplicateRecipe (
8967
+ SI, RecipeBuilder.mapToVPValues (Instr->operands ()),
8968
+ true /* IsUniform */ );
8969
+ RecipeBuilder.setRecipe (SI, Recipe);
8970
+ Recipe->insertBefore (*MiddleVPBB, MBIP);
8960
8971
continue ;
8972
+ }
8961
8973
8962
8974
VPRecipeBase *Recipe =
8963
8975
RecipeBuilder.tryToCreateWidenRecipe (Instr, Operands, Range, VPBB);
@@ -9126,51 +9138,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9126
9138
using namespace VPlanPatternMatch ;
9127
9139
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9128
9140
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9129
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
9130
- // sank outside of the loop would keep the same order as they had in the
9131
- // original loop.
9132
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9133
- for (VPRecipeBase &R : Header->phis ()) {
9134
- if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9135
- ReductionPHIList.emplace_back (ReductionPhi);
9136
- }
9137
- bool HasIntermediateStore = false ;
9138
- stable_sort (ReductionPHIList,
9139
- [this , &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9140
- const VPReductionPHIRecipe *R2) {
9141
- auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
9142
- auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
9143
- HasIntermediateStore |= IS1 || IS2;
9144
-
9145
- // If neither of the recipes has an intermediate store, keep the
9146
- // order the same.
9147
- if (!IS1 && !IS2)
9148
- return false ;
9149
-
9150
- // If only one of the recipes has an intermediate store, then
9151
- // move it towards the beginning of the list.
9152
- if (IS1 && !IS2)
9153
- return true ;
9154
-
9155
- if (!IS1 && IS2)
9156
- return false ;
9157
-
9158
- // If both recipes have an intermediate store, then the recipe
9159
- // with the later store should be processed earlier. So it
9160
- // should go to the beginning of the list.
9161
- return DT->dominates (IS2, IS1);
9162
- });
9163
-
9164
- if (HasIntermediateStore && ReductionPHIList.size () > 1 )
9165
- for (VPRecipeBase *R : ReductionPHIList)
9166
- R->moveBefore (*Header, Header->getFirstNonPhi ());
9167
-
9168
9141
for (VPRecipeBase &R : Header->phis ()) {
9169
9142
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9170
9143
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
9171
9144
continue ;
9172
9145
9173
9146
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9147
+ StoreInst *IntermediateStore = RdxDesc.IntermediateStore ;
9174
9148
RecurKind Kind = RdxDesc.getRecurrenceKind ();
9175
9149
assert (!RecurrenceDescriptor::isAnyOfRecurrenceKind (Kind) &&
9176
9150
" AnyOf reductions are not allowed for in-loop reductions" );
@@ -9183,9 +9157,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9183
9157
for (VPUser *U : Cur->users ()) {
9184
9158
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9185
9159
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9186
- assert (match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9187
- m_VPValue (), m_VPValue ())) &&
9188
- " U must be an ExtractFromEnd VPInstruction" );
9160
+ assert ((match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9161
+ m_VPValue (), m_VPValue ())) ||
9162
+ (isa<VPReplicateRecipe>(U) &&
9163
+ cast<VPReplicateRecipe>(U)->getUnderlyingValue () ==
9164
+ IntermediateStore)) &&
9165
+ " U must be either an ExtractFromEnd VPInstruction or a "
9166
+ " uniform store sourced from the intermediate store." );
9189
9167
continue ;
9190
9168
}
9191
9169
Worklist.insert (UserRecipe);
@@ -9300,6 +9278,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9300
9278
continue ;
9301
9279
9302
9280
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9281
+ StoreInst *IntermediateStore = RdxDesc.IntermediateStore ;
9303
9282
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9304
9283
// with a boolean reduction phi node to check if the condition is true in
9305
9284
// any iteration. The final value is selected by the final
@@ -9402,11 +9381,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9402
9381
auto *FinalReductionResult = new VPInstruction (
9403
9382
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
9404
9383
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9405
- OrigExitingVPV->replaceUsesWithIf (FinalReductionResult, [](VPUser &User,
9406
- unsigned ) {
9407
- return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue (),
9408
- m_VPValue ()));
9409
- });
9384
+ OrigExitingVPV->replaceUsesWithIf (
9385
+ FinalReductionResult, [IntermediateStore](VPUser &User, unsigned ) {
9386
+ return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(
9387
+ m_VPValue (), m_VPValue ())) ||
9388
+ (isa<VPReplicateRecipe>(&User) &&
9389
+ cast<VPReplicateRecipe>(&User)->getUnderlyingValue () ==
9390
+ IntermediateStore);
9391
+ });
9410
9392
}
9411
9393
9412
9394
VPlanTransforms::clearReductionWrapFlags (*Plan);
0 commit comments