@@ -2388,8 +2388,8 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
2388
2388
AC->registerAssumption (II);
2389
2389
2390
2390
// End if-block.
2391
- bool IfPredicateInstr = RepRecipe->getParent ()->getParent ()-> isReplicator ();
2392
- if (IfPredicateInstr )
2391
+ const VPRegionBlock *Region = RepRecipe->getParent ()->getParent ();
2392
+ if (Region && Region-> isReplicator () )
2393
2393
PredicatedInstructions.push_back (Cloned);
2394
2394
}
2395
2395
@@ -8901,6 +8901,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8901
8901
bool NeedsBlends = BB != HeaderBB && !BB->phis ().empty ();
8902
8902
return Legal->blockNeedsPredication (BB) || NeedsBlends;
8903
8903
});
8904
+ auto *MiddleVPBB =
8905
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
8906
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8904
8907
for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
8905
8908
// Relevant instructions from basic block BB will be grouped into VPRecipe
8906
8909
// ingredients and fill a new VPBasicBlock.
@@ -8931,8 +8934,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8931
8934
// with the final reduction value will be added to the exit block
8932
8935
StoreInst *SI;
8933
8936
if ((SI = dyn_cast<StoreInst>(&I)) &&
8934
- Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8937
+ Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
8938
+ // Only create recipe for the last intermediate store of the reduction.
8939
+ if (!Legal->isInvariantStoreOfReduction (SI))
8940
+ continue ;
8941
+ auto *Recipe = new VPReplicateRecipe (
8942
+ SI, RecipeBuilder.mapToVPValues (Instr->operands ()),
8943
+ true /* IsUniform */ );
8944
+ RecipeBuilder.setRecipe (SI, Recipe);
8945
+ Recipe->insertBefore (*MiddleVPBB, MBIP);
8935
8946
continue ;
8947
+ }
8936
8948
8937
8949
VPRecipeBase *Recipe =
8938
8950
RecipeBuilder.tryToCreateWidenRecipe (Instr, Operands, Range, VPBB);
@@ -9130,51 +9142,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9130
9142
using namespace VPlanPatternMatch ;
9131
9143
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9132
9144
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9133
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
9134
- // sank outside of the loop would keep the same order as they had in the
9135
- // original loop.
9136
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9137
- for (VPRecipeBase &R : Header->phis ()) {
9138
- if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9139
- ReductionPHIList.emplace_back (ReductionPhi);
9140
- }
9141
- bool HasIntermediateStore = false ;
9142
- stable_sort (ReductionPHIList,
9143
- [this , &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9144
- const VPReductionPHIRecipe *R2) {
9145
- auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
9146
- auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
9147
- HasIntermediateStore |= IS1 || IS2;
9148
-
9149
- // If neither of the recipes has an intermediate store, keep the
9150
- // order the same.
9151
- if (!IS1 && !IS2)
9152
- return false ;
9153
-
9154
- // If only one of the recipes has an intermediate store, then
9155
- // move it towards the beginning of the list.
9156
- if (IS1 && !IS2)
9157
- return true ;
9158
-
9159
- if (!IS1 && IS2)
9160
- return false ;
9161
-
9162
- // If both recipes have an intermediate store, then the recipe
9163
- // with the later store should be processed earlier. So it
9164
- // should go to the beginning of the list.
9165
- return DT->dominates (IS2, IS1);
9166
- });
9167
-
9168
- if (HasIntermediateStore && ReductionPHIList.size () > 1 )
9169
- for (VPRecipeBase *R : ReductionPHIList)
9170
- R->moveBefore (*Header, Header->getFirstNonPhi ());
9171
-
9172
9145
for (VPRecipeBase &R : Header->phis ()) {
9173
9146
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9174
9147
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
9175
9148
continue ;
9176
9149
9177
9150
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9151
+ StoreInst* IntermediateStore = RdxDesc.IntermediateStore ;
9178
9152
RecurKind Kind = RdxDesc.getRecurrenceKind ();
9179
9153
assert (!RecurrenceDescriptor::isAnyOfRecurrenceKind (Kind) &&
9180
9154
" AnyOf reductions are not allowed for in-loop reductions" );
@@ -9187,9 +9161,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9187
9161
for (VPUser *U : Cur->users ()) {
9188
9162
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9189
9163
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9190
- assert (match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9191
- m_VPValue (), m_VPValue ())) &&
9192
- " U must be an ExtractFromEnd VPInstruction" );
9164
+ assert ((match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9165
+ m_VPValue (), m_VPValue ())) ||
9166
+ (isa<VPReplicateRecipe>(U) &&
9167
+ cast<VPReplicateRecipe>(U)->getUnderlyingValue () ==
9168
+ IntermediateStore)) &&
9169
+ " U must be either an ExtractFromEnd VPInstruction or a "
9170
+ " uniform store sourced from the intermediate store." );
9193
9171
continue ;
9194
9172
}
9195
9173
Worklist.insert (UserRecipe);
@@ -9304,6 +9282,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9304
9282
continue ;
9305
9283
9306
9284
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9285
+ StoreInst *IntermediateStore = RdxDesc.IntermediateStore ;
9307
9286
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9308
9287
// with a boolean reduction phi node to check if the condition is true in
9309
9288
// any iteration. The final value is selected by the final
@@ -9406,11 +9385,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9406
9385
auto *FinalReductionResult = new VPInstruction (
9407
9386
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
9408
9387
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9409
- OrigExitingVPV->replaceUsesWithIf (FinalReductionResult, [](VPUser &User,
9410
- unsigned ) {
9411
- return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue (),
9412
- m_VPValue ()));
9413
- });
9388
+ OrigExitingVPV->replaceUsesWithIf (
9389
+ FinalReductionResult, [IntermediateStore](VPUser &User, unsigned ) {
9390
+ return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(
9391
+ m_VPValue (), m_VPValue ())) ||
9392
+ (isa<VPReplicateRecipe>(&User) &&
9393
+ cast<VPReplicateRecipe>(&User)->getUnderlyingValue () ==
9394
+ IntermediateStore);
9395
+ });
9414
9396
}
9415
9397
9416
9398
VPlanTransforms::clearReductionWrapFlags (*Plan);
0 commit comments