@@ -2346,6 +2346,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
2346
2346
// End if-block.
2347
2347
VPRegionBlock *Parent = RepRecipe->getParent ()->getParent ();
2348
2348
bool IfPredicateInstr = Parent ? Parent->isReplicator () : false ;
2349
+ assert ((Parent || all_of (RepRecipe->operands (),
2350
+ [](VPValue *Op) {
2351
+ return Op->isDefinedOutsideLoopRegions ();
2352
+ })) &&
2353
+ " Expected a recipe is either within a region or all of its operands "
2354
+ " are defined outside the vectorized region." );
2349
2355
if (IfPredicateInstr)
2350
2356
PredicatedInstructions.push_back (Cloned);
2351
2357
}
@@ -8950,6 +8956,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8950
8956
bool NeedsBlends = BB != HeaderBB && !BB->phis ().empty ();
8951
8957
return Legal->blockNeedsPredication (BB) || NeedsBlends;
8952
8958
});
8959
+ auto *MiddleVPBB =
8960
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
8961
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8953
8962
for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
8954
8963
// Relevant instructions from basic block BB will be grouped into VPRecipe
8955
8964
// ingredients and fill a new VPBasicBlock.
@@ -8976,12 +8985,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8976
8985
Operands = {OpRange.begin (), OpRange.end ()};
8977
8986
}
8978
8987
8979
- // Invariant stores inside loop will be deleted and a single store
8980
- // with the final reduction value will be added to the exit block
8988
+ // The stores with invariant address inside the loop will be deleted, and
8989
+ // in the exit block, a uniform store recipe will be created for the final
8990
+ // invariant store of the reduction.
8981
8991
StoreInst *SI;
8982
8992
if ((SI = dyn_cast<StoreInst>(&I)) &&
8983
- Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
8993
+ Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
8994
+ // Only create a recipe for the final invariant store of the reduction.
8995
+ if (!Legal->isInvariantStoreOfReduction (SI))
8996
+ continue ;
8997
+ auto *Recipe = new VPReplicateRecipe (
8998
+ SI, RecipeBuilder.mapToVPValues (Instr->operands ()),
8999
+ true /* IsUniform */ );
9000
+ Recipe->insertBefore (*MiddleVPBB, MBIP);
8984
9001
continue ;
9002
+ }
8985
9003
8986
9004
VPRecipeBase *Recipe =
8987
9005
RecipeBuilder.tryToCreateWidenRecipe (Instr, Operands, Range, VPBB);
@@ -9150,45 +9168,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9150
9168
using namespace VPlanPatternMatch ;
9151
9169
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9152
9170
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9153
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
9154
- // sank outside of the loop would keep the same order as they had in the
9155
- // original loop.
9156
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
9157
- for (VPRecipeBase &R : Header->phis ()) {
9158
- if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
9159
- ReductionPHIList.emplace_back (ReductionPhi);
9160
- }
9161
- bool HasIntermediateStore = false ;
9162
- stable_sort (ReductionPHIList,
9163
- [this , &HasIntermediateStore](const VPReductionPHIRecipe *R1,
9164
- const VPReductionPHIRecipe *R2) {
9165
- auto *IS1 = R1->getRecurrenceDescriptor ().IntermediateStore ;
9166
- auto *IS2 = R2->getRecurrenceDescriptor ().IntermediateStore ;
9167
- HasIntermediateStore |= IS1 || IS2;
9168
-
9169
- // If neither of the recipes has an intermediate store, keep the
9170
- // order the same.
9171
- if (!IS1 && !IS2)
9172
- return false ;
9173
-
9174
- // If only one of the recipes has an intermediate store, then
9175
- // move it towards the beginning of the list.
9176
- if (IS1 && !IS2)
9177
- return true ;
9178
-
9179
- if (!IS1 && IS2)
9180
- return false ;
9181
-
9182
- // If both recipes have an intermediate store, then the recipe
9183
- // with the later store should be processed earlier. So it
9184
- // should go to the beginning of the list.
9185
- return DT->dominates (IS2, IS1);
9186
- });
9187
-
9188
- if (HasIntermediateStore && ReductionPHIList.size () > 1 )
9189
- for (VPRecipeBase *R : ReductionPHIList)
9190
- R->moveBefore (*Header, Header->getFirstNonPhi ());
9191
-
9171
+ VPBasicBlock *MiddleVPBB =
9172
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9192
9173
for (VPRecipeBase &R : Header->phis ()) {
9193
9174
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9194
9175
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
@@ -9207,9 +9188,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9207
9188
for (VPUser *U : Cur->users ()) {
9208
9189
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9209
9190
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9210
- assert (match (U, m_Binary<VPInstruction::ExtractFromEnd>(
9211
- m_VPValue (), m_VPValue ())) &&
9212
- " U must be an ExtractFromEnd VPInstruction" );
9191
+ assert (UserRecipe->getParent () == MiddleVPBB &&
9192
+ " U must be either in the loop region or the middle block." );
9213
9193
continue ;
9214
9194
}
9215
9195
Worklist.insert (UserRecipe);
@@ -9314,8 +9294,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9314
9294
}
9315
9295
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9316
9296
Builder.setInsertPoint (&*LatchVPBB->begin ());
9317
- VPBasicBlock *MiddleVPBB =
9318
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9319
9297
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9320
9298
for (VPRecipeBase &R :
9321
9299
Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
@@ -9390,12 +9368,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9390
9368
// also modeled in VPlan.
9391
9369
auto *FinalReductionResult = new VPInstruction (
9392
9370
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
9371
+ // Update all users outside the vector region.
9372
+ OrigExitingVPV->replaceUsesWithIf (
9373
+ FinalReductionResult, [](VPUser &User, unsigned ) {
9374
+ auto *Parent = cast<VPRecipeBase>(&User)->getParent ();
9375
+ return Parent && !Parent->getParent ();
9376
+ });
9393
9377
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9394
- OrigExitingVPV->replaceUsesWithIf (FinalReductionResult, [](VPUser &User,
9395
- unsigned ) {
9396
- return match (&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue (),
9397
- m_VPValue ()));
9398
- });
9399
9378
9400
9379
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9401
9380
// with a boolean reduction phi node to check if the condition is true in
0 commit comments