@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
6710
6710
return BlockMaskCache[BB] = BlockMask;
6711
6711
}
6712
6712
6713
- VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory (Instruction *I,
6714
- VFRange &Range,
6715
- VPlanPtr &Plan) {
6716
- const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup (I);
6717
- if (!IG)
6718
- return nullptr ;
6719
-
6720
- // Now check if IG is relevant for VF's in the given range.
6721
- auto isIGMember = [&](Instruction *I) -> std::function<bool (unsigned )> {
6722
- return [=](unsigned VF) -> bool {
6723
- return (VF >= 2 && // Query is illegal for VF == 1
6724
- CM.getWideningDecision (I, VF) ==
6725
- LoopVectorizationCostModel::CM_Interleave);
6726
- };
6727
- };
6728
- if (!LoopVectorizationPlanner::getDecisionAndClampRange (isIGMember (I), Range))
6729
- return nullptr ;
6730
-
6731
- // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732
- // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
6733
- // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
6734
- assert (I == IG->getInsertPos () &&
6735
- " Generating a recipe for an adjunct member of an interleave group" );
6736
-
6737
- VPValue *Mask = nullptr ;
6738
- if (Legal->isMaskRequired (I))
6739
- Mask = createBlockInMask (I->getParent (), Plan);
6740
-
6741
- return new VPInterleaveRecipe (IG, Mask);
6742
- }
6743
-
6744
6713
VPWidenMemoryInstructionRecipe *
6745
6714
VPRecipeBuilder::tryToWidenMemory (Instruction *I, VFRange &Range,
6746
6715
VPlanPtr &Plan) {
@@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
6757
6726
CM.getWideningDecision (I, VF);
6758
6727
assert (Decision != LoopVectorizationCostModel::CM_Unknown &&
6759
6728
" CM decision should be taken at this point." );
6760
- assert (Decision != LoopVectorizationCostModel::CM_Interleave &&
6761
- " Interleave memory opportunity should be caught earlier." );
6762
6729
return Decision != LoopVectorizationCostModel::CM_Scalarize;
6763
6730
};
6764
6731
@@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
6923
6890
if (!LoopVectorizationPlanner::getDecisionAndClampRange (willWiden, Range))
6924
6891
return false ;
6925
6892
6893
+ // If this ingredient's recipe is to be recorded, keep its recipe a singleton
6894
+ // to avoid having to split recipes later.
6895
+ bool IsSingleton = Ingredient2Recipe.count (I);
6896
+
6926
6897
// Success: widen this instruction. We optimize the common case where
6927
6898
// consecutive instructions can be represented by a single recipe.
6928
- if (!VPBB->empty ()) {
6929
- VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back ());
6930
- if (LastWidenRecipe && LastWidenRecipe->appendInstruction (I))
6931
- return true ;
6932
- }
6899
+ if (!IsSingleton && !VPBB->empty () && LastExtensibleRecipe == &VPBB->back () &&
6900
+ LastExtensibleRecipe->appendInstruction (I))
6901
+ return true ;
6933
6902
6934
- VPBB->appendRecipe (new VPWidenRecipe (I));
6903
+ VPWidenRecipe *WidenRecipe = new VPWidenRecipe (I);
6904
+ if (!IsSingleton)
6905
+ LastExtensibleRecipe = WidenRecipe;
6906
+ setRecipe (I, WidenRecipe);
6907
+ VPBB->appendRecipe (WidenRecipe);
6935
6908
return true ;
6936
6909
}
6937
6910
@@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
6947
6920
[&](unsigned VF) { return CM.isScalarWithPredication (I, VF); }, Range);
6948
6921
6949
6922
auto *Recipe = new VPReplicateRecipe (I, IsUniform, IsPredicated);
6923
+ setRecipe (I, Recipe);
6950
6924
6951
6925
// Find if I uses a predicated instruction. If so, it will use its scalar
6952
6926
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
7005
6979
bool VPRecipeBuilder::tryToCreateRecipe (Instruction *Instr, VFRange &Range,
7006
6980
VPlanPtr &Plan, VPBasicBlock *VPBB) {
7007
6981
VPRecipeBase *Recipe = nullptr ;
7008
- // Check if Instr should belong to an interleave memory recipe, or already
7009
- // does. In the latter case Instr is irrelevant.
7010
- if ((Recipe = tryToInterleaveMemory (Instr, Range, Plan))) {
7011
- VPBB->appendRecipe (Recipe);
7012
- return true ;
7013
- }
7014
-
7015
- // Check if Instr is a memory operation that should be widened.
7016
- if ((Recipe = tryToWidenMemory (Instr, Range, Plan))) {
7017
- VPBB->appendRecipe (Recipe);
7018
- return true ;
7019
- }
7020
6982
7021
- // Check if Instr should form some PHI recipe.
7022
- if ((Recipe = tryToOptimizeInduction (Instr, Range))) {
7023
- VPBB->appendRecipe (Recipe);
7024
- return true ;
7025
- }
7026
- if ((Recipe = tryToBlend (Instr, Plan))) {
6983
+ // First, check for specific widening recipes that deal with memory
6984
+ // operations, inductions and Phi nodes.
6985
+ if ((Recipe = tryToWidenMemory (Instr, Range, Plan)) ||
6986
+ (Recipe = tryToOptimizeInduction (Instr, Range)) ||
6987
+ (Recipe = tryToBlend (Instr, Plan)) ||
6988
+ (isa<PHINode>(Instr) &&
6989
+ (Recipe = new VPWidenPHIRecipe (cast<PHINode>(Instr))))) {
6990
+ setRecipe (Instr, Recipe);
7027
6991
VPBB->appendRecipe (Recipe);
7028
6992
return true ;
7029
6993
}
7030
- if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031
- VPBB->appendRecipe (new VPWidenPHIRecipe (Phi));
7032
- return true ;
7033
- }
7034
6994
7035
- // Check if Instr is to be widened by a general VPWidenRecipe, after
7036
- // having first checked for specific widening recipes that deal with
7037
- // Interleave Groups, Inductions and Phi nodes.
6995
+ // Check if Instr is to be widened by a general VPWidenRecipe.
7038
6996
if (tryToWiden (Instr, VPBB, Range))
7039
6997
return true ;
7040
6998
@@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
7090
7048
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes (
7091
7049
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
7092
7050
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7051
+
7093
7052
// Hold a mapping from predicated instructions to their recipes, in order to
7094
7053
// fix their AlsoPack behavior if a user is determined to replicate and use a
7095
7054
// scalar instead of vector value.
7096
7055
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
7097
7056
7098
7057
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter ();
7099
- DenseMap<Instruction *, Instruction *> SinkAfterInverse;
7058
+
7059
+ SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
7060
+
7061
+ VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7062
+
7063
+ // ---------------------------------------------------------------------------
7064
+ // Pre-construction: record ingredients whose recipes we'll need to further
7065
+ // process after constructing the initial VPlan.
7066
+ // ---------------------------------------------------------------------------
7067
+
7068
+ // Mark instructions we'll need to sink later and their targets as
7069
+ // ingredients whose recipe we'll need to record.
7070
+ for (auto &Entry : SinkAfter) {
7071
+ RecipeBuilder.recordRecipeOf (Entry.first );
7072
+ RecipeBuilder.recordRecipeOf (Entry.second );
7073
+ }
7074
+
7075
+ // For each interleave group which is relevant for this (possibly trimmed)
7076
+ // Range, add it to the set of groups to be later applied to the VPlan and add
7077
+ // placeholders for its members' Recipes which we'll be replacing with a
7078
+ // single VPInterleaveRecipe.
7079
+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
7080
+ auto applyIG = [IG, this ](unsigned VF) -> bool {
7081
+ return (VF >= 2 && // Query is illegal for VF == 1
7082
+ CM.getWideningDecision (IG->getInsertPos (), VF) ==
7083
+ LoopVectorizationCostModel::CM_Interleave);
7084
+ };
7085
+ if (!getDecisionAndClampRange (applyIG, Range))
7086
+ continue ;
7087
+ InterleaveGroups.insert (IG);
7088
+ for (unsigned i = 0 ; i < IG->getFactor (); i++)
7089
+ if (Instruction *Member = IG->getMember (i))
7090
+ RecipeBuilder.recordRecipeOf (Member);
7091
+ };
7092
+
7093
+ // ---------------------------------------------------------------------------
7094
+ // Build initial VPlan: Scan the body of the loop in a topological order to
7095
+ // visit each basic block after having visited its predecessor basic blocks.
7096
+ // ---------------------------------------------------------------------------
7100
7097
7101
7098
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
7102
7099
VPBasicBlock *VPBB = new VPBasicBlock (" Pre-Entry" );
7103
7100
auto Plan = std::make_unique<VPlan>(VPBB);
7104
7101
7105
- VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7106
7102
// Represent values that will have defs inside VPlan.
7107
7103
for (Value *V : NeedDef)
7108
7104
Plan->addVPValue (V);
@@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7123
7119
7124
7120
std::vector<Instruction *> Ingredients;
7125
7121
7126
- // Organize the ingredients to vectorize from current basic block in the
7127
- // right order.
7122
+ // Introduce each ingredient into VPlan.
7128
7123
for (Instruction &I : BB->instructionsWithoutDebug ()) {
7129
7124
Instruction *Instr = &I;
7130
7125
@@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7134
7129
DeadInstructions.find (Instr) != DeadInstructions.end ())
7135
7130
continue ;
7136
7131
7137
- // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138
- // member of the IG, do not construct any Recipe for it.
7139
- const InterleaveGroup<Instruction> *IG =
7140
- CM.getInterleavedAccessGroup (Instr);
7141
- if (IG && Instr != IG->getInsertPos () &&
7142
- Range.Start >= 2 && // Query is illegal for VF == 1
7143
- CM.getWideningDecision (Instr, Range.Start ) ==
7144
- LoopVectorizationCostModel::CM_Interleave) {
7145
- auto SinkCandidate = SinkAfterInverse.find (Instr);
7146
- if (SinkCandidate != SinkAfterInverse.end ())
7147
- Ingredients.push_back (SinkCandidate->second );
7148
- continue ;
7149
- }
7150
-
7151
- // Move instructions to handle first-order recurrences, step 1: avoid
7152
- // handling this instruction until after we've handled the instruction it
7153
- // should follow.
7154
- auto SAIt = SinkAfter.find (Instr);
7155
- if (SAIt != SinkAfter.end ()) {
7156
- LLVM_DEBUG (dbgs () << " Sinking" << *SAIt->first << " after"
7157
- << *SAIt->second
7158
- << " to vectorize a 1st order recurrence.\n " );
7159
- SinkAfterInverse[SAIt->second ] = Instr;
7160
- continue ;
7161
- }
7162
-
7163
- Ingredients.push_back (Instr);
7164
-
7165
- // Move instructions to handle first-order recurrences, step 2: push the
7166
- // instruction to be sunk at its insertion point.
7167
- auto SAInvIt = SinkAfterInverse.find (Instr);
7168
- if (SAInvIt != SinkAfterInverse.end ())
7169
- Ingredients.push_back (SAInvIt->second );
7170
- }
7171
-
7172
- // Introduce each ingredient into VPlan.
7173
- for (Instruction *Instr : Ingredients) {
7174
7132
if (RecipeBuilder.tryToCreateRecipe (Instr, Range, Plan, VPBB))
7175
7133
continue ;
7176
7134
@@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7195
7153
VPBlockUtils::disconnectBlocks (PreEntry, Entry);
7196
7154
delete PreEntry;
7197
7155
7156
+ // ---------------------------------------------------------------------------
7157
+ // Transform initial VPlan: Apply previously taken decisions, in order, to
7158
+ // bring the VPlan to its final state.
7159
+ // ---------------------------------------------------------------------------
7160
+
7161
+ // Apply Sink-After legal constraints.
7162
+ for (auto &Entry : SinkAfter) {
7163
+ VPRecipeBase *Sink = RecipeBuilder.getRecipe (Entry.first );
7164
+ VPRecipeBase *Target = RecipeBuilder.getRecipe (Entry.second );
7165
+ Sink->moveAfter (Target);
7166
+ }
7167
+
7168
+ // Interleave memory: for each Interleave Group we marked earlier as relevant
7169
+ // for this VPlan, replace the Recipes widening its memory instructions with a
7170
+ // single VPInterleaveRecipe at its insertion point.
7171
+ for (auto IG : InterleaveGroups) {
7172
+ auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7173
+ RecipeBuilder.getRecipe (IG->getInsertPos ()));
7174
+ (new VPInterleaveRecipe (IG, Recipe->getMask ()))->insertBefore (Recipe);
7175
+
7176
+ for (unsigned i = 0 ; i < IG->getFactor (); ++i)
7177
+ if (Instruction *Member = IG->getMember (i)) {
7178
+ RecipeBuilder.getRecipe (Member)->eraseFromParent ();
7179
+ }
7180
+ }
7181
+
7198
7182
// Finally, if tail is folded by masking, introduce selects between the phi
7199
7183
// and the live-out instruction of each reduction, at the end of the latch.
7200
7184
if (CM.foldTailByMasking ()) {
@@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
7427
7411
}
7428
7412
7429
7413
void VPWidenMemoryInstructionRecipe::execute (VPTransformState &State) {
7430
- if (!User)
7414
+ VPValue *Mask = getMask ();
7415
+ if (!Mask)
7431
7416
return State.ILV ->vectorizeMemoryInstruction (&Instr);
7432
7417
7433
- // Last (and currently only) operand is a mask.
7434
7418
InnerLoopVectorizer::VectorParts MaskValues (State.UF );
7435
- VPValue *Mask = User->getOperand (User->getNumOperands () - 1 );
7436
7419
for (unsigned Part = 0 ; Part < State.UF ; ++Part)
7437
7420
MaskValues[Part] = State.get (Mask, Part);
7438
7421
State.ILV ->vectorizeMemoryInstruction (&Instr, &MaskValues);
@@ -7481,7 +7464,7 @@ static bool processLoopInVPlanNativePath(
7481
7464
// Use the planner for outer loop vectorization.
7482
7465
// TODO: CM is not used at this point inside the planner. Turn CM into an
7483
7466
// optional argument if we don't need it in the future.
7484
- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM);
7467
+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM, IAI );
7485
7468
7486
7469
// Get user vectorization factor.
7487
7470
const unsigned UserVF = Hints.getWidth ();
@@ -7641,7 +7624,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
7641
7624
CM.collectValuesToIgnore ();
7642
7625
7643
7626
// Use the planner for vectorization.
7644
- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM);
7627
+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM, IAI );
7645
7628
7646
7629
// Get user vectorization factor.
7647
7630
unsigned UserVF = Hints.getWidth ();
0 commit comments