@@ -6710,6 +6710,37 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
6710
6710
return BlockMaskCache[BB] = BlockMask;
6711
6711
}
6712
6712
6713
/// Try to build the VPInterleaveRecipe for the interleave group containing
/// \p I.
///
/// Returns nullptr when CM reports no interleave group for \p I, or when
/// getDecisionAndClampRange() yields false for the "widening decision is
/// CM_Interleave" predicate over \p Range. Otherwise \p I must be the group's
/// insert position (asserted) and a newly allocated VPInterleaveRecipe is
/// returned; it carries a block-in mask when Legal->isMaskRequired(I) holds and
/// a null mask otherwise.
///
/// \p Range is passed by reference to getDecisionAndClampRange() and may be
/// narrowed by it -- presumably to the VFs sharing the decision taken at
/// Range.Start; confirm against that helper's definition.
///
/// The recipe is allocated with plain `new`; the caller visible in this change,
/// tryToCreateRecipe(), appends it to its VPBasicBlock.
+ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory (Instruction *I,
6714
+ VFRange &Range,
6715
+ VPlanPtr &Plan) {
6716
+ const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup (I);
6717
+ if (!IG) // I does not belong to any interleave group known to CM.
6718
+ return nullptr ;
6719
+
6720
+ // Now check if IG is relevant for VF's in the given range: build a per-VF
+ // predicate for I and let getDecisionAndClampRange() evaluate it on Range.
6721
+ auto isIGMember = [&](Instruction *I) -> std::function<bool (unsigned )> {
6722
+ return [=](unsigned VF) -> bool { // I is copied into the returned predicate.
6723
+ return (VF >= 2 && // Query is illegal for VF == 1, so test VF first.
6724
+ CM.getWideningDecision (I, VF) ==
6725
+ LoopVectorizationCostModel::CM_Interleave);
6726
+ };
6727
+ };
6728
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange (isIGMember (I), Range))
6729
+ return nullptr ;
6730
+
6731
+ // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732
+ // range. A VPInterleaveRecipe is built only for the group's insert position;
6733
+ // any other (adjunct) member reaching this point trips the assert below.
6734
+ assert (I == IG->getInsertPos () &&
6735
+ " Generating a recipe for an adjunct member of an interleave group" );
6736
+
6737
+ VPValue *Mask = nullptr ; // Stays null when no mask is required for I.
6738
+ if (Legal->isMaskRequired (I))
6739
+ Mask = createBlockInMask (I->getParent (), Plan);
6740
+
6741
+ return new VPInterleaveRecipe (IG, Mask);
6742
+ }
6743
+
6713
6744
VPWidenMemoryInstructionRecipe *
6714
6745
VPRecipeBuilder::tryToWidenMemory (Instruction *I, VFRange &Range,
6715
6746
VPlanPtr &Plan) {
@@ -6726,6 +6757,8 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
6726
6757
CM.getWideningDecision (I, VF);
6727
6758
assert (Decision != LoopVectorizationCostModel::CM_Unknown &&
6728
6759
" CM decision should be taken at this point." );
6760
+ assert (Decision != LoopVectorizationCostModel::CM_Interleave &&
6761
+ " Interleave memory opportunity should be caught earlier." );
6729
6762
return Decision != LoopVectorizationCostModel::CM_Scalarize;
6730
6763
};
6731
6764
@@ -6890,21 +6923,15 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
6890
6923
if (!LoopVectorizationPlanner::getDecisionAndClampRange (willWiden, Range))
6891
6924
return false ;
6892
6925
6893
- // If this ingredient's recipe is to be recorded, keep its recipe a singleton
6894
- // to avoid having to split recipes later.
6895
- bool IsSingleton = Ingredient2Recipe.count (I);
6896
-
6897
6926
// Success: widen this instruction. We optimize the common case where
6898
6927
// consecutive instructions can be represented by a single recipe.
6899
- if (!IsSingleton && !VPBB->empty () && LastExtensibleRecipe == &VPBB->back () &&
6900
- LastExtensibleRecipe->appendInstruction (I))
6901
- return true ;
6928
+ if (!VPBB->empty ()) {
6929
+ VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back ());
6930
+ if (LastWidenRecipe && LastWidenRecipe->appendInstruction (I))
6931
+ return true ;
6932
+ }
6902
6933
6903
- VPWidenRecipe *WidenRecipe = new VPWidenRecipe (I);
6904
- if (!IsSingleton)
6905
- LastExtensibleRecipe = WidenRecipe;
6906
- setRecipe (I, WidenRecipe);
6907
- VPBB->appendRecipe (WidenRecipe);
6934
+ VPBB->appendRecipe (new VPWidenRecipe (I));
6908
6935
return true ;
6909
6936
}
6910
6937
@@ -6920,7 +6947,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
6920
6947
[&](unsigned VF) { return CM.isScalarWithPredication (I, VF); }, Range);
6921
6948
6922
6949
auto *Recipe = new VPReplicateRecipe (I, IsUniform, IsPredicated);
6923
- setRecipe (I, Recipe);
6924
6950
6925
6951
// Find if I uses a predicated instruction. If so, it will use its scalar
6926
6952
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -6979,20 +7005,36 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
6979
7005
bool VPRecipeBuilder::tryToCreateRecipe (Instruction *Instr, VFRange &Range,
6980
7006
VPlanPtr &Plan, VPBasicBlock *VPBB) {
6981
7007
VPRecipeBase *Recipe = nullptr ;
7008
+ // Check if Instr should belong to an interleave memory recipe, or already
7009
+ // does. In the latter case Instr is irrelevant.
7010
+ if ((Recipe = tryToInterleaveMemory (Instr, Range, Plan))) {
7011
+ VPBB->appendRecipe (Recipe);
7012
+ return true ;
7013
+ }
7014
+
7015
+ // Check if Instr is a memory operation that should be widened.
7016
+ if ((Recipe = tryToWidenMemory (Instr, Range, Plan))) {
7017
+ VPBB->appendRecipe (Recipe);
7018
+ return true ;
7019
+ }
6982
7020
6983
- // First, check for specific widening recipes that deal with memory
6984
- // operations, inductions and Phi nodes.
6985
- if ((Recipe = tryToWidenMemory (Instr, Range, Plan)) ||
6986
- (Recipe = tryToOptimizeInduction (Instr, Range)) ||
6987
- (Recipe = tryToBlend (Instr, Plan)) ||
6988
- (isa<PHINode>(Instr) &&
6989
- (Recipe = new VPWidenPHIRecipe (cast<PHINode>(Instr))))) {
6990
- setRecipe (Instr, Recipe);
7021
+ // Check if Instr should form some PHI recipe.
7022
+ if ((Recipe = tryToOptimizeInduction (Instr, Range))) {
7023
+ VPBB->appendRecipe (Recipe);
7024
+ return true ;
7025
+ }
7026
+ if ((Recipe = tryToBlend (Instr, Plan))) {
6991
7027
VPBB->appendRecipe (Recipe);
6992
7028
return true ;
6993
7029
}
7030
+ if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031
+ VPBB->appendRecipe (new VPWidenPHIRecipe (Phi));
7032
+ return true ;
7033
+ }
6994
7034
6995
- // Check if Instr is to be widened by a general VPWidenRecipe.
7035
+ // Check if Instr is to be widened by a general VPWidenRecipe, after
7036
+ // having first checked for specific widening recipes that deal with
7037
+ // Interleave Groups, Inductions and Phi nodes.
6996
7038
if (tryToWiden (Instr, VPBB, Range))
6997
7039
return true ;
6998
7040
@@ -7048,57 +7090,19 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
7048
7090
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes (
7049
7091
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
7050
7092
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7051
-
7052
7093
// Hold a mapping from predicated instructions to their recipes, in order to
7053
7094
// fix their AlsoPack behavior if a user is determined to replicate and use a
7054
7095
// scalar instead of vector value.
7055
7096
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
7056
7097
7057
7098
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter ();
7058
-
7059
- SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
7060
-
7061
- VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7062
-
7063
- // ---------------------------------------------------------------------------
7064
- // Pre-construction: record ingredients whose recipes we'll need to further
7065
- // process after constructing the initial VPlan.
7066
- // ---------------------------------------------------------------------------
7067
-
7068
- // Mark instructions we'll need to sink later and their targets as
7069
- // ingredients whose recipe we'll need to record.
7070
- for (auto &Entry : SinkAfter) {
7071
- RecipeBuilder.recordRecipeOf (Entry.first );
7072
- RecipeBuilder.recordRecipeOf (Entry.second );
7073
- }
7074
-
7075
- // For each interleave group which is relevant for this (possibly trimmed)
7076
- // Range, add it to the set of groups to be later applied to the VPlan and add
7077
- // placeholders for its members' Recipes which we'll be replacing with a
7078
- // single VPInterleaveRecipe.
7079
- for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
7080
- auto applyIG = [IG, this ](unsigned VF) -> bool {
7081
- return (VF >= 2 && // Query is illegal for VF == 1
7082
- CM.getWideningDecision (IG->getInsertPos (), VF) ==
7083
- LoopVectorizationCostModel::CM_Interleave);
7084
- };
7085
- if (!getDecisionAndClampRange (applyIG, Range))
7086
- continue ;
7087
- InterleaveGroups.insert (IG);
7088
- for (unsigned i = 0 ; i < IG->getFactor (); i++)
7089
- if (Instruction *Member = IG->getMember (i))
7090
- RecipeBuilder.recordRecipeOf (Member);
7091
- };
7092
-
7093
- // ---------------------------------------------------------------------------
7094
- // Build initial VPlan: Scan the body of the loop in a topological order to
7095
- // visit each basic block after having visited its predecessor basic blocks.
7096
- // ---------------------------------------------------------------------------
7099
+ DenseMap<Instruction *, Instruction *> SinkAfterInverse;
7097
7100
7098
7101
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
7099
7102
VPBasicBlock *VPBB = new VPBasicBlock (" Pre-Entry" );
7100
7103
auto Plan = std::make_unique<VPlan>(VPBB);
7101
7104
7105
+ VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7102
7106
// Represent values that will have defs inside VPlan.
7103
7107
for (Value *V : NeedDef)
7104
7108
Plan->addVPValue (V);
@@ -7119,7 +7123,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7119
7123
7120
7124
std::vector<Instruction *> Ingredients;
7121
7125
7122
- // Introduce each ingredient into VPlan.
7126
+ // Organize the ingredients to vectorize from current basic block in the
7127
+ // right order.
7123
7128
for (Instruction &I : BB->instructionsWithoutDebug ()) {
7124
7129
Instruction *Instr = &I;
7125
7130
@@ -7129,6 +7134,43 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7129
7134
DeadInstructions.find (Instr) != DeadInstructions.end ())
7130
7135
continue ;
7131
7136
7137
+ // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138
+ // member of the IG, do not construct any Recipe for it.
7139
+ const InterleaveGroup<Instruction> *IG =
7140
+ CM.getInterleavedAccessGroup (Instr);
7141
+ if (IG && Instr != IG->getInsertPos () &&
7142
+ Range.Start >= 2 && // Query is illegal for VF == 1
7143
+ CM.getWideningDecision (Instr, Range.Start ) ==
7144
+ LoopVectorizationCostModel::CM_Interleave) {
7145
+ auto SinkCandidate = SinkAfterInverse.find (Instr);
7146
+ if (SinkCandidate != SinkAfterInverse.end ())
7147
+ Ingredients.push_back (SinkCandidate->second );
7148
+ continue ;
7149
+ }
7150
+
7151
+ // Move instructions to handle first-order recurrences, step 1: avoid
7152
+ // handling this instruction until after we've handled the instruction it
7153
+ // should follow.
7154
+ auto SAIt = SinkAfter.find (Instr);
7155
+ if (SAIt != SinkAfter.end ()) {
7156
+ LLVM_DEBUG (dbgs () << " Sinking" << *SAIt->first << " after"
7157
+ << *SAIt->second
7158
+ << " to vectorize a 1st order recurrence.\n " );
7159
+ SinkAfterInverse[SAIt->second ] = Instr;
7160
+ continue ;
7161
+ }
7162
+
7163
+ Ingredients.push_back (Instr);
7164
+
7165
+ // Move instructions to handle first-order recurrences, step 2: push the
7166
+ // instruction to be sunk at its insertion point.
7167
+ auto SAInvIt = SinkAfterInverse.find (Instr);
7168
+ if (SAInvIt != SinkAfterInverse.end ())
7169
+ Ingredients.push_back (SAInvIt->second );
7170
+ }
7171
+
7172
+ // Introduce each ingredient into VPlan.
7173
+ for (Instruction *Instr : Ingredients) {
7132
7174
if (RecipeBuilder.tryToCreateRecipe (Instr, Range, Plan, VPBB))
7133
7175
continue ;
7134
7176
@@ -7153,32 +7195,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
7153
7195
VPBlockUtils::disconnectBlocks (PreEntry, Entry);
7154
7196
delete PreEntry;
7155
7197
7156
- // ---------------------------------------------------------------------------
7157
- // Transform initial VPlan: Apply previously taken decisions, in order, to
7158
- // bring the VPlan to its final state.
7159
- // ---------------------------------------------------------------------------
7160
-
7161
- // Apply Sink-After legal constraints.
7162
- for (auto &Entry : SinkAfter) {
7163
- VPRecipeBase *Sink = RecipeBuilder.getRecipe (Entry.first );
7164
- VPRecipeBase *Target = RecipeBuilder.getRecipe (Entry.second );
7165
- Sink->moveAfter (Target);
7166
- }
7167
-
7168
- // Interleave memory: for each Interleave Group we marked earlier as relevant
7169
- // for this VPlan, replace the Recipes widening its memory instructions with a
7170
- // single VPInterleaveRecipe at its insertion point.
7171
- for (auto IG : InterleaveGroups) {
7172
- auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7173
- RecipeBuilder.getRecipe (IG->getInsertPos ()));
7174
- (new VPInterleaveRecipe (IG, Recipe->getMask ()))->insertBefore (Recipe);
7175
-
7176
- for (unsigned i = 0 ; i < IG->getFactor (); ++i)
7177
- if (Instruction *Member = IG->getMember (i)) {
7178
- RecipeBuilder.getRecipe (Member)->eraseFromParent ();
7179
- }
7180
- }
7181
-
7182
7198
// Finally, if tail is folded by masking, introduce selects between the phi
7183
7199
// and the live-out instruction of each reduction, at the end of the latch.
7184
7200
if (CM.foldTailByMasking ()) {
@@ -7411,11 +7427,12 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
7411
7427
}
7412
7428
7413
7429
void VPWidenMemoryInstructionRecipe::execute (VPTransformState &State) {
7414
- VPValue *Mask = getMask ();
7415
- if (!Mask)
7430
+ if (!User)
7416
7431
return State.ILV ->vectorizeMemoryInstruction (&Instr);
7417
7432
7433
+ // Last (and currently only) operand is a mask.
7418
7434
InnerLoopVectorizer::VectorParts MaskValues (State.UF );
7435
+ VPValue *Mask = User->getOperand (User->getNumOperands () - 1 );
7419
7436
for (unsigned Part = 0 ; Part < State.UF ; ++Part)
7420
7437
MaskValues[Part] = State.get (Mask, Part);
7421
7438
State.ILV ->vectorizeMemoryInstruction (&Instr, &MaskValues);
@@ -7464,7 +7481,7 @@ static bool processLoopInVPlanNativePath(
7464
7481
// Use the planner for outer loop vectorization.
7465
7482
// TODO: CM is not used at this point inside the planner. Turn CM into an
7466
7483
// optional argument if we don't need it in the future.
7467
- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM, IAI );
7484
+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM);
7468
7485
7469
7486
// Get user vectorization factor.
7470
7487
const unsigned UserVF = Hints.getWidth ();
@@ -7624,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
7624
7641
CM.collectValuesToIgnore ();
7625
7642
7626
7643
// Use the planner for vectorization.
7627
- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM, IAI );
7644
+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM);
7628
7645
7629
7646
// Get user vectorization factor.
7630
7647
unsigned UserVF = Hints.getWidth ();
0 commit comments