Skip to content

Commit 009e032

Browse files
committed
Temporarily revert "[LV] Apply sink-after & interleave-groups as VPlan transformations (NFC)"
because it is causing assertion failures. This reverts commit 100e797.
1 parent 9f10cc2 commit 009e032

File tree

7 files changed

+131 additions
-174 deletions

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -542,10 +542,13 @@ class InterleavedAccessInfo {
542542
/// formation for predicated accesses, we may be able to relax this limitation
543543
/// in the future once we handle more complicated blocks.
544544
void reset() {
545-
InterleaveGroupMap.clear();
546-
for (auto *Ptr : InterleaveGroups)
545+
SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
546+
// Avoid releasing a pointer twice.
547+
for (auto &I : InterleaveGroupMap)
548+
DelSet.insert(I.second);
549+
for (auto *Ptr : DelSet)
547550
delete Ptr;
548-
InterleaveGroups.clear();
551+
InterleaveGroupMap.clear();
549552
RequiresScalarEpilogue = false;
550553
}
551554

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,6 @@ class LoopVectorizationPlanner {
201201
/// The profitability analysis.
202202
LoopVectorizationCostModel &CM;
203203

204-
/// The interleaved access analysis.
205-
InterleavedAccessInfo &IAI;
206-
207204
SmallVector<VPlanPtr, 4> VPlans;
208205

209206
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -226,10 +223,8 @@ class LoopVectorizationPlanner {
226223
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
227224
const TargetTransformInfo *TTI,
228225
LoopVectorizationLegality *Legal,
229-
LoopVectorizationCostModel &CM,
230-
InterleavedAccessInfo &IAI)
231-
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
232-
IAI(IAI) {}
226+
LoopVectorizationCostModel &CM)
227+
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
233228

234229
/// Plan how to best vectorize, return the best VF and its cost, or None if
235230
/// vectorization and interleaving should be avoided up front.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 110 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -6710,6 +6710,37 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
67106710
return BlockMaskCache[BB] = BlockMask;
67116711
}
67126712

6713+
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
6714+
VFRange &Range,
6715+
VPlanPtr &Plan) {
6716+
const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
6717+
if (!IG)
6718+
return nullptr;
6719+
6720+
// Now check if IG is relevant for VF's in the given range.
6721+
auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
6722+
return [=](unsigned VF) -> bool {
6723+
return (VF >= 2 && // Query is illegal for VF == 1
6724+
CM.getWideningDecision(I, VF) ==
6725+
LoopVectorizationCostModel::CM_Interleave);
6726+
};
6727+
};
6728+
if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
6729+
return nullptr;
6730+
6731+
// I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732+
// range. If it's the primary member of the IG construct a VPInterleaveRecipe.
6733+
// Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
6734+
assert(I == IG->getInsertPos() &&
6735+
"Generating a recipe for an adjunct member of an interleave group");
6736+
6737+
VPValue *Mask = nullptr;
6738+
if (Legal->isMaskRequired(I))
6739+
Mask = createBlockInMask(I->getParent(), Plan);
6740+
6741+
return new VPInterleaveRecipe(IG, Mask);
6742+
}
6743+
67136744
VPWidenMemoryInstructionRecipe *
67146745
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67156746
VPlanPtr &Plan) {
@@ -6726,6 +6757,8 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67266757
CM.getWideningDecision(I, VF);
67276758
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
67286759
"CM decision should be taken at this point.");
6760+
assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
6761+
"Interleave memory opportunity should be caught earlier.");
67296762
return Decision != LoopVectorizationCostModel::CM_Scalarize;
67306763
};
67316764

@@ -6890,21 +6923,15 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
68906923
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
68916924
return false;
68926925

6893-
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
6894-
// to avoid having to split recipes later.
6895-
bool IsSingleton = Ingredient2Recipe.count(I);
6896-
68976926
// Success: widen this instruction. We optimize the common case where
68986927
// consecutive instructions can be represented by a single recipe.
6899-
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
6900-
LastExtensibleRecipe->appendInstruction(I))
6901-
return true;
6928+
if (!VPBB->empty()) {
6929+
VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
6930+
if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
6931+
return true;
6932+
}
69026933

6903-
VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
6904-
if (!IsSingleton)
6905-
LastExtensibleRecipe = WidenRecipe;
6906-
setRecipe(I, WidenRecipe);
6907-
VPBB->appendRecipe(WidenRecipe);
6934+
VPBB->appendRecipe(new VPWidenRecipe(I));
69086935
return true;
69096936
}
69106937

@@ -6920,7 +6947,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
69206947
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
69216948

69226949
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
6923-
setRecipe(I, Recipe);
69246950

69256951
// Find if I uses a predicated instruction. If so, it will use its scalar
69266952
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -6979,20 +7005,36 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
69797005
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
69807006
VPlanPtr &Plan, VPBasicBlock *VPBB) {
69817007
VPRecipeBase *Recipe = nullptr;
7008+
// Check if Instr should belong to an interleave memory recipe, or already
7009+
// does. In the latter case Instr is irrelevant.
7010+
if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
7011+
VPBB->appendRecipe(Recipe);
7012+
return true;
7013+
}
7014+
7015+
// Check if Instr is a memory operation that should be widened.
7016+
if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
7017+
VPBB->appendRecipe(Recipe);
7018+
return true;
7019+
}
69827020

6983-
// First, check for specific widening recipes that deal with memory
6984-
// operations, inductions and Phi nodes.
6985-
if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
6986-
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
6987-
(Recipe = tryToBlend(Instr, Plan)) ||
6988-
(isa<PHINode>(Instr) &&
6989-
(Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
6990-
setRecipe(Instr, Recipe);
7021+
// Check if Instr should form some PHI recipe.
7022+
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
7023+
VPBB->appendRecipe(Recipe);
7024+
return true;
7025+
}
7026+
if ((Recipe = tryToBlend(Instr, Plan))) {
69917027
VPBB->appendRecipe(Recipe);
69927028
return true;
69937029
}
7030+
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031+
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
7032+
return true;
7033+
}
69947034

6995-
// Check if Instr is to be widened by a general VPWidenRecipe.
7035+
// Check if Instr is to be widened by a general VPWidenRecipe, after
7036+
// having first checked for specific widening recipes that deal with
7037+
// Interleave Groups, Inductions and Phi nodes.
69967038
if (tryToWiden(Instr, VPBB, Range))
69977039
return true;
69987040

@@ -7048,57 +7090,19 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
70487090
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
70497091
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
70507092
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7051-
70527093
// Hold a mapping from predicated instructions to their recipes, in order to
70537094
// fix their AlsoPack behavior if a user is determined to replicate and use a
70547095
// scalar instead of vector value.
70557096
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
70567097

70577098
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
7058-
7059-
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
7060-
7061-
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
7062-
7063-
// ---------------------------------------------------------------------------
7064-
// Pre-construction: record ingredients whose recipes we'll need to further
7065-
// process after constructing the initial VPlan.
7066-
// ---------------------------------------------------------------------------
7067-
7068-
// Mark instructions we'll need to sink later and their targets as
7069-
// ingredients whose recipe we'll need to record.
7070-
for (auto &Entry : SinkAfter) {
7071-
RecipeBuilder.recordRecipeOf(Entry.first);
7072-
RecipeBuilder.recordRecipeOf(Entry.second);
7073-
}
7074-
7075-
// For each interleave group which is relevant for this (possibly trimmed)
7076-
// Range, add it to the set of groups to be later applied to the VPlan and add
7077-
// placeholders for its members' Recipes which we'll be replacing with a
7078-
// single VPInterleaveRecipe.
7079-
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
7080-
auto applyIG = [IG, this](unsigned VF) -> bool {
7081-
return (VF >= 2 && // Query is illegal for VF == 1
7082-
CM.getWideningDecision(IG->getInsertPos(), VF) ==
7083-
LoopVectorizationCostModel::CM_Interleave);
7084-
};
7085-
if (!getDecisionAndClampRange(applyIG, Range))
7086-
continue;
7087-
InterleaveGroups.insert(IG);
7088-
for (unsigned i = 0; i < IG->getFactor(); i++)
7089-
if (Instruction *Member = IG->getMember(i))
7090-
RecipeBuilder.recordRecipeOf(Member);
7091-
};
7092-
7093-
// ---------------------------------------------------------------------------
7094-
// Build initial VPlan: Scan the body of the loop in a topological order to
7095-
// visit each basic block after having visited its predecessor basic blocks.
7096-
// ---------------------------------------------------------------------------
7099+
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
70977100

70987101
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
70997102
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
71007103
auto Plan = std::make_unique<VPlan>(VPBB);
71017104

7105+
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
71027106
// Represent values that will have defs inside VPlan.
71037107
for (Value *V : NeedDef)
71047108
Plan->addVPValue(V);
@@ -7119,7 +7123,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71197123

71207124
std::vector<Instruction *> Ingredients;
71217125

7122-
// Introduce each ingredient into VPlan.
7126+
// Organize the ingredients to vectorize from current basic block in the
7127+
// right order.
71237128
for (Instruction &I : BB->instructionsWithoutDebug()) {
71247129
Instruction *Instr = &I;
71257130

@@ -7129,6 +7134,43 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71297134
DeadInstructions.find(Instr) != DeadInstructions.end())
71307135
continue;
71317136

7137+
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138+
// member of the IG, do not construct any Recipe for it.
7139+
const InterleaveGroup<Instruction> *IG =
7140+
CM.getInterleavedAccessGroup(Instr);
7141+
if (IG && Instr != IG->getInsertPos() &&
7142+
Range.Start >= 2 && // Query is illegal for VF == 1
7143+
CM.getWideningDecision(Instr, Range.Start) ==
7144+
LoopVectorizationCostModel::CM_Interleave) {
7145+
auto SinkCandidate = SinkAfterInverse.find(Instr);
7146+
if (SinkCandidate != SinkAfterInverse.end())
7147+
Ingredients.push_back(SinkCandidate->second);
7148+
continue;
7149+
}
7150+
7151+
// Move instructions to handle first-order recurrences, step 1: avoid
7152+
// handling this instruction until after we've handled the instruction it
7153+
// should follow.
7154+
auto SAIt = SinkAfter.find(Instr);
7155+
if (SAIt != SinkAfter.end()) {
7156+
LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
7157+
<< *SAIt->second
7158+
<< " to vectorize a 1st order recurrence.\n");
7159+
SinkAfterInverse[SAIt->second] = Instr;
7160+
continue;
7161+
}
7162+
7163+
Ingredients.push_back(Instr);
7164+
7165+
// Move instructions to handle first-order recurrences, step 2: push the
7166+
// instruction to be sunk at its insertion point.
7167+
auto SAInvIt = SinkAfterInverse.find(Instr);
7168+
if (SAInvIt != SinkAfterInverse.end())
7169+
Ingredients.push_back(SAInvIt->second);
7170+
}
7171+
7172+
// Introduce each ingredient into VPlan.
7173+
for (Instruction *Instr : Ingredients) {
71327174
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
71337175
continue;
71347176

@@ -7153,32 +7195,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71537195
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
71547196
delete PreEntry;
71557197

7156-
// ---------------------------------------------------------------------------
7157-
// Transform initial VPlan: Apply previously taken decisions, in order, to
7158-
// bring the VPlan to its final state.
7159-
// ---------------------------------------------------------------------------
7160-
7161-
// Apply Sink-After legal constraints.
7162-
for (auto &Entry : SinkAfter) {
7163-
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
7164-
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
7165-
Sink->moveAfter(Target);
7166-
}
7167-
7168-
// Interleave memory: for each Interleave Group we marked earlier as relevant
7169-
// for this VPlan, replace the Recipes widening its memory instructions with a
7170-
// single VPInterleaveRecipe at its insertion point.
7171-
for (auto IG : InterleaveGroups) {
7172-
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7173-
RecipeBuilder.getRecipe(IG->getInsertPos()));
7174-
(new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
7175-
7176-
for (unsigned i = 0; i < IG->getFactor(); ++i)
7177-
if (Instruction *Member = IG->getMember(i)) {
7178-
RecipeBuilder.getRecipe(Member)->eraseFromParent();
7179-
}
7180-
}
7181-
71827198
// Finally, if tail is folded by masking, introduce selects between the phi
71837199
// and the live-out instruction of each reduction, at the end of the latch.
71847200
if (CM.foldTailByMasking()) {
@@ -7411,11 +7427,12 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
74117427
}
74127428

74137429
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
7414-
VPValue *Mask = getMask();
7415-
if (!Mask)
7430+
if (!User)
74167431
return State.ILV->vectorizeMemoryInstruction(&Instr);
74177432

7433+
// Last (and currently only) operand is a mask.
74187434
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
7435+
VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
74197436
for (unsigned Part = 0; Part < State.UF; ++Part)
74207437
MaskValues[Part] = State.get(Mask, Part);
74217438
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
@@ -7464,7 +7481,7 @@ static bool processLoopInVPlanNativePath(
74647481
// Use the planner for outer loop vectorization.
74657482
// TODO: CM is not used at this point inside the planner. Turn CM into an
74667483
// optional argument if we don't need it in the future.
7467-
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
7484+
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
74687485

74697486
// Get user vectorization factor.
74707487
const unsigned UserVF = Hints.getWidth();
@@ -7624,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
76247641
CM.collectValuesToIgnore();
76257642

76267643
// Use the planner for vectorization.
7627-
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
7644+
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
76287645

76297646
// Get user vectorization factor.
76307647
unsigned UserVF = Hints.getWidth();

0 commit comments

Comments
 (0)