Skip to content

Commit 743946e

Browse files
authored
[VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. (#76090)
Move simplification of VPBlendRecipes from early VPlan construction to VPlan-to-VPlan based recipe simplification. This simplifies initial construction. Note that some in-loop reduction tests are failing at the moment, due to the reduction predicate being created after the reduction recipe. I will provide a patch for that soon. PR: #76090
1 parent ce80323 commit 743946e

File tree

3 files changed

+92
-104
lines changed

3 files changed

+92
-104
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 58 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
81128112
BlockMaskCache[BB] = BlockMask;
81138113
}
81148114

8115-
VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
8116-
ArrayRef<VPValue *> Operands,
8117-
VFRange &Range,
8118-
VPlanPtr &Plan) {
8115+
VPWidenMemoryInstructionRecipe *
8116+
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
8117+
VFRange &Range, VPlanPtr &Plan) {
81198118
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
81208119
"Must be called with either a load or store");
81218120

@@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
81878186
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
81888187
}
81898188

8190-
VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
8189+
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
81918190
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
81928191

81938192
// Check if this is an integer or fp induction. If so, build the recipe that
@@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
82398238
return nullptr;
82408239
}
82418240

8242-
VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
8243-
ArrayRef<VPValue *> Operands,
8244-
VPlanPtr &Plan) {
8245-
// If all incoming values are equal, the incoming VPValue can be used directly
8246-
// instead of creating a new VPBlendRecipe.
8247-
if (llvm::all_equal(Operands))
8248-
return Operands[0];
8249-
8241+
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
8242+
ArrayRef<VPValue *> Operands,
8243+
VPlanPtr &Plan) {
82508244
unsigned NumIncoming = Phi->getNumIncomingValues();
8251-
// For in-loop reductions, we do not need to create an additional select.
8252-
VPValue *InLoopVal = nullptr;
8253-
for (unsigned In = 0; In < NumIncoming; In++) {
8254-
PHINode *PhiOp =
8255-
dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
8256-
if (PhiOp && CM.isInLoopReduction(PhiOp)) {
8257-
assert(!InLoopVal && "Found more than one in-loop reduction!");
8258-
InLoopVal = Operands[In];
8259-
}
8260-
}
8261-
8262-
assert((!InLoopVal || NumIncoming == 2) &&
8263-
"Found an in-loop reduction for PHI with unexpected number of "
8264-
"incoming values");
8265-
if (InLoopVal)
8266-
return Operands[Operands[0] == InLoopVal ? 1 : 0];
82678245

82688246
// We know that all PHIs in non-header blocks are converted into selects, so
82698247
// we don't have to worry about the insertion order and we can just use the
@@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
82738251
SmallVector<VPValue *, 2> OperandsWithMask;
82748252

82758253
for (unsigned In = 0; In < NumIncoming; In++) {
8254+
OperandsWithMask.push_back(Operands[In]);
82768255
VPValue *EdgeMask =
82778256
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
8278-
assert((EdgeMask || NumIncoming == 1) &&
8279-
"Multiple predecessors with one having a full mask");
8280-
OperandsWithMask.push_back(Operands[In]);
8281-
if (EdgeMask)
8282-
OperandsWithMask.push_back(EdgeMask);
8257+
if (!EdgeMask) {
8258+
assert(In == 0 && "Both null and non-null edge masks found");
8259+
assert(all_equal(Operands) &&
8260+
"Distinct incoming values with one having a full mask");
8261+
break;
8262+
}
8263+
OperandsWithMask.push_back(EdgeMask);
82838264
}
8284-
return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask));
8265+
return new VPBlendRecipe(Phi, OperandsWithMask);
82858266
}
82868267

82878268
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
@@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
83908371
Range);
83918372
}
83928373

8393-
VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
8394-
ArrayRef<VPValue *> Operands,
8395-
VPBasicBlock *VPBB, VPlanPtr &Plan) {
8374+
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
8375+
ArrayRef<VPValue *> Operands,
8376+
VPBasicBlock *VPBB, VPlanPtr &Plan) {
83968377
switch (I->getOpcode()) {
83978378
default:
83988379
return nullptr;
@@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
84498430
}
84508431
}
84518432

8452-
VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
8453-
VFRange &Range,
8454-
VPlan &Plan) {
8433+
VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
8434+
VFRange &Range,
8435+
VPlan &Plan) {
84558436
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
84568437
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
84578438
Range);
@@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
85038484

85048485
auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
85058486
IsUniform, BlockInMask);
8506-
return toVPRecipeResult(Recipe);
8487+
return Recipe;
85078488
}
85088489

8509-
VPRecipeOrVPValueTy
8510-
VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8511-
ArrayRef<VPValue *> Operands,
8512-
VFRange &Range, VPBasicBlock *VPBB,
8513-
VPlanPtr &Plan) {
8490+
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8491+
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
8492+
VPBasicBlock *VPBB, VPlanPtr &Plan) {
85148493
// First, check for specific widening recipes that deal with inductions, Phi
85158494
// nodes, calls and memory operations.
85168495
VPRecipeBase *Recipe;
@@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85238502
recordRecipeOf(Phi);
85248503

85258504
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
8526-
return toVPRecipeResult(Recipe);
8505+
return Recipe;
85278506

85288507
VPHeaderPHIRecipe *PhiRecipe = nullptr;
85298508
assert((Legal->isReductionVariable(Phi) ||
@@ -8555,43 +8534,43 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85558534
recordRecipeOf(Inc);
85568535

85578536
PhisToFix.push_back(PhiRecipe);
8558-
return toVPRecipeResult(PhiRecipe);
8537+
return PhiRecipe;
85598538
}
85608539

85618540
if (isa<TruncInst>(Instr) &&
85628541
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
85638542
Range, *Plan)))
8564-
return toVPRecipeResult(Recipe);
8543+
return Recipe;
85658544

85668545
// All widen recipes below deal only with VF > 1.
85678546
if (LoopVectorizationPlanner::getDecisionAndClampRange(
85688547
[&](ElementCount VF) { return VF.isScalar(); }, Range))
85698548
return nullptr;
85708549

85718550
if (auto *CI = dyn_cast<CallInst>(Instr))
8572-
return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
8551+
return tryToWidenCall(CI, Operands, Range, Plan);
85738552

85748553
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
8575-
return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
8554+
return tryToWidenMemory(Instr, Operands, Range, Plan);
85768555

85778556
if (!shouldWiden(Instr, Range))
85788557
return nullptr;
85798558

85808559
if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
8581-
return toVPRecipeResult(new VPWidenGEPRecipe(
8582-
GEP, make_range(Operands.begin(), Operands.end())));
8560+
return new VPWidenGEPRecipe(GEP,
8561+
make_range(Operands.begin(), Operands.end()));
85838562

85848563
if (auto *SI = dyn_cast<SelectInst>(Instr)) {
8585-
return toVPRecipeResult(new VPWidenSelectRecipe(
8586-
*SI, make_range(Operands.begin(), Operands.end())));
8564+
return new VPWidenSelectRecipe(
8565+
*SI, make_range(Operands.begin(), Operands.end()));
85878566
}
85888567

85898568
if (auto *CI = dyn_cast<CastInst>(Instr)) {
8590-
return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
8591-
CI->getType(), *CI));
8569+
return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(),
8570+
*CI);
85928571
}
85938572

8594-
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
8573+
return tryToWiden(Instr, Operands, VPBB, Plan);
85958574
}
85968575

85978576
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
87798758
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
87808759
continue;
87818760

8782-
auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
8761+
VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
87838762
Instr, Operands, Range, VPBB, Plan);
8784-
if (!RecipeOrValue)
8785-
RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan);
8786-
// If Instr can be simplified to an existing VPValue, use it.
8787-
if (isa<VPValue *>(RecipeOrValue)) {
8788-
auto *VPV = cast<VPValue *>(RecipeOrValue);
8789-
Plan->addVPValue(Instr, VPV);
8790-
// If the re-used value is a recipe, register the recipe for the
8791-
// instruction, in case the recipe for Instr needs to be recorded.
8792-
if (VPRecipeBase *R = VPV->getDefiningRecipe())
8793-
RecipeBuilder.setRecipe(Instr, R);
8794-
continue;
8795-
}
8796-
// Otherwise, add the new recipe.
8797-
VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
8763+
if (!Recipe)
8764+
Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
87988765
for (auto *Def : Recipe->definedValues()) {
87998766
auto *UV = Def->getUnderlyingValue();
88008767
Plan->addVPValue(UV, Def);
@@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90419008
// the phi until LoopExitValue. We keep track of the previous item
90429009
// (PreviousLink) to tell which of the two operands of a Link will remain
90439010
// scalar and which will be reduced. For minmax by select(cmp), Link will be
9044-
// the select instructions.
9011+
// the select instructions. Blend recipes of in-loop reduction phis will
9012+
// get folded to their non-phi operand, as the reduction recipe handles the
9013+
// condition directly.
90459014
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
90469015
for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
90479016
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
@@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90729041
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
90739042
VecOp = FMulRecipe;
90749043
} else {
9044+
auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
9045+
if (PhiR->isInLoop() && Blend) {
9046+
assert(Blend->getNumIncomingValues() == 2 &&
9047+
"Blend must have 2 incoming values");
9048+
if (Blend->getIncomingValue(0) == PhiR)
9049+
Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
9050+
else {
9051+
assert(Blend->getIncomingValue(1) == PhiR &&
9052+
"PhiR must be an operand of the blend");
9053+
Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
9054+
}
9055+
continue;
9056+
}
9057+
90759058
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
90769059
if (isa<VPWidenRecipe>(CurrentLink)) {
90779060
assert(isa<CmpInst>(CurrentLinkI) &&

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ class LoopVectorizationLegality;
2121
class LoopVectorizationCostModel;
2222
class TargetLibraryInfo;
2323

24-
using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>;
25-
2624
/// Helper class to create VPRecipes from IR instructions.
2725
class VPRecipeBuilder {
2826
/// The loop that we evaluate.
@@ -69,27 +67,28 @@ class VPRecipeBuilder {
6967
/// Check if the load or store instruction \p I should be widened for \p
7068
/// Range.Start and potentially masked. Such instructions are handled by a
7169
/// recipe that takes an additional VPInstruction for the mask.
72-
VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
73-
VFRange &Range, VPlanPtr &Plan);
70+
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
71+
ArrayRef<VPValue *> Operands,
72+
VFRange &Range,
73+
VPlanPtr &Plan);
7474

7575
/// Check if an induction recipe should be constructed for \p Phi. If so build
7676
/// and return it. If not, return null.
77-
VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
78-
ArrayRef<VPValue *> Operands,
79-
VPlan &Plan, VFRange &Range);
77+
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
78+
ArrayRef<VPValue *> Operands,
79+
VPlan &Plan, VFRange &Range);
8080

8181
/// Optimize the special case where the operand of \p I is a constant integer
8282
/// induction variable.
8383
VPWidenIntOrFpInductionRecipe *
8484
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
8585
VFRange &Range, VPlan &Plan);
8686

87-
/// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
88-
/// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
89-
/// into a sequence of select instructions as the vectorizer currently
90-
/// performs full if-conversion.
91-
VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
92-
VPlanPtr &Plan);
87+
/// Handle non-loop phi nodes. Return a new VPBlendRecipe. Currently
88+
/// all such phi nodes are turned into a sequence of select instructions as
89+
/// the vectorizer currently performs full if-conversion.
90+
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
91+
VPlanPtr &Plan);
9392

9493
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
9594
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
@@ -100,11 +99,8 @@ class VPRecipeBuilder {
10099
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
101100
/// if it can. The function should only be called if the cost-model indicates
102101
/// that widening should be performed.
103-
VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
104-
VPBasicBlock *VPBB, VPlanPtr &Plan);
105-
106-
/// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue.
107-
VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }
102+
VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
103+
VPBasicBlock *VPBB, VPlanPtr &Plan);
108104

109105
public:
110106
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
@@ -114,14 +110,12 @@ class VPRecipeBuilder {
114110
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
115111
Builder(Builder) {}
116112

117-
/// Check if an existing VPValue can be used for \p Instr or a recipe can be
118-
/// create for \p I withing the given VF \p Range. If an existing VPValue can
119-
/// be used or if a recipe can be created, return it. Otherwise return a
120-
/// VPRecipeOrVPValueTy with nullptr.
121-
VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr,
122-
ArrayRef<VPValue *> Operands,
123-
VFRange &Range, VPBasicBlock *VPBB,
124-
VPlanPtr &Plan);
113+
/// Create and return a widened recipe for \p Instr if one can be created within
114+
/// the given VF \p Range.
115+
VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
116+
ArrayRef<VPValue *> Operands,
117+
VFRange &Range, VPBasicBlock *VPBB,
118+
VPlanPtr &Plan);
125119

126120
/// Set the recipe created for given ingredient. This operation is a no-op for
127121
/// ingredients that were not marked using a nullptr entry in the map.
@@ -172,8 +166,8 @@ class VPRecipeBuilder {
172166
/// Build a VPReplicateRecipe for \p I. If it is predicated, add the mask as
173167
/// last operand. Range.End may be decreased to ensure same recipe behavior
174168
/// from \p Range.Start to \p Range.End.
175-
VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range,
176-
VPlan &Plan);
169+
VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range,
170+
VPlan &Plan);
177171

178172
/// Add the incoming values from the backedge to reduction & first-order
179173
/// recurrence cross-iteration phis.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,17 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
827827

828828
/// Try to simplify recipe \p R.
829829
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
830+
// Try to remove redundant blend recipes.
831+
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
832+
VPValue *Inc0 = Blend->getIncomingValue(0);
833+
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
834+
if (Inc0 != Blend->getIncomingValue(I))
835+
return;
836+
Blend->replaceAllUsesWith(Inc0);
837+
Blend->eraseFromParent();
838+
return;
839+
}
840+
830841
switch (getOpcodeForRecipe(R)) {
831842
case Instruction::Mul: {
832843
VPValue *A = R.getOperand(0);
@@ -1031,8 +1042,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
10311042
removeRedundantCanonicalIVs(Plan);
10321043
removeRedundantInductionCasts(Plan);
10331044

1034-
optimizeInductions(Plan, SE);
10351045
simplifyRecipes(Plan, SE.getContext());
1046+
optimizeInductions(Plan, SE);
10361047
removeDeadRecipes(Plan);
10371048

10381049
createAndOptimizeReplicateRegions(Plan);

0 commit comments

Comments
 (0)