Skip to content

[VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. #76090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 29, 2024
133 changes: 58 additions & 75 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
BlockMaskCache[BB] = BlockMask;
}

VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range,
VPlanPtr &Plan) {
VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlanPtr &Plan) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");

Expand Down Expand Up @@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
}

VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {

// Check if this is an integer or fp induction. If so, build the recipe that
Expand Down Expand Up @@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
return nullptr;
}

VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlanPtr &Plan) {
// If all incoming values are equal, the incoming VPValue can be used directly
// instead of creating a new VPBlendRecipe.
if (llvm::all_equal(Operands))
return Operands[0];

VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlanPtr &Plan) {
unsigned NumIncoming = Phi->getNumIncomingValues();
// For in-loop reductions, we do not need to create an additional select.
VPValue *InLoopVal = nullptr;
for (unsigned In = 0; In < NumIncoming; In++) {
PHINode *PhiOp =
dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
if (PhiOp && CM.isInLoopReduction(PhiOp)) {
assert(!InLoopVal && "Found more than one in-loop reduction!");
InLoopVal = Operands[In];
}
}

assert((!InLoopVal || NumIncoming == 2) &&
"Found an in-loop reduction for PHI with unexpected number of "
"incoming values");
if (InLoopVal)
return Operands[Operands[0] == InLoopVal ? 1 : 0];

// We know that all PHIs in non-header blocks are converted into selects, so
// we don't have to worry about the insertion order and we can just use the
Expand All @@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
SmallVector<VPValue *, 2> OperandsWithMask;

for (unsigned In = 0; In < NumIncoming; In++) {
OperandsWithMask.push_back(Operands[In]);
VPValue *EdgeMask =
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
assert((EdgeMask || NumIncoming == 1) &&
"Multiple predecessors with one having a full mask");
OperandsWithMask.push_back(Operands[In]);
if (EdgeMask)
OperandsWithMask.push_back(EdgeMask);
if (!EdgeMask) {
assert(In == 0 && "Both null and non-null edge masks found");
assert(all_equal(Operands) &&
"Distinct incoming values with one having a full mask");
break;
}
OperandsWithMask.push_back(EdgeMask);
}
return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask));
return new VPBlendRecipe(Phi, OperandsWithMask);
}

VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
Expand Down Expand Up @@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
Range);
}

VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
switch (I->getOpcode()) {
default:
return nullptr;
Expand Down Expand Up @@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
}
}

VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
VFRange &Range,
VPlan &Plan) {
VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
VFRange &Range,
VPlan &Plan) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
Expand Down Expand Up @@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,

auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
IsUniform, BlockInMask);
return toVPRecipeResult(Recipe);
return Recipe;
}

VPRecipeOrVPValueTy
VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
VFRange &Range, VPBasicBlock *VPBB,
VPlanPtr &Plan) {
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
Expand All @@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
recordRecipeOf(Phi);

if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
return toVPRecipeResult(Recipe);
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Expand Down Expand Up @@ -8555,43 +8534,43 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
recordRecipeOf(Inc);

PhisToFix.push_back(PhiRecipe);
return toVPRecipeResult(PhiRecipe);
return PhiRecipe;
}

if (isa<TruncInst>(Instr) &&
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
Range, *Plan)))
return toVPRecipeResult(Recipe);
return Recipe;

// All widen recipes below deal only with VF > 1.
if (LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return VF.isScalar(); }, Range))
return nullptr;

if (auto *CI = dyn_cast<CallInst>(Instr))
return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
return tryToWidenCall(CI, Operands, Range, Plan);

if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
return tryToWidenMemory(Instr, Operands, Range, Plan);

if (!shouldWiden(Instr, Range))
return nullptr;

if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
return toVPRecipeResult(new VPWidenGEPRecipe(
GEP, make_range(Operands.begin(), Operands.end())));
return new VPWidenGEPRecipe(GEP,
make_range(Operands.begin(), Operands.end()));

if (auto *SI = dyn_cast<SelectInst>(Instr)) {
return toVPRecipeResult(new VPWidenSelectRecipe(
*SI, make_range(Operands.begin(), Operands.end())));
return new VPWidenSelectRecipe(
*SI, make_range(Operands.begin(), Operands.end()));
}

if (auto *CI = dyn_cast<CastInst>(Instr)) {
return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
CI->getType(), *CI));
return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(),
*CI);
}

return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
return tryToWiden(Instr, Operands, VPBB, Plan);
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
Expand Down Expand Up @@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
continue;

auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
Instr, Operands, Range, VPBB, Plan);
if (!RecipeOrValue)
RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan);
// If Instr can be simplified to an existing VPValue, use it.
if (isa<VPValue *>(RecipeOrValue)) {
auto *VPV = cast<VPValue *>(RecipeOrValue);
Plan->addVPValue(Instr, VPV);
// If the re-used value is a recipe, register the recipe for the
// instruction, in case the recipe for Instr needs to be recorded.
if (VPRecipeBase *R = VPV->getDefiningRecipe())
RecipeBuilder.setRecipe(Instr, R);
continue;
}
// Otherwise, add the new recipe.
VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
if (!Recipe)
Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
for (auto *Def : Recipe->definedValues()) {
auto *UV = Def->getUnderlyingValue();
Plan->addVPValue(UV, Def);
Expand Down Expand Up @@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// the phi until LoopExitValue. We keep track of the previous item
// (PreviousLink) to tell which of the two operands of a Link will remain
// scalar and which will be reduced. For minmax by select(cmp), Link will be
// the select instructions.
// the select instructions. Blend recipes of in-loop reduction phi's will
// get folded to their non-phi operand, as the reduction recipe handles the
// condition directly.
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
Expand Down Expand Up @@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
VecOp = FMulRecipe;
} else {
auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add folding of inloop blend to documentation above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

if (PhiR->isInLoop() && Blend) {
assert(Blend->getNumIncomingValues() == 2 &&
"Blend must have 2 incoming values");
if (Blend->getIncomingValue(0) == PhiR)
Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
else {
assert(Blend->getIncomingValue(1) == PhiR &&
"PhiR must be an operand of the blend");
Comment on lines +9051 to +9052
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: may be slightly more consistent to have the assert first.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
}
continue;
}

if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
if (isa<VPWidenRecipe>(CurrentLink)) {
assert(isa<CmpInst>(CurrentLinkI) &&
Expand Down
50 changes: 22 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class TargetLibraryInfo;

using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>;

/// Helper class to create VPRecipies from IR instructions.
class VPRecipeBuilder {
/// The loop that we evaluate.
Expand Down Expand Up @@ -69,27 +67,28 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlanPtr &Plan);
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range,
VPlanPtr &Plan);

/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlan &Plan, VFRange &Range);
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlan &Plan, VFRange &Range);

/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
VPWidenIntOrFpInductionRecipe *
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlan &Plan);

/// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
/// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
/// into a sequence of select instructions as the vectorizer currently
/// performs full if-conversion.
VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
VPlanPtr &Plan);
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
/// all such phi nodes are turned into a sequence of select instructions as
/// the vectorizer currently performs full if-conversion.
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
VPlanPtr &Plan);

/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
Expand All @@ -100,11 +99,8 @@ class VPRecipeBuilder {
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Indeed back to returning VPWidenRecipe as documented...)

/// if it can. The function should only be called if the cost-model indicates
/// that widening should be performed.
VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan);

/// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue.
VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }
VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan);

public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
Expand All @@ -114,14 +110,12 @@ class VPRecipeBuilder {
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
Builder(Builder) {}

/// Check if an existing VPValue can be used for \p Instr or a recipe can be
/// create for \p I withing the given VF \p Range. If an existing VPValue can
/// be used or if a recipe can be created, return it. Otherwise return a
/// VPRecipeOrVPValueTy with nullptr.
VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
VFRange &Range, VPBasicBlock *VPBB,
VPlanPtr &Plan);
/// Create and return a widened recipe for \p I if one can be created within
/// the given VF \p Range.
VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
VFRange &Range, VPBasicBlock *VPBB,
VPlanPtr &Plan);

/// Set the recipe created for given ingredient. This operation is a no-op for
/// ingredients that were not marked using a nullptr entry in the map.
Expand Down Expand Up @@ -172,8 +166,8 @@ class VPRecipeBuilder {
/// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
/// last operand. Range.End may be decreased to ensure same recipe behavior
/// from \p Range.Start to \p Range.End.
VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range,
VPlan &Plan);
VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range,
VPlan &Plan);

/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,17 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {

/// Try to simplify recipe \p R.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// Try to remove redundant blend recipes.
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: may be worth defining and reusing IncomingValue0 = Blend->getIncomingValue(0);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

VPValue *Inc0 = Blend->getIncomingValue(0);
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
if (Inc0 != Blend->getIncomingValue(I))
return;
Blend->replaceAllUsesWith(Inc0);
Blend->eraseFromParent();
return;
}

switch (getOpcodeForRecipe(R)) {
case Instruction::Mul: {
VPValue *A = R.getOperand(0);
Expand Down Expand Up @@ -1031,8 +1042,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantCanonicalIVs(Plan);
removeRedundantInductionCasts(Plan);

optimizeInductions(Plan, SE);
simplifyRecipes(Plan, SE.getContext());
optimizeInductions(Plan, SE);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this phase ordering change still needed, if the mandatory removal of a conditional in-loop reduction blend is moved away from simplifyRecipes(), and into adjustRecipesForReductions()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the change, here's a function in lvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll where optimizeInductions won't simplify an induction, because there's a wide blend use left.

removeDeadRecipes(Plan);

createAndOptimizeReplicateRegions(Plan);
Expand Down