-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. #76090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7c31c8b
39d020c
d592661
96fcdc5
8627dd4
0727949
ba5e787
2e49100
42fa0a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { | |
BlockMaskCache[BB] = BlockMask; | ||
} | ||
|
||
VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, | ||
ArrayRef<VPValue *> Operands, | ||
VFRange &Range, | ||
VPlanPtr &Plan) { | ||
VPWidenMemoryInstructionRecipe * | ||
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPlanPtr &Plan) { | ||
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && | ||
"Must be called with either a load or store"); | ||
|
||
|
@@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc, | |
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc); | ||
} | ||
|
||
VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI( | ||
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( | ||
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) { | ||
|
||
// Check if this is an integer or fp induction. If so, build the recipe that | ||
|
@@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( | |
return nullptr; | ||
} | ||
|
||
VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, | ||
ArrayRef<VPValue *> Operands, | ||
VPlanPtr &Plan) { | ||
// If all incoming values are equal, the incoming VPValue can be used directly | ||
// instead of creating a new VPBlendRecipe. | ||
if (llvm::all_equal(Operands)) | ||
return Operands[0]; | ||
|
||
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, | ||
ArrayRef<VPValue *> Operands, | ||
VPlanPtr &Plan) { | ||
unsigned NumIncoming = Phi->getNumIncomingValues(); | ||
// For in-loop reductions, we do not need to create an additional select. | ||
VPValue *InLoopVal = nullptr; | ||
for (unsigned In = 0; In < NumIncoming; In++) { | ||
PHINode *PhiOp = | ||
dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue()); | ||
if (PhiOp && CM.isInLoopReduction(PhiOp)) { | ||
assert(!InLoopVal && "Found more than one in-loop reduction!"); | ||
InLoopVal = Operands[In]; | ||
} | ||
} | ||
|
||
assert((!InLoopVal || NumIncoming == 2) && | ||
"Found an in-loop reduction for PHI with unexpected number of " | ||
"incoming values"); | ||
if (InLoopVal) | ||
return Operands[Operands[0] == InLoopVal ? 1 : 0]; | ||
|
||
// We know that all PHIs in non-header blocks are converted into selects, so | ||
// we don't have to worry about the insertion order and we can just use the | ||
|
@@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, | |
SmallVector<VPValue *, 2> OperandsWithMask; | ||
|
||
for (unsigned In = 0; In < NumIncoming; In++) { | ||
OperandsWithMask.push_back(Operands[In]); | ||
VPValue *EdgeMask = | ||
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan); | ||
assert((EdgeMask || NumIncoming == 1) && | ||
"Multiple predecessors with one having a full mask"); | ||
OperandsWithMask.push_back(Operands[In]); | ||
if (EdgeMask) | ||
OperandsWithMask.push_back(EdgeMask); | ||
if (!EdgeMask) { | ||
assert(In == 0 && "Both null and non-null edge masks found"); | ||
assert(all_equal(Operands) && | ||
"Distinct incoming values with one having a full mask"); | ||
break; | ||
} | ||
OperandsWithMask.push_back(EdgeMask); | ||
} | ||
return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask)); | ||
return new VPBlendRecipe(Phi, OperandsWithMask); | ||
} | ||
|
||
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | ||
|
@@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { | |
Range); | ||
} | ||
|
||
VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I, | ||
ArrayRef<VPValue *> Operands, | ||
VPBasicBlock *VPBB, VPlanPtr &Plan) { | ||
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, | ||
ArrayRef<VPValue *> Operands, | ||
VPBasicBlock *VPBB, VPlanPtr &Plan) { | ||
switch (I->getOpcode()) { | ||
default: | ||
return nullptr; | ||
|
@@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() { | |
} | ||
} | ||
|
||
VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, | ||
VFRange &Range, | ||
VPlan &Plan) { | ||
VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I, | ||
VFRange &Range, | ||
VPlan &Plan) { | ||
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( | ||
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, | ||
Range); | ||
|
@@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, | |
|
||
auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), | ||
IsUniform, BlockInMask); | ||
return toVPRecipeResult(Recipe); | ||
return Recipe; | ||
} | ||
|
||
VPRecipeOrVPValueTy | ||
VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, | ||
ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPBasicBlock *VPBB, | ||
VPlanPtr &Plan) { | ||
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( | ||
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range, | ||
VPBasicBlock *VPBB, VPlanPtr &Plan) { | ||
// First, check for specific widening recipes that deal with inductions, Phi | ||
// nodes, calls and memory operations. | ||
VPRecipeBase *Recipe; | ||
|
@@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, | |
recordRecipeOf(Phi); | ||
|
||
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) | ||
return toVPRecipeResult(Recipe); | ||
return Recipe; | ||
|
||
VPHeaderPHIRecipe *PhiRecipe = nullptr; | ||
assert((Legal->isReductionVariable(Phi) || | ||
|
@@ -8555,43 +8534,43 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, | |
recordRecipeOf(Inc); | ||
|
||
PhisToFix.push_back(PhiRecipe); | ||
return toVPRecipeResult(PhiRecipe); | ||
return PhiRecipe; | ||
} | ||
|
||
if (isa<TruncInst>(Instr) && | ||
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands, | ||
Range, *Plan))) | ||
return toVPRecipeResult(Recipe); | ||
return Recipe; | ||
|
||
// All widen recipes below deal only with VF > 1. | ||
if (LoopVectorizationPlanner::getDecisionAndClampRange( | ||
[&](ElementCount VF) { return VF.isScalar(); }, Range)) | ||
return nullptr; | ||
|
||
if (auto *CI = dyn_cast<CallInst>(Instr)) | ||
return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan)); | ||
return tryToWidenCall(CI, Operands, Range, Plan); | ||
|
||
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) | ||
return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); | ||
return tryToWidenMemory(Instr, Operands, Range, Plan); | ||
|
||
if (!shouldWiden(Instr, Range)) | ||
return nullptr; | ||
|
||
if (auto GEP = dyn_cast<GetElementPtrInst>(Instr)) | ||
return toVPRecipeResult(new VPWidenGEPRecipe( | ||
GEP, make_range(Operands.begin(), Operands.end()))); | ||
return new VPWidenGEPRecipe(GEP, | ||
make_range(Operands.begin(), Operands.end())); | ||
|
||
if (auto *SI = dyn_cast<SelectInst>(Instr)) { | ||
return toVPRecipeResult(new VPWidenSelectRecipe( | ||
*SI, make_range(Operands.begin(), Operands.end()))); | ||
return new VPWidenSelectRecipe( | ||
*SI, make_range(Operands.begin(), Operands.end())); | ||
} | ||
|
||
if (auto *CI = dyn_cast<CastInst>(Instr)) { | ||
return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0], | ||
CI->getType(), *CI)); | ||
return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), | ||
*CI); | ||
} | ||
|
||
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); | ||
return tryToWiden(Instr, Operands, VPBB, Plan); | ||
} | ||
|
||
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, | ||
|
@@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) | ||
continue; | ||
|
||
auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( | ||
VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe( | ||
Instr, Operands, Range, VPBB, Plan); | ||
if (!RecipeOrValue) | ||
RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan); | ||
// If Instr can be simplified to an existing VPValue, use it. | ||
if (isa<VPValue *>(RecipeOrValue)) { | ||
auto *VPV = cast<VPValue *>(RecipeOrValue); | ||
Plan->addVPValue(Instr, VPV); | ||
// If the re-used value is a recipe, register the recipe for the | ||
// instruction, in case the recipe for Instr needs to be recorded. | ||
if (VPRecipeBase *R = VPV->getDefiningRecipe()) | ||
RecipeBuilder.setRecipe(Instr, R); | ||
continue; | ||
} | ||
// Otherwise, add the new recipe. | ||
VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue); | ||
if (!Recipe) | ||
Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan); | ||
for (auto *Def : Recipe->definedValues()) { | ||
auto *UV = Def->getUnderlyingValue(); | ||
Plan->addVPValue(UV, Def); | ||
|
@@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
// the phi until LoopExitValue. We keep track of the previous item | ||
// (PreviousLink) to tell which of the two operands of a Link will remain | ||
// scalar and which will be reduced. For minmax by select(cmp), Link will be | ||
// the select instructions. | ||
// the select instructions. Blend recipes of in-loop reduction phi's will | ||
// get folded to their non-phi operand, as the reduction recipe handles the | ||
// condition directly. | ||
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0]. | ||
for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) { | ||
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr(); | ||
|
@@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator()); | ||
VecOp = FMulRecipe; | ||
} else { | ||
auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink); | ||
if (PhiR->isInLoop() && Blend) { | ||
assert(Blend->getNumIncomingValues() == 2 && | ||
"Blend must have 2 incoming values"); | ||
if (Blend->getIncomingValue(0) == PhiR) | ||
Blend->replaceAllUsesWith(Blend->getIncomingValue(1)); | ||
else { | ||
assert(Blend->getIncomingValue(1) == PhiR && | ||
"PhiR must be an operand of the blend"); | ||
Comment on lines
+9051
to
+9052
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: may be slightly more consistent to have the assert first. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
Blend->replaceAllUsesWith(Blend->getIncomingValue(0)); | ||
} | ||
continue; | ||
} | ||
|
||
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { | ||
if (isa<VPWidenRecipe>(CurrentLink)) { | ||
assert(isa<CmpInst>(CurrentLinkI) && | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,6 @@ class LoopVectorizationLegality; | |
class LoopVectorizationCostModel; | ||
class TargetLibraryInfo; | ||
|
||
using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>; | ||
|
||
/// Helper class to create VPRecipies from IR instructions. | ||
class VPRecipeBuilder { | ||
/// The loop that we evaluate. | ||
|
@@ -69,27 +67,28 @@ class VPRecipeBuilder { | |
/// Check if the load or store instruction \p I should widened for \p | ||
/// Range.Start and potentially masked. Such instructions are handled by a | ||
/// recipe that takes an additional VPInstruction for the mask. | ||
VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPlanPtr &Plan); | ||
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, | ||
ArrayRef<VPValue *> Operands, | ||
VFRange &Range, | ||
VPlanPtr &Plan); | ||
|
||
/// Check if an induction recipe should be constructed for \p Phi. If so build | ||
/// and return it. If not, return null. | ||
VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi, | ||
ArrayRef<VPValue *> Operands, | ||
VPlan &Plan, VFRange &Range); | ||
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, | ||
ArrayRef<VPValue *> Operands, | ||
VPlan &Plan, VFRange &Range); | ||
|
||
/// Optimize the special case where the operand of \p I is a constant integer | ||
/// induction variable. | ||
VPWidenIntOrFpInductionRecipe * | ||
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPlan &Plan); | ||
|
||
/// Handle non-loop phi nodes. Return a VPValue, if all incoming values match | ||
/// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned | ||
/// into a sequence of select instructions as the vectorizer currently | ||
/// performs full if-conversion. | ||
VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, | ||
VPlanPtr &Plan); | ||
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently | ||
/// all such phi nodes are turned into a sequence of select instructions as | ||
/// the vectorizer currently performs full if-conversion. | ||
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, | ||
VPlanPtr &Plan); | ||
|
||
/// Handle call instructions. If \p CI can be widened for \p Range.Start, | ||
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same | ||
|
@@ -100,11 +99,8 @@ class VPRecipeBuilder { | |
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Indeed back to returning VPWidenRecipe as documented...) |
||
/// if it can. The function should only be called if the cost-model indicates | ||
/// that widening should be performed. | ||
VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, | ||
VPBasicBlock *VPBB, VPlanPtr &Plan); | ||
|
||
/// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue. | ||
VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; } | ||
VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, | ||
VPBasicBlock *VPBB, VPlanPtr &Plan); | ||
|
||
public: | ||
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, | ||
|
@@ -114,14 +110,12 @@ class VPRecipeBuilder { | |
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE), | ||
Builder(Builder) {} | ||
|
||
/// Check if an existing VPValue can be used for \p Instr or a recipe can be | ||
/// create for \p I withing the given VF \p Range. If an existing VPValue can | ||
/// be used or if a recipe can be created, return it. Otherwise return a | ||
/// VPRecipeOrVPValueTy with nullptr. | ||
VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, | ||
ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPBasicBlock *VPBB, | ||
VPlanPtr &Plan); | ||
/// Create and return a widened recipe for \p I if one can be created within | ||
/// the given VF \p Range. | ||
VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, | ||
ArrayRef<VPValue *> Operands, | ||
VFRange &Range, VPBasicBlock *VPBB, | ||
VPlanPtr &Plan); | ||
|
||
/// Set the recipe created for given ingredient. This operation is a no-op for | ||
/// ingredients that were not marked using a nullptr entry in the map. | ||
|
@@ -172,8 +166,8 @@ class VPRecipeBuilder { | |
/// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as | ||
/// last operand. Range.End may be decreased to ensure same recipe behavior | ||
/// from \p Range.Start to \p Range.End. | ||
VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range, | ||
VPlan &Plan); | ||
VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range, | ||
VPlan &Plan); | ||
|
||
/// Add the incoming values from the backedge to reduction & first-order | ||
/// recurrence cross-iteration phis. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -827,6 +827,17 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) { | |
|
||
/// Try to simplify recipe \p R. | ||
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | ||
// Try to remove redundant blend recipes. | ||
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: may be worth defining and reusing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
VPValue *Inc0 = Blend->getIncomingValue(0); | ||
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I) | ||
if (Inc0 != Blend->getIncomingValue(I)) | ||
return; | ||
Blend->replaceAllUsesWith(Inc0); | ||
Blend->eraseFromParent(); | ||
return; | ||
} | ||
|
||
switch (getOpcodeForRecipe(R)) { | ||
case Instruction::Mul: { | ||
VPValue *A = R.getOperand(0); | ||
|
@@ -1031,8 +1042,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { | |
removeRedundantCanonicalIVs(Plan); | ||
removeRedundantInductionCasts(Plan); | ||
|
||
optimizeInductions(Plan, SE); | ||
simplifyRecipes(Plan, SE.getContext()); | ||
optimizeInductions(Plan, SE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this phase ordering change still needed, if the mandatory removal of a conditional in-loop reduction blend is moved away from simplifyRecipes(), and into adjustRecipesForReductions()? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without the change, here's a function in |
||
removeDeadRecipes(Plan); | ||
|
||
createAndOptimizeReplicateRegions(Plan); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add folding of inloop blend to documentation above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!