Skip to content

[VPlan] Process simplifyRecipes via a worklist #133977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class VPTypeAnalysis {

/// Return the LLVMContext used by the analysis.
LLVMContext &getContext() { return Ctx; }

/// Remove \p V from the cache. You must call this after a value is erased.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Remove \p V from the cache. You must call this after a value is erased.
/// Remove \p V from the cache. You must call this after a VPValue in the cache is erased.

(I think just erasing isn't a problem, the problem is when new VPValues/recipes are created and added to the cache, if they get allocated to the same address as a previous cached VPValue. But that's too detailed ;))

void erase(VPValue *V) { CachedTypes.erase(V); }
};

// Collect a VPlan's ephemeral recipes (those used only by an assume).
Expand Down
75 changes: 44 additions & 31 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}

/// Try to simplify recipe \p R.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Try to simplify recipe \p R.
/// Try to simplify recipe \p R and return the simplified VPValue or nullptr if it could not be simplified..

static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
using namespace llvm::VPlanPatternMatch;

// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
Expand All @@ -932,8 +932,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
return Steps->getOperand(0);
}
}

Expand All @@ -943,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
Type *ATy = TypeInfo.inferScalarType(A);
if (TruncTy == ATy) {
Trunc->replaceAllUsesWith(A);
return A;
} else {
// Don't replace a scalarizing recipe with a widened cast.
if (isa<VPReplicateRecipe>(&R))
return;
return nullptr;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {

unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
Expand All @@ -960,25 +959,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPC->setUnderlyingValue(UnderlyingExt);
}
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
return VPC;
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
return VPC;
}
}
#ifndef NDEBUG
// Verify that the cached type info is for both A and its users is still
// accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(
R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
for (VPUser *U : A->users()) {
auto *R = cast<VPRecipeBase>(U);
for (VPValue *VPV : R->definedValues())
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
}
#endif
}

// Simplify (X && Y) || (X && !Y) -> X.
Expand All @@ -988,20 +975,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPValue *X, *Y;
if (match(&R,
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
R.getVPSingleValue()->replaceAllUsesWith(X);
R.eraseFromParent();
return;
}
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
return X;

if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return R.getVPSingleValue()->replaceAllUsesWith(X);
return X;

if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
return A;

if (match(&R, m_Not(m_Not(m_VPValue(A)))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
return A;

// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(&R,
Expand All @@ -1010,16 +994,45 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
TypeInfo.inferScalarType(R.getOperand(1)) ==
TypeInfo.inferScalarType(R.getVPSingleValue()))
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
return R.getOperand(1);

return nullptr;
}

void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
simplifyRecipe(R, TypeInfo);
SetVector<VPRecipeBase *> Worklist;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs to be a std::deque, because we want to pop_front, and push_back. See also: #93998.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh thanks for sharing that, I didn't know about that previous PR. Why do we need it to be pop_front/push_back? I believe InstCombine uses pop_back/push_back, i.e. immediately processes anything added to the list.

I also just realised that InstCombine pushes the initial instructions onto the worklist in order, so the perceived order is actually top to bottom. I've updated that here 3ca3643

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't matter yet, but matters when you will put up a patch with functional changes: I believe the order of simplification matters unlike in InstCombine, but I'm happy to address the point in the follow-up patch. Let's keep this thread open in case other reviewers have something to say about this.

for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
for (VPRecipeBase &R : reverse(*VPBB))
Worklist.insert(&R);

while (!Worklist.empty()) {
VPRecipeBase *R = Worklist.pop_back_val();
if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth Turing the check into early continue, to reduce nesting level

R->getVPSingleValue()->replaceAllUsesWith(Result);
TypeInfo.erase(R->getVPSingleValue());
R->eraseFromParent();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think erasing R might invalidate the VTypeAnalysis cache, I'll take a look to see if this is a problem

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we need eraseFromParent, as there is recursivelyRemoveDeadRecipes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've manually erased the deleted value from the cache in 4e05ab9.

This was an existing issue with the simplification pass but it looks like it didn't trigger in the wild. I think it's probably easiest to fix it in this PR now that we erase all replaced recipes, which should prevent #120252.

I've also moved the VTypeAnalysis assertion up so it should run for all transforms, not just the trunc combine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@artagnon erasing the old instruction decreases the number of uses which can affect what transforms kick in, e.g. the blend simplifications are sensitive to the number of uses.

Also if the old recipe uses the new result, it will get added back to the worklist which will loop

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should mention I took the structure from this from InstructionCombining.cpp! There's a few things there that I think we could eventually copy over, e.g. adding any operands of the recipe to the worklist now that they have one less use.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be good if we would have a way to verify we aren't stuck in an infinite cycle.

Not 100% sure how instcombine checks for that, but we could add the new users to simplify in a worklist and switch to it when we are done with the current wordlist and limit the times we switch.

if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
Worklist.insert(ResultR);
for (VPUser *U : Result->users())
if (auto *UR = dyn_cast<VPRecipeBase>(U))
Worklist.insert(UR);

#ifndef NDEBUG
// Verify that the cached type info is for both Result and its users is
// still accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(&CanonicalIVTy);
assert(TypeInfo.inferScalarType(Result) ==
TypeInfo2.inferScalarType(Result));
for (VPUser *U : Result->users()) {
auto *R = cast<VPRecipeBase>(U);
for (VPValue *VPV : R->definedValues())
assert(TypeInfo.inferScalarType(VPV) ==
TypeInfo2.inferScalarType(VPV));
}
#endif
}
}
}
Expand Down