-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Create block in mask up-front if needed. #76635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7947,7 +7947,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, | |
if (ECEntryIt != EdgeMaskCache.end()) | ||
return ECEntryIt->second; | ||
|
||
VPValue *SrcMask = createBlockInMask(Src, Plan); | ||
VPValue *SrcMask = getBlockInMask(Src); | ||
|
||
// The terminator has to be a branch inst! | ||
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator()); | ||
|
@@ -8009,14 +8009,17 @@ void VPRecipeBuilder::createHeaderMask(VPlan &Plan) { | |
BlockMaskCache[Header] = BlockMask; | ||
} | ||
|
||
VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { | ||
assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); | ||
|
||
// Look for cached value. | ||
BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); | ||
if (BCEntryIt != BlockMaskCache.end()) | ||
return BCEntryIt->second; | ||
VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const { | ||
// Return the cached value. | ||
BlockMaskCacheTy::const_iterator BCEntryIt = BlockMaskCache.find(BB); | ||
assert(BCEntryIt != BlockMaskCache.end() && | ||
"Trying to access mask for block without one."); | ||
return BCEntryIt->second; | ||
} | ||
|
||
void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { | ||
assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); | ||
assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed"); | ||
assert(OrigLoop->getHeader() != BB && | ||
"Loop header must have cached block mask"); | ||
|
||
|
@@ -8026,8 +8029,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { | |
// This is the block mask. We OR all incoming edges. | ||
for (auto *Predecessor : predecessors(BB)) { | ||
VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); | ||
if (!EdgeMask) // Mask of predecessor is all-one so mask of block is too. | ||
return BlockMaskCache[BB] = EdgeMask; | ||
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too. | ||
BlockMaskCache[BB] = EdgeMask; | ||
} | ||
|
||
if (!BlockMask) { // BlockMask has its initialized nullptr value. | ||
BlockMask = EdgeMask; | ||
|
@@ -8037,7 +8041,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { | |
BlockMask = Builder.createOr(BlockMask, EdgeMask, {}); | ||
} | ||
|
||
return BlockMaskCache[BB] = BlockMask; | ||
BlockMaskCache[BB] = BlockMask; | ||
} | ||
|
||
VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, | ||
|
@@ -8065,7 +8069,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, | |
|
||
VPValue *Mask = nullptr; | ||
if (Legal->isMaskRequired(I)) | ||
Mask = createBlockInMask(I->getParent(), *Plan); | ||
Mask = getBlockInMask(I->getParent()); | ||
|
||
// Determine if the pointer operand of the access is either consecutive or | ||
// reverse consecutive. | ||
|
@@ -8287,7 +8291,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | |
// all-true mask. | ||
VPValue *Mask = nullptr; | ||
if (Legal->isMaskRequired(CI)) | ||
Mask = createBlockInMask(CI->getParent(), *Plan); | ||
Mask = getBlockInMask(CI->getParent()); | ||
else | ||
Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue( | ||
IntegerType::getInt1Ty(Variant->getFunctionType()->getContext()))); | ||
|
@@ -8330,7 +8334,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I, | |
// div/rem operation itself. Otherwise fall through to general handling below. | ||
if (CM.isPredicatedInst(I)) { | ||
SmallVector<VPValue *> Ops(Operands.begin(), Operands.end()); | ||
VPValue *Mask = createBlockInMask(I->getParent(), *Plan); | ||
VPValue *Mask = getBlockInMask(I->getParent()); | ||
VPValue *One = Plan->getVPValueOrAddLiveIn( | ||
ConstantInt::get(I->getType(), 1u, false)); | ||
auto *SafeRHS = | ||
|
@@ -8424,7 +8428,7 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, | |
// added initially. Masked replicate recipes will later be placed under an | ||
// if-then construct to prevent side-effects. Generate recipes to compute | ||
// the block mask for this region. | ||
BlockInMask = createBlockInMask(I->getParent(), Plan); | ||
BlockInMask = getBlockInMask(I->getParent()); | ||
} | ||
|
||
auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), | ||
|
@@ -8659,23 +8663,28 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
bool HasNUW = Style == TailFoldingStyle::None; | ||
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); | ||
|
||
// Proactively create header mask. Masks for other blocks are created on | ||
// demand. | ||
RecipeBuilder.createHeaderMask(*Plan); | ||
|
||
// Scan the body of the loop in a topological order to visit each basic block | ||
// after having visited its predecessor basic blocks. | ||
LoopBlocksDFS DFS(OrigLoop); | ||
DFS.perform(LI); | ||
|
||
VPBasicBlock *VPBB = HeaderVPBB; | ||
bool NeedsMasks = CM.foldTailByMasking() || | ||
any_of(OrigLoop->blocks(), [this](BasicBlock *BB) { | ||
return Legal->blockNeedsPredication(BB); | ||
}); | ||
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { | ||
// Relevant instructions from basic block BB will be grouped into VPRecipe | ||
// ingredients and fill a new VPBasicBlock. | ||
if (VPBB != HeaderVPBB) | ||
VPBB->setName(BB->getName()); | ||
Builder.setInsertPoint(VPBB); | ||
|
||
if (VPBB == HeaderVPBB) | ||
RecipeBuilder.createHeaderMask(*Plan); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this retains the previous behavior of always creating a mask for HeaderVPBB, proactively. But we can refrain from doing so if !NeedsMasks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately So even if tail-folding wasn't picked by the cost-model, loads/stores (and others) will be marked as requiring a mask, which will then lead to looking up a header mask. This could be potentially un-tangled, as in #77612 |
||
else if (NeedsMasks) | ||
RecipeBuilder.createBlockInMask(BB, *Plan); | ||
|
||
// Introduce each ingredient into VPlan. | ||
// TODO: Model and preserve debug intrinsics in VPlan. | ||
for (Instruction &I : drop_end(BB->instructionsWithoutDebug(false))) { | ||
|
@@ -9024,7 +9033,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
if (CM.blockNeedsPredicationForAnyReason(BB)) { | ||
VPBuilder::InsertPointGuard Guard(Builder); | ||
Builder.setInsertPoint(CurrentLink); | ||
CondOp = RecipeBuilder.createBlockInMask(BB, *Plan); | ||
CondOp = RecipeBuilder.getBlockInMask(BB); | ||
} | ||
|
||
VPReductionRecipe *RedRecipe = new VPReductionRecipe( | ||
|
@@ -9052,8 +9061,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
auto *OrigExitingVPV = PhiR->getBackedgeValue(); | ||
auto *NewExitingVPV = PhiR->getBackedgeValue(); | ||
if (!PhiR->isInLoop() && CM.foldTailByMasking()) { | ||
VPValue *Cond = | ||
RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan); | ||
VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader()); | ||
assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB && | ||
"reduction recipe must be defined before latch"); | ||
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,8 +138,11 @@ class VPRecipeBuilder { | |
|
||
/// A helper function that computes the predicate of the block BB, assuming | ||
/// that the header block of the loop is set to True or the loop mask when | ||
/// tail folding. It returns the *entry* mask for the block BB. | ||
VPValue *createBlockInMask(BasicBlock *BB, VPlan &Plan); | ||
/// tail folding. | ||
void createBlockInMask(BasicBlock *BB, VPlan &Plan); | ||
|
||
/// Returns the *entry* mask for the block \p BB. | ||
VPValue *getBlockInMask(BasicBlock *BB) const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't these two be |
||
|
||
/// A helper function that computes the predicate of the edge between SRC | ||
/// and DST. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -120,6 +120,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { | |
return false; | ||
case VPInstructionSC: | ||
switch (cast<VPInstruction>(this)->getOpcode()) { | ||
case Instruction::Or: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix independent of this patch? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep it is technically independent but effectively dead code without this change, hence the direct inclusion here. |
||
case Instruction::ICmp: | ||
case Instruction::Select: | ||
case VPInstruction::Not: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should retain early-exit and
return
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, re-added in 8b7bbed