Skip to content

Commit 507e366

Browse files
committed
[LV] Split checking if tail-folding is possible, collecting masked ops.
Introduce a new canFoldTailByMasking helper which only checks whether tail-folding is possible, without modifying MaskedOp. Just because tail-folding is possible doesn't mean the tail will be folded; that's up to the cost-model to decide. Separating the check for whether tail-folding is possible from the preparation for tail-folding ensures that MaskedOp is only populated when tail-folding is actually selected. This allows creating the header mask only if needed after llvm#76635.
1 parent 9aa8c82 commit 507e366

File tree

3 files changed

+34
-10
lines changed

3 files changed

+34
-10
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,12 @@ class LoopVectorizationLegality {
276276
bool canVectorizeFPMath(bool EnableStrictReductions);
277277

278278
/// Return true if we can vectorize this loop while folding its tail by
279-
/// masking, and mark all respective loads/stores for masking.
280-
/// This object's state is only modified iff this function returns true.
281-
bool prepareToFoldTailByMasking();
279+
/// masking.
280+
bool canFoldTailByMasking() const;
281+
282+
/// Mark all respective loads/stores for masking. Must only be called when
283+
/// tail-folding is possible.
284+
void prepareToFoldTailByMasking();
282285

283286
/// Returns the primary induction variable.
284287
PHINode *getPrimaryInduction() { return PrimaryInduction; }

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,7 +1525,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15251525
return Result;
15261526
}
15271527

1528-
bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1528+
bool LoopVectorizationLegality::canFoldTailByMasking() const {
15291529

15301530
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
15311531

@@ -1570,8 +1570,24 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
15701570

15711571
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
15721572

1573-
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
15741573
return true;
15751574
}
15761575

1576+
void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1577+
// The list of pointers that we can safely read and write to remains empty.
1578+
SmallPtrSet<Value *, 8> SafePointers;
1579+
1580+
// Collect masked ops in temporary set first to avoid partially populating
1581+
// MaskedOp if a block cannot be predicated.
1582+
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1583+
1584+
// Check and mark all blocks for predication, including those that ordinarily
1585+
// do not need predication such as the header block.
1586+
for (BasicBlock *BB : TheLoop->blocks()) {
1587+
bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1588+
(void)R;
1589+
assert(R && "Must be able to predicate block when tail-folding.");
1590+
}
1591+
}
1592+
15771593
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4689,7 +4689,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
46894689
// found modulo the vectorization factor is not zero, try to fold the tail
46904690
// by masking.
46914691
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
4692-
if (Legal->prepareToFoldTailByMasking()) {
4692+
if (Legal->canFoldTailByMasking()) {
46934693
CanFoldTailByMasking = true;
46944694
return MaxFactors;
46954695
}
@@ -7307,6 +7307,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
73077307
CM.invalidateCostModelingDecisions();
73087308
}
73097309

7310+
if (CM.foldTailByMasking())
7311+
Legal->prepareToFoldTailByMasking();
7312+
73107313
ElementCount MaxUserVF =
73117314
UserVF.isScalable() ? MaxFactors.ScalableVF : MaxFactors.FixedVF;
73127315
bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);
@@ -8680,10 +8683,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
86808683
VPBB->setName(BB->getName());
86818684
Builder.setInsertPoint(VPBB);
86828685

8683-
if (VPBB == HeaderVPBB)
8684-
RecipeBuilder.createHeaderMask(*Plan);
8685-
else if (NeedsMasks)
8686-
RecipeBuilder.createBlockInMask(BB, *Plan);
8686+
if (NeedsMasks) {
8687+
if (VPBB == HeaderVPBB)
8688+
RecipeBuilder.createHeaderMask(*Plan);
8689+
else
8690+
RecipeBuilder.createBlockInMask(BB, *Plan);
8691+
}
86878692

86888693
// Introduce each ingredient into VPlan.
86898694
// TODO: Model and preserve debug intrinsics in VPlan.

0 commit comments

Comments
 (0)