Skip to content

Commit 2aaf792

Browse files
committed
[LV] Split checking if tail-folding is possible, collecting masked ops.
Introduce a new canFoldTailByMasking helper which only checks whether tail-folding is possible, without modifying MaskedOp. Just because tail-folding is possible doesn't mean the tail will be folded; that's up to the cost model to decide. Separating the check for whether tail-folding is possible from the preparation for tail-folding ensures that MaskedOp is only populated when tail-folding is actually selected. This allows creating the header mask only if needed after #76635.
1 parent 11f7c89 commit 2aaf792

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,12 @@ class LoopVectorizationLegality {
276276
bool canVectorizeFPMath(bool EnableStrictReductions);
277277

278278
/// Return true if we can vectorize this loop while folding its tail by
279-
/// masking, and mark all respective loads/stores for masking.
280-
/// This object's state is only modified iff this function returns true.
281-
bool prepareToFoldTailByMasking();
279+
/// masking.
280+
bool canFoldTailByMasking() const;
281+
282+
/// Mark all respective loads/stores for masking. Must only be called when
283+
/// tail-folding is possible.
284+
void prepareToFoldTailByMasking();
282285

283286
/// Returns the primary induction variable.
284287
PHINode *getPrimaryInduction() { return PrimaryInduction; }

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,7 +1543,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15431543
return Result;
15441544
}
15451545

1546-
bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1546+
bool LoopVectorizationLegality::canFoldTailByMasking() const {
15471547

15481548
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
15491549

@@ -1601,8 +1601,24 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
16011601

16021602
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
16031603

1604-
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
16051604
return true;
16061605
}
16071606

1607+
void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1608+
// The list of pointers that we can safely read and write to remains empty.
1609+
SmallPtrSet<Value *, 8> SafePointers;
1610+
1611+
// Collect masked ops in temporary set first to avoid partially populating
1612+
// MaskedOp if a block cannot be predicated.
1613+
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1614+
1615+
// Check and mark all blocks for predication, including those that ordinarily
1616+
// do not need predication such as the header block.
1617+
for (BasicBlock *BB : TheLoop->blocks()) {
1618+
bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1619+
(void)R;
1620+
assert(R && "Must be able to predicate block when tail-folding.");
1621+
}
1622+
}
1623+
16081624
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1502,7 +1502,7 @@ class LoopVectorizationCostModel {
15021502
/// \param UserIC User specific interleave count.
15031503
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
15041504
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
1505-
if (!Legal->prepareToFoldTailByMasking()) {
1505+
if (!Legal->canFoldTailByMasking()) {
15061506
ChosenTailFoldingStyle =
15071507
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
15081508
return;
@@ -7226,6 +7226,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
72267226
CM.invalidateCostModelingDecisions();
72277227
}
72287228

7229+
if (CM.foldTailByMasking())
7230+
Legal->prepareToFoldTailByMasking();
7231+
72297232
ElementCount MaxUserVF =
72307233
UserVF.isScalable() ? MaxFactors.ScalableVF : MaxFactors.FixedVF;
72317234
bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);

0 commit comments

Comments
 (0)