Skip to content

Commit 35741d9

Browse files
committed
[LV] Split checking if tail-folding is possible, collecting masked ops.
Introduce a new canFoldTailByMasking helper which only checks whether tail-folding is possible, without modifying MaskedOp. Just because tail-folding is possible doesn't mean the tail will be folded; that's up to the cost model to decide. Separating the check for whether tail-folding is possible from the preparation for tail-folding ensures that MaskedOp is only populated when tail-folding is actually selected. This allows creating the header mask only if needed after #76635.
1 parent db6de1a commit 35741d9

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,12 @@ class LoopVectorizationLegality {
276276
bool canVectorizeFPMath(bool EnableStrictReductions);
277277

278278
/// Return true if we can vectorize this loop while folding its tail by
279-
/// masking, and mark all respective loads/stores for masking.
280-
/// This object's state is only modified iff this function returns true.
281-
bool prepareToFoldTailByMasking();
279+
/// masking.
280+
bool canFoldTailByMasking() const;
281+
282+
/// Mark all respective loads/stores for masking. Must only be called when
283+
/// tail-folding is possible.
284+
void prepareToFoldTailByMasking();
282285

283286
/// Returns the primary induction variable.
284287
PHINode *getPrimaryInduction() { return PrimaryInduction; }

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,7 +1533,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15331533
return Result;
15341534
}
15351535

1536-
bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1536+
bool LoopVectorizationLegality::canFoldTailByMasking() const {
15371537

15381538
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
15391539

@@ -1591,8 +1591,24 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
15911591

15921592
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
15931593

1594-
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
15951594
return true;
15961595
}
15971596

1597+
void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1598+
// The list of pointers that we can safely read and write to remains empty.
1599+
SmallPtrSet<Value *, 8> SafePointers;
1600+
1601+
// NOTE(review): TmpMaskedOp below is never used in this function; MaskedOp
1602+
// is passed to blockCanBePredicated directly. Likely stale; consider removing.
1603+
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1604+
1605+
// Check and mark all blocks for predication, including those that ordinarily
1606+
// do not need predication such as the header block.
1607+
for (BasicBlock *BB : TheLoop->blocks()) {
1608+
bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1609+
(void)R;
1610+
assert(R && "Must be able to predicate block when tail-folding.");
1611+
}
1612+
}
1613+
15981614
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1508,7 +1508,7 @@ class LoopVectorizationCostModel {
15081508
/// \param UserIC User specific interleave count.
15091509
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
15101510
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
1511-
if (!Legal->prepareToFoldTailByMasking()) {
1511+
if (!Legal->canFoldTailByMasking()) {
15121512
ChosenTailFoldingStyle =
15131513
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
15141514
return;
@@ -7309,6 +7309,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
73097309
CM.invalidateCostModelingDecisions();
73107310
}
73117311

7312+
if (CM.foldTailByMasking())
7313+
Legal->prepareToFoldTailByMasking();
7314+
73127315
ElementCount MaxUserVF =
73137316
UserVF.isScalable() ? MaxFactors.ScalableVF : MaxFactors.FixedVF;
73147317
bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);

0 commit comments

Comments
 (0)