Skip to content

Commit e5263e3

Browse files
authored
[LV][NFC] Clean up tail-folding check for early-exit loops (llvm#133931)
This patch moves the check for a single latch exit from computeMaxVF() to LoopVectorizationLegality::canFoldTailByMasking(), because the check in computeMaxVF() duplicated the fallback logic that is already used when foldTailByMasking() returns false. It also updates the NoScalarEpilogueNeeded logic to return false for loops that are neither single-latch-exit nor early-exit loops. This avoids applying tail-folding in unsupported cases and prevents assertions from being triggered during analysis.
1 parent a04580f commit e5263e3

File tree

2 files changed

+20
-21
lines changed

2 files changed

+20
-21
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
18721872
}
18731873

18741874
bool LoopVectorizationLegality::canFoldTailByMasking() const {
1875+
// The only loops we can vectorize without a scalar epilogue are loops with
1876+
// a bottom-test and a single exiting block. We'd have to handle the fact
1877+
// that not every instruction executes on the last iteration. This will
1878+
// require a lane mask which varies through the vector loop body. (TODO)
1879+
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1880+
LLVM_DEBUG(
1881+
dbgs()
1882+
<< "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
1883+
return false;
1884+
}
18751885

18761886
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
18771887

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3968,22 +3968,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39683968
break;
39693969
}
39703970

3971-
// The only loops we can vectorize without a scalar epilogue are loops with
3972-
// a bottom-test and a single exiting block. We'd have to handle the fact
3973-
// that not every instruction executes on the last iteration. This will
3974-
// require a lane mask which varies through the vector loop body. (TODO)
3975-
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
3976-
// If there was a tail-folding hint/switch, but we can't fold the tail by
3977-
// masking, fallback to a vectorization with a scalar epilogue.
3978-
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
3979-
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
3980-
"scalar epilogue instead.\n");
3981-
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
3982-
return computeFeasibleMaxVF(MaxTC, UserVF, false);
3983-
}
3984-
return FixedScalableVFPair::getNone();
3985-
}
3986-
39873971
// Now try the tail folding
39883972

39893973
// Invalidate interleave groups that require an epilogue if we can't mask
@@ -4013,14 +3997,19 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40133997
}
40143998

40153999
auto NoScalarEpilogueNeeded = [this, &UserIC](unsigned MaxVF) {
4000+
// Return false if the loop is neither a single-latch-exit loop nor an
4001+
// early-exit loop, as tail-folding is not supported in that case.
4002+
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() &&
4003+
!Legal->hasUncountableEarlyExit())
4004+
return false;
40164005
unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
40174006
ScalarEvolution *SE = PSE.getSE();
4018-
// Currently only loops with countable exits are vectorized, but calling
4019-
// getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
4020-
// uncountable exits whilst also ensuring the symbolic maximum and known
4021-
// back-edge taken count remain identical for loops with countable exits.
4007+
// Calling getSymbolicMaxBackedgeTakenCount enables support for loops
4008+
// with uncountable exits. For countable loops, the symbolic maximum must
4009+
// remain identical to the known back-edge taken count.
40224010
const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
4023-
assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() &&
4011+
assert((Legal->hasUncountableEarlyExit() ||
4012+
BackedgeTakenCount == PSE.getBackedgeTakenCount()) &&
40244013
"Invalid loop count");
40254014
const SCEV *ExitCount = SE->getAddExpr(
40264015
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));

0 commit comments

Comments
 (0)