Skip to content

Commit c49546b

Browse files
committed
[LV] Improve code in selectInterleaveCount (NFC)
1 parent c1923cf commit c49546b

File tree

1 file changed

+30
-29
lines changed

1 file changed

+30
-29
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4930,7 +4930,6 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
49304930
if (Legal->hasUncountableEarlyExit())
49314931
return 1;
49324932

4933-
auto BestKnownTC = getSmallBestKnownTC(PSE, TheLoop);
49344933
const bool HasReductions = !Legal->getReductionVars().empty();
49354934

49364935
// If we did not calculate the cost for VF (because the user selected the VF)
@@ -5006,25 +5005,33 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
50065005
}
50075006

50085007
unsigned EstimatedVF = getEstimatedRuntimeVF(VF, VScaleForTuning);
5009-
unsigned KnownTC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
5010-
if (KnownTC > 0) {
5011-
// At least one iteration must be scalar when this constraint holds. So the
5012-
// maximum available iterations for interleaving is one less.
5013-
unsigned AvailableTC =
5014-
requiresScalarEpilogue(VF.isVector()) ? KnownTC - 1 : KnownTC;
5015-
5016-
// If trip count is known we select between two prospective ICs, where
5008+
5009+
// Try to get the exact trip count; failing that, fall back to an estimate
5010+
// based on profiling data or the constant maximum trip count from PSE.
5011+
if (auto BestKnownTC = getSmallBestKnownTC(PSE, TheLoop)) {
5012+
// At least one iteration must be scalar when this constraint holds. So the
5013+
// maximum available iterations for interleaving is one less.
5014+
unsigned AvailableTC = requiresScalarEpilogue(VF.isVector())
5015+
? (*BestKnownTC) - 1
5016+
: *BestKnownTC;
5017+
5018+
unsigned InterleaveCountLB = bit_floor(std::max(
5019+
1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
5020+
5021+
if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) {
5022+
// If the estimated trip count is actually an exact one we select between
5023+
// two prospective ICs, where
5024+
//
50175025
// 1) the aggressive IC is capped by the trip count divided by VF
50185026
// 2) the conservative IC is capped by the trip count divided by (VF * 2)
5027+
//
50195028
// The final IC is selected in a way that the epilogue loop trip count is
50205029
// minimized while maximizing the IC itself, so that we either run the
5021-
// vector loop at least once if it generates a small epilogue loop, or else
5022-
// we run the vector loop at least twice.
5030+
// vector loop at least once if it generates a small epilogue loop, or
5031+
// else we run the vector loop at least twice.
50235032

5024-
unsigned InterleaveCountUB = bit_floor(
5025-
std::max(1u, std::min(AvailableTC / EstimatedVF, MaxInterleaveCount)));
5026-
unsigned InterleaveCountLB = bit_floor(std::max(
5027-
1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
5033+
unsigned InterleaveCountUB = bit_floor(std::max(
5034+
1u, std::min(AvailableTC / EstimatedVF, MaxInterleaveCount)));
50285035
MaxInterleaveCount = InterleaveCountLB;
50295036

50305037
if (InterleaveCountUB != InterleaveCountLB) {
@@ -5037,20 +5044,14 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
50375044
if (TailTripCountUB == TailTripCountLB)
50385045
MaxInterleaveCount = InterleaveCountUB;
50395046
}
5040-
} else if (BestKnownTC) {
5041-
// At least one iteration must be scalar when this constraint holds. So the
5042-
// maximum available iterations for interleaving is one less.
5043-
unsigned AvailableTC = requiresScalarEpilogue(VF.isVector())
5044-
? (*BestKnownTC) - 1
5045-
: *BestKnownTC;
5046-
5047-
// If trip count is an estimated compile time constant, limit the
5048-
// IC to be capped by the trip count divided by VF * 2, such that the vector
5049-
// loop runs at least twice to make interleaving seem profitable when there
5050-
// is an epilogue loop present. Since exact Trip count is not known we
5051-
// choose to be conservative in our IC estimate.
5052-
MaxInterleaveCount = bit_floor(std::max(
5053-
1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
5047+
} else {
5048+
// If trip count is an estimated compile time constant, limit the
5049+
// IC to be capped by the trip count divided by VF * 2, such that the
5050+
// vector loop runs at least twice to make interleaving seem profitable
5051+
// when there is an epilogue loop present. Since the exact trip count is
5052+
// not known, we choose to be conservative in our IC estimate.
5053+
MaxInterleaveCount = InterleaveCountLB;
5054+
}
50545055
}
50555056

50565057
assert(MaxInterleaveCount > 0 &&

0 commit comments

Comments
 (0)