Skip to content

Commit 8a2ed43

Browse files
committed
Extending VF estimate computation for scalable vectors
1 parent 3ffafb7 commit 8a2ed43

File tree

1 file changed, with 15 additions and 9 deletions.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1252,8 +1252,10 @@ class LoopVectorizationCostModel {
12521252
/// \return The desired interleave count.
12531253
/// If interleave count has been specified by metadata it will be returned.
12541254
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
1255-
/// are the selected vectorization factor and the cost of the selected VF.
1256-
unsigned selectInterleaveCount(ElementCount VF, InstructionCost LoopCost);
1255+
/// are the selected vectorization factor and the cost of the selected VF for
1256+
/// loop L.
1257+
unsigned selectInterleaveCount(Loop *L, ElementCount VF,
1258+
InstructionCost LoopCost);
12571259

12581260
/// Memory access instruction may be vectorized in more than one way.
12591261
/// Form of instruction after vectorization depends on cost.
@@ -5621,7 +5623,7 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
56215623
}
56225624

56235625
unsigned
5624-
LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
5626+
LoopVectorizationCostModel::selectInterleaveCount(Loop *L, ElementCount VF,
56255627
InstructionCost LoopCost) {
56265628
// -- The interleave heuristics --
56275629
// We interleave the loop in order to expose ILP and reduce the loop overhead.
@@ -5741,13 +5743,17 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
57415743
// the InterleaveCount as if vscale is '1', although if some information about
57425744
// the vector is known (e.g. min vector size), we can make a better decision.
57435745
if (BestKnownTC) {
5744-
if (InterleaveSmallLoopScalarReduction ||
5745-
(*BestKnownTC % VF.getKnownMinValue() == 0))
5746+
unsigned EstimatedVF = VF.getKnownMinValue();
5747+
if (VF.isScalable()) {
5748+
if (std::optional<unsigned> VScale = getVScaleForTuning(L, TTI))
5749+
EstimatedVF *= *VScale;
5750+
}
5751+
if (InterleaveSmallLoopScalarReduction || (*BestKnownTC % EstimatedVF == 0))
57465752
MaxInterleaveCount =
5747-
std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount);
5753+
std::min(*BestKnownTC / EstimatedVF, MaxInterleaveCount);
57485754
else
5749-
MaxInterleaveCount = std::min(*BestKnownTC / (VF.getKnownMinValue() * 2),
5750-
MaxInterleaveCount);
5755+
MaxInterleaveCount =
5756+
std::min(*BestKnownTC / (EstimatedVF * 2), MaxInterleaveCount);
57515757
// Make sure MaxInterleaveCount is greater than 0 & a power of 2.
57525758
MaxInterleaveCount = llvm::bit_floor(std::max(1u, MaxInterleaveCount));
57535759
}
@@ -10166,7 +10172,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016610172
if (MaybeVF) {
1016710173
VF = *MaybeVF;
1016810174
// Select the interleave count.
10169-
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
10175+
IC = CM.selectInterleaveCount(L, VF.Width, VF.Cost);
1017010176

1017110177
unsigned SelectedIC = std::max(IC, UserIC);
1017210178
// Optimistically generate runtime checks if they are needed. Drop them if

0 commit comments

Comments
 (0)