@@ -1252,8 +1252,10 @@ class LoopVectorizationCostModel {
1252
1252
/// \return The desired interleave count.
1253
1253
/// If interleave count has been specified by metadata it will be returned.
1254
1254
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
1255
- /// are the selected vectorization factor and the cost of the selected VF.
1256
- unsigned selectInterleaveCount (ElementCount VF, InstructionCost LoopCost);
1255
+ /// are the selected vectorization factor and the cost of the selected VF for
1256
+ /// loop L.
1257
+ unsigned selectInterleaveCount (Loop *L, ElementCount VF,
1258
+ InstructionCost LoopCost);
1257
1259
1258
1260
/// Memory access instruction may be vectorized in more than one way.
1259
1261
/// Form of instruction after vectorization depends on cost.
@@ -5621,7 +5623,7 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
5621
5623
}
5622
5624
5623
5625
unsigned
5624
- LoopVectorizationCostModel::selectInterleaveCount (ElementCount VF,
5626
+ LoopVectorizationCostModel::selectInterleaveCount (Loop *L, ElementCount VF,
5625
5627
InstructionCost LoopCost) {
5626
5628
// -- The interleave heuristics --
5627
5629
// We interleave the loop in order to expose ILP and reduce the loop overhead.
@@ -5741,13 +5743,17 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
5741
5743
// the InterleaveCount as if vscale is '1', although if some information about
5742
5744
// the vector is known (e.g. min vector size), we can make a better decision.
5743
5745
if (BestKnownTC) {
5744
- if (InterleaveSmallLoopScalarReduction ||
5745
- (*BestKnownTC % VF.getKnownMinValue () == 0 ))
5746
+ unsigned EstimatedVF = VF.getKnownMinValue ();
5747
+ if (VF.isScalable ()) {
5748
+ if (std::optional<unsigned > VScale = getVScaleForTuning (L, TTI))
5749
+ EstimatedVF *= *VScale;
5750
+ }
5751
+ if (InterleaveSmallLoopScalarReduction || (*BestKnownTC % EstimatedVF == 0 ))
5746
5752
MaxInterleaveCount =
5747
- std::min (*BestKnownTC / VF. getKnownMinValue () , MaxInterleaveCount);
5753
+ std::min (*BestKnownTC / EstimatedVF , MaxInterleaveCount);
5748
5754
else
5749
- MaxInterleaveCount = std::min (*BestKnownTC / (VF. getKnownMinValue () * 2 ),
5750
- MaxInterleaveCount);
5755
+ MaxInterleaveCount =
5756
+ std::min (*BestKnownTC / (EstimatedVF * 2 ), MaxInterleaveCount);
5751
5757
// Make sure MaxInterleaveCount is greater than 0 & a power of 2.
5752
5758
MaxInterleaveCount = llvm::bit_floor (std::max (1u , MaxInterleaveCount));
5753
5759
}
@@ -10166,7 +10172,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10166
10172
if (MaybeVF) {
10167
10173
VF = *MaybeVF;
10168
10174
// Select the interleave count.
10169
- IC = CM.selectInterleaveCount (VF.Width , VF.Cost );
10175
+ IC = CM.selectInterleaveCount (L, VF.Width , VF.Cost );
10170
10176
10171
10177
unsigned SelectedIC = std::max (IC, UserIC);
10172
10178
// Optimistically generate runtime checks if they are needed. Drop them if
0 commit comments