@@ -10165,6 +10165,12 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
10165
10165
}
10166
10166
}
10167
10167
10168
+ /// For loops with uncountable early exits, find the cost of doing work when
10169
+ /// exiting the loop early, such as calculating the final exit values of
10170
+ /// variables used outside the loop.
10171
+ /// TODO: This is currently overly pessimistic because the loop may not take
10172
+ /// the early exit, but better to keep this conservative for now. In future,
10173
+ /// it might be possible to relax this by using branch probabilities.
10168
10174
static InstructionCost calculateEarlyExitCost (LoopVectorizationCostModel &CM,
10169
10175
VPlan &Plan, ElementCount VF) {
10170
10176
InstructionCost Cost = 0 ;
@@ -10173,37 +10179,44 @@ static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
10173
10179
LLVM_DEBUG (
10174
10180
dbgs () << " Calculating cost of work in vector early exit block:\n " );
10175
10181
for (auto *ExitVPBB : Plan.getExitBlocks ()) {
10176
- for (auto *PredVPBB : ExitVPBB->getPredecessors ())
10182
+ for (auto *PredVPBB : ExitVPBB->getPredecessors ()) {
10183
+ // If the predecessor is not the middle.block, then it must be the
10184
+ // vector.early.exit block, which may contain work to calculate the exit
10185
+ // values of variables used outside the loop.
10177
10186
if (PredVPBB != Plan.getMiddleBlock ())
10178
10187
for (auto &R : *(cast<VPBasicBlock>(PredVPBB)))
10179
10188
Cost += R.cost (VF, CostCtx);
10189
+ }
10180
10190
}
10181
10191
return Cost;
10182
10192
}
10183
10193
10194
+ /// This function determines whether or not it's still profitable to vectorize
10195
+ /// the loop given the extra work we have to do outside of the loop:
10196
+ /// 1. Perform the runtime checks before entering the loop to ensure it's safe
10197
+ /// to vectorize.
10198
+ /// 2. In the case of loops with uncountable early exits, we may have to do
10199
+ /// extra work when exiting the loop early, such as calculating the final
10200
+ /// exit values of variables used outside the loop.
10184
10201
static bool isOutsideLoopWorkProfitable (GeneratedRTChecks &Checks,
10185
- VectorizationFactor &VF, Loop *L,
10186
- const TargetTransformInfo &TTI ,
10202
+ VectorizationFactor &VF,
10203
+ LoopVectorizationCostModel &CM ,
10187
10204
PredicatedScalarEvolution &PSE,
10188
- ScalarEpilogueLowering SEL,
10189
- std::optional<unsigned > VScale,
10190
- InstructionCost EarlyExitCost) {
10191
- InstructionCost CheckCost = Checks.getCost ();
10192
- if (!CheckCost.isValid () && !EarlyExitCost.isValid ())
10205
+ VPlan &Plan,
10206
+ ScalarEpilogueLowering SEL) {
10207
+ InstructionCost TotalCost = Checks.getCost ();
10208
+ if (!TotalCost.isValid ())
10193
10209
return false ;
10194
10210
10195
- InstructionCost TotalCost = 0 ;
10196
- if (CheckCost.isValid ())
10197
- TotalCost += CheckCost;
10198
-
10199
10211
// Add on the cost of work required in the vector early exit block, if one
10200
10212
// exists.
10201
- if (EarlyExitCost. isValid ())
10202
- TotalCost += EarlyExitCost ;
10213
+ if (CM. Legal -> hasUncountableEarlyExit ())
10214
+ TotalCost += calculateEarlyExitCost (CM, Plan, VF. Width ) ;
10203
10215
10204
10216
// When interleaving only scalar and vector cost will be equal, which in turn
10205
10217
// would lead to a divide by 0. Fall back to hard threshold.
10206
10218
if (VF.Width .isScalar ()) {
10219
+ // TODO: Should we rename VectorizeMemoryCheckThreshold?
10207
10220
if (TotalCost > VectorizeMemoryCheckThreshold) {
10208
10221
LLVM_DEBUG (
10209
10222
dbgs ()
@@ -10229,7 +10242,9 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10229
10242
// The total cost of the vector loop is
10230
10243
// RtC + VecC * (TC / VF) + EpiC
10231
10244
// where
10232
- // * RtC is the cost of the generated runtime checks
10245
+ // * RtC is the cost of the generated runtime checks plus the cost of
10246
+ // performing any additional work in the vector.early.exit block for loops
10247
+ // with uncountable early exits.
10233
10248
// * VecC is the cost of a single vector iteration.
10234
10249
// * TC is the actual trip count of the loop
10235
10250
// * VF is the vectorization factor
@@ -10246,7 +10261,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10246
10261
// For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
10247
10262
// the computations are performed on doubles, not integers and the result
10248
10263
// is rounded up, hence we get an upper estimate of the TC.
10249
- unsigned IntVF = getEstimatedRuntimeVF (VF.Width , VScale );
10264
+ unsigned IntVF = getEstimatedRuntimeVF (VF.Width , CM. getVScaleForTuning () );
10250
10265
uint64_t RtC = *TotalCost.getValue ();
10251
10266
uint64_t Div = ScalarC * IntVF - *VF.Cost .getValue ();
10252
10267
uint64_t MinTC1 = Div == 0 ? 0 : divideCeil (RtC * IntVF, Div);
@@ -10274,7 +10289,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10274
10289
10275
10290
// Skip vectorization if the expected trip count is less than the minimum
10276
10291
// required trip count.
10277
- if (auto ExpectedTC = getSmallBestKnownTC (PSE, L )) {
10292
+ if (auto ExpectedTC = getSmallBestKnownTC (PSE, CM. TheLoop )) {
10278
10293
if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
10279
10294
VF.MinProfitableTripCount )) {
10280
10295
LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
@@ -10671,17 +10686,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10671
10686
if (VF.Width .isVector () || SelectedIC > 1 )
10672
10687
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10673
10688
10674
- InstructionCost EarlyExitCost = InstructionCost::getInvalid ();
10675
- if (VF.Width .isVector () && LVL.hasUncountableEarlyExit ())
10676
- EarlyExitCost =
10677
- calculateEarlyExitCost (CM, LVP.getPlanFor (VF.Width ), VF.Width );
10678
-
10679
10689
// Check if it is profitable to vectorize with runtime checks.
10680
10690
bool ForceVectorization =
10681
10691
Hints.getForce () == LoopVectorizeHints::FK_Enabled;
10682
10692
if (!ForceVectorization &&
10683
- !isOutsideLoopWorkProfitable (Checks, VF, L, *TTI, PSE, SEL ,
10684
- CM. getVScaleForTuning ( ), EarlyExitCost )) {
10693
+ !isOutsideLoopWorkProfitable (Checks, VF, CM, PSE,
10694
+ LVP. getPlanFor (VF. Width ), SEL )) {
10685
10695
ORE->emit ([&]() {
10686
10696
return OptimizationRemarkAnalysisAliasing (
10687
10697
DEBUG_TYPE, " CantReorderMemOps" , L->getStartLoc (),
0 commit comments