@@ -10175,6 +10175,12 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
10175
10175
}
10176
10176
}
10177
10177
10178
+ /// For loops with uncountable early exits, find the cost of doing work when
10179
+ /// exiting the loop early, such as calculating the final exit values of
10180
+ /// variables used outside the loop.
10181
+ /// TODO: This is currently overly pessimistic because the loop may not take
10182
+ /// the early exit, but better to keep this conservative for now. In future,
10183
+ /// it might be possible to relax this by using branch probabilities.
10178
10184
static InstructionCost calculateEarlyExitCost (LoopVectorizationCostModel &CM,
10179
10185
VPlan &Plan, ElementCount VF) {
10180
10186
InstructionCost Cost = 0 ;
@@ -10183,37 +10189,44 @@ static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
10183
10189
LLVM_DEBUG (
10184
10190
dbgs () << " Calculating cost of work in vector early exit block:\n " );
10185
10191
for (auto *ExitVPBB : Plan.getExitBlocks ()) {
10186
- for (auto *PredVPBB : ExitVPBB->getPredecessors ())
10192
+ for (auto *PredVPBB : ExitVPBB->getPredecessors ()) {
10193
+ // If the predecessor is not the middle.block, then it must be the
10194
+ // vector.early.exit block, which may contain work to calculate the exit
10195
+ // values of variables used outside the loop.
10187
10196
if (PredVPBB != Plan.getMiddleBlock ())
10188
10197
for (auto &R : *(cast<VPBasicBlock>(PredVPBB)))
10189
10198
Cost += R.cost (VF, CostCtx);
10199
+ }
10190
10200
}
10191
10201
return Cost;
10192
10202
}
10193
10203
10204
+ /// This function determines whether or not it's still profitable to vectorize
10205
+ /// the loop given the extra work we have to do outside of the loop:
10206
+ /// 1. Perform the runtime checks before entering the loop to ensure it's safe
10207
+ ///    to vectorize.
10208
+ /// 2. In the case of loops with uncountable early exits, we may have to do
10209
+ ///    extra work when exiting the loop early, such as calculating the final
10210
+ ///    exit values of variables used outside the loop.
10194
10211
static bool isOutsideLoopWorkProfitable (GeneratedRTChecks &Checks,
10195
- VectorizationFactor &VF, Loop *L,
10196
- const TargetTransformInfo &TTI ,
10212
+ VectorizationFactor &VF,
10213
+ LoopVectorizationCostModel &CM ,
10197
10214
PredicatedScalarEvolution &PSE,
10198
- ScalarEpilogueLowering SEL,
10199
- std::optional<unsigned > VScale,
10200
- InstructionCost EarlyExitCost) {
10201
- InstructionCost CheckCost = Checks.getCost ();
10202
- if (!CheckCost.isValid () && !EarlyExitCost.isValid ())
10215
+ VPlan &Plan,
10216
+ ScalarEpilogueLowering SEL) {
10217
+ InstructionCost TotalCost = Checks.getCost ();
10218
+ if (!TotalCost.isValid ())
10203
10219
return false ;
10204
10220
10205
- InstructionCost TotalCost = 0 ;
10206
- if (CheckCost.isValid ())
10207
- TotalCost += CheckCost;
10208
-
10209
10221
// Add on the cost of work required in the vector early exit block, if one
10210
10222
// exists.
10211
- if (EarlyExitCost. isValid ())
10212
- TotalCost += EarlyExitCost ;
10223
+ if (CM. Legal -> hasUncountableEarlyExit ())
10224
+ TotalCost += calculateEarlyExitCost (CM, Plan, VF. Width ) ;
10213
10225
10214
10226
// When interleaving only scalar and vector cost will be equal, which in turn
10215
10227
// would lead to a divide by 0. Fall back to hard threshold.
10216
10228
if (VF.Width .isScalar ()) {
10229
+ // TODO: Should we rename VectorizeMemoryCheckThreshold?
10217
10230
if (TotalCost > VectorizeMemoryCheckThreshold) {
10218
10231
LLVM_DEBUG (
10219
10232
dbgs ()
@@ -10240,7 +10253,9 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10240
10253
// The total cost of the vector loop is
10241
10254
// RtC + VecC * (TC / VF) + EpiC
10242
10255
// where
10243
- // * RtC is the cost of the generated runtime checks
10256
+ // * RtC is the cost of the generated runtime checks plus the cost of
10257
+ // performing any additional work in the vector.early.exit block for loops
10258
+ // with uncountable early exits.
10244
10259
// * VecC is the cost of a single vector iteration.
10245
10260
// * TC is the actual trip count of the loop
10246
10261
// * VF is the vectorization factor
@@ -10257,7 +10272,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10257
10272
// For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
10258
10273
// the computations are performed on doubles, not integers and the result
10259
10274
// is rounded up, hence we get an upper estimate of the TC.
10260
- unsigned IntVF = getEstimatedRuntimeVF (VF.Width , VScale );
10275
+ unsigned IntVF = getEstimatedRuntimeVF (VF.Width , CM. getVScaleForTuning () );
10261
10276
uint64_t RtC = *TotalCost.getValue ();
10262
10277
uint64_t Div = ScalarC * IntVF - *VF.Cost .getValue ();
10263
10278
uint64_t MinTC1 = Div == 0 ? 0 : divideCeil (RtC * IntVF, Div);
@@ -10285,7 +10300,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10285
10300
10286
10301
// Skip vectorization if the expected trip count is less than the minimum
10287
10302
// required trip count.
10288
- if (auto ExpectedTC = getSmallBestKnownTC (PSE, L )) {
10303
+ if (auto ExpectedTC = getSmallBestKnownTC (PSE, CM. TheLoop )) {
10289
10304
if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
10290
10305
VF.MinProfitableTripCount )) {
10291
10306
LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
@@ -10682,17 +10697,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10682
10697
if (VF.Width .isVector () || SelectedIC > 1 )
10683
10698
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10684
10699
10685
- InstructionCost EarlyExitCost = InstructionCost::getInvalid ();
10686
- if (VF.Width .isVector () && LVL.hasUncountableEarlyExit ())
10687
- EarlyExitCost =
10688
- calculateEarlyExitCost (CM, LVP.getPlanFor (VF.Width ), VF.Width );
10689
-
10690
10700
// Check if it is profitable to vectorize with runtime checks.
10691
10701
bool ForceVectorization =
10692
10702
Hints.getForce () == LoopVectorizeHints::FK_Enabled;
10693
10703
if (!ForceVectorization &&
10694
- !isOutsideLoopWorkProfitable (Checks, VF, L, *TTI, PSE, SEL ,
10695
- CM. getVScaleForTuning ( ), EarlyExitCost )) {
10704
+ !isOutsideLoopWorkProfitable (Checks, VF, CM, PSE,
10705
+ LVP. getPlanFor (VF. Width ), SEL )) {
10696
10706
ORE->emit ([&]() {
10697
10707
return OptimizationRemarkAnalysisAliasing (
10698
10708
DEBUG_TYPE, " CantReorderMemOps" , L->getStartLoc (),
0 commit comments