@@ -1566,14 +1566,14 @@ class LoopVectorizationCostModel {
1566
1566
1567
1567
/// Returns true if we're required to use a scalar epilogue for at least
1568
1568
/// the final iteration of the original loop.
///
/// The patched version takes the element count \p VF being queried: the
/// interleave-group requirement is only honored when VF.isVector(), whereas
/// an exit from a non-latch block forces the epilogue regardless of \p VF.
1569
- bool requiresScalarEpilogue () const {
1569
+ bool requiresScalarEpilogue (ElementCount VF ) const {
1570
1570
// A scalar epilogue cannot be required when it is not allowed at all.
if (!isScalarEpilogueAllowed ())
1571
1571
return false ;
1572
1572
// If we might exit from anywhere but the latch, must run the exiting
1573
1573
// iteration in scalar form.
1574
1574
if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ())
1575
1575
return true ;
1576
// Otherwise defer to the interleave info. The added line additionally
// requires a vector VF, folding in the VF.isVector() check that the caller
// in getOrCreateVectorTripCount previously performed itself.
- return InterleaveInfo.requiresScalarEpilogue ();
1576
+ return VF. isVector () && InterleaveInfo.requiresScalarEpilogue ();
1577
1577
}
1578
1578
1579
1579
/// Returns true if a scalar epilogue is not allowed due to optsize or a
@@ -3181,18 +3181,13 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
3181
3181
// unroll factor (number of SIMD instructions).
3182
3182
Value *R = Builder.CreateURem (TC, Step, " n.mod.vf" );
3183
3183
3184
- // There are two cases where we need to ensure (at least) the last iteration
3185
- // runs in the scalar remainder loop. Thus, if the step evenly divides
3186
- // the trip count, we set the remainder to be equal to the step. If the step
3187
- // does not evenly divide the trip count, no adjustment is necessary since
3188
- // there will already be scalar iterations. Note that the minimum iterations
3189
- // check ensures that N >= Step. The cases are:
3190
- // 1) If there is a non-reversed interleaved group that may speculatively
3191
- // access memory out-of-bounds.
3192
- // 2) If any instruction may follow a conditionally taken exit. That is, if
3193
- // the loop contains multiple exiting blocks, or a single exiting block
3194
- // which is not the latch.
3195
- if (VF.isVector () && Cost->requiresScalarEpilogue ()) {
3184
+ // There are cases where we *must* run at least one iteration in the remainder
3185
+ // loop. See the cost model for when this can happen. If the step evenly
3186
+ // divides the trip count, we set the remainder to be equal to the step. If
3187
+ // the step does not evenly divide the trip count, no adjustment is necessary
3188
+ // since there will already be scalar iterations. Note that the minimum
3189
+ // iterations check ensures that N >= Step.
3190
+ if (Cost->requiresScalarEpilogue (VF)) {
3196
3191
auto *IsZero = Builder.CreateICmpEQ (R, ConstantInt::get (R->getType (), 0 ));
3197
3192
R = Builder.CreateSelect (IsZero, Step, R);
3198
3193
}
@@ -3246,8 +3241,8 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
3246
3241
// vector trip count is zero. This check also covers the case where adding one
3247
3242
// to the backedge-taken count overflowed leading to an incorrect trip count
3248
3243
// of zero. In this case we will also jump to the scalar loop.
3249
- auto P = Cost->requiresScalarEpilogue () ? ICmpInst::ICMP_ULE
3250
- : ICmpInst::ICMP_ULT;
3244
+ auto P = Cost->requiresScalarEpilogue (VF ) ? ICmpInst::ICMP_ULE
3245
+ : ICmpInst::ICMP_ULT;
3251
3246
3252
3247
// If tail is to be folded, vector loop takes care of all iterations.
3253
3248
Value *CheckMinIters = Builder.getFalse ();
@@ -8323,8 +8318,8 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
8323
8318
8324
8319
// Generate code to check if the loop's trip count is less than VF * UF of the
8325
8320
// main vector loop.
8326
- auto P =
8327
- Cost-> requiresScalarEpilogue () ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8321
+ auto P = Cost-> requiresScalarEpilogue (ForEpilogue ? EPI. EpilogueVF : VF) ?
8322
+ ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8328
8323
8329
8324
Value *CheckMinIters = Builder.CreateICmp (
8330
8325
P, Count, ConstantInt::get (Count->getType (), VFactor * UFactor),
@@ -8467,8 +8462,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
8467
8462
8468
8463
// Generate code to check if the loop's trip count is less than VF * UF of the
8469
8464
// vector epilogue loop.
8470
- auto P =
8471
- Cost-> requiresScalarEpilogue () ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8465
+ auto P = Cost-> requiresScalarEpilogue (EPI. EpilogueVF ) ?
8466
+ ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8472
8467
8473
8468
Value *CheckMinIters = Builder.CreateICmp (
8474
8469
P, Count,
0 commit comments