Skip to content

Commit e49d65f

Browse files
committed
[LV] Fix bug when unrolling (only) a loop with non-latch exit
If we unroll a loop in the vectorizer (without vectorizing), and the cost model requires a epilogue be generated for correctness, the code generation must actually do so. The included test case on an unmodified opt will access memory one past the expected bound. As a result, this patch is fixing a latent miscompile. Differential Revision: https://reviews.llvm.org/D103700
1 parent c33ebad commit e49d65f

File tree

2 files changed

+23
-28
lines changed

2 files changed

+23
-28
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,14 +1566,14 @@ class LoopVectorizationCostModel {
15661566

15671567
/// Returns true if we're required to use a scalar epilogue for at least
15681568
/// the final iteration of the original loop.
1569-
bool requiresScalarEpilogue() const {
1569+
bool requiresScalarEpilogue(ElementCount VF) const {
15701570
if (!isScalarEpilogueAllowed())
15711571
return false;
15721572
// If we might exit from anywhere but the latch, must run the exiting
15731573
// iteration in scalar form.
15741574
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
15751575
return true;
1576-
return InterleaveInfo.requiresScalarEpilogue();
1576+
return VF.isVector() && InterleaveInfo.requiresScalarEpilogue();
15771577
}
15781578

15791579
/// Returns true if a scalar epilogue is not allowed due to optsize or a
@@ -3181,18 +3181,13 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
31813181
// unroll factor (number of SIMD instructions).
31823182
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
31833183

3184-
// There are two cases where we need to ensure (at least) the last iteration
3185-
// runs in the scalar remainder loop. Thus, if the step evenly divides
3186-
// the trip count, we set the remainder to be equal to the step. If the step
3187-
// does not evenly divide the trip count, no adjustment is necessary since
3188-
// there will already be scalar iterations. Note that the minimum iterations
3189-
// check ensures that N >= Step. The cases are:
3190-
// 1) If there is a non-reversed interleaved group that may speculatively
3191-
// access memory out-of-bounds.
3192-
// 2) If any instruction may follow a conditionally taken exit. That is, if
3193-
// the loop contains multiple exiting blocks, or a single exiting block
3194-
// which is not the latch.
3195-
if (VF.isVector() && Cost->requiresScalarEpilogue()) {
3184+
// There are cases where we *must* run at least one iteration in the remainder
3185+
// loop. See the cost model for when this can happen. If the step evenly
3186+
// divides the trip count, we set the remainder to be equal to the step. If
3187+
// the step does not evenly divide the trip count, no adjustment is necessary
3188+
// since there will already be scalar iterations. Note that the minimum
3189+
// iterations check ensures that N >= Step.
3190+
if (Cost->requiresScalarEpilogue(VF)) {
31963191
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
31973192
R = Builder.CreateSelect(IsZero, Step, R);
31983193
}
@@ -3246,8 +3241,8 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
32463241
// vector trip count is zero. This check also covers the case where adding one
32473242
// to the backedge-taken count overflowed leading to an incorrect trip count
32483243
// of zero. In this case we will also jump to the scalar loop.
3249-
auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
3250-
: ICmpInst::ICMP_ULT;
3244+
auto P = Cost->requiresScalarEpilogue(VF) ? ICmpInst::ICMP_ULE
3245+
: ICmpInst::ICMP_ULT;
32513246

32523247
// If tail is to be folded, vector loop takes care of all iterations.
32533248
Value *CheckMinIters = Builder.getFalse();
@@ -8323,8 +8318,8 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
83238318

83248319
// Generate code to check if the loop's trip count is less than VF * UF of the
83258320
// main vector loop.
8326-
auto P =
8327-
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8321+
auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF : VF) ?
8322+
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
83288323

83298324
Value *CheckMinIters = Builder.CreateICmp(
83308325
P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
@@ -8467,8 +8462,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
84678462

84688463
// Generate code to check if the loop's trip count is less than VF * UF of the
84698464
// vector epilogue loop.
8470-
auto P =
8471-
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
8465+
auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
8466+
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
84728467

84738468
Value *CheckMinIters = Builder.CreateICmp(
84748469
P, Count,

llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
; RUN: opt %s -S -loop-vectorize -force-vector-interleave=2 | FileCheck %s
33

44
; Demonstrate a case where we unroll a loop, but don't vectorize it.
5-
; This currently reveals a miscompile. The original loop runs stores in
6-
; the latch block on iterations 0 to 1022, and exits when %indvars.iv = 1023.
7-
; Currently, the unrolled loop produced by the vectorizer runs the iteration
8-
; where %indvar.iv = 1023 in the vector.body loop before exiting. This results
9-
; in an out of bounds access..
5+
; The original loop runs stores in the latch block on iterations 0 to 1022,
6+
; and exits when %indvars.iv = 1023. (That is, it actually runs the stores
7+
; for an odd number of iterations.) If we unroll by two in the "vector.body"
8+
; loop, we must exit to the epilogue on iteration with %indvars.iv = 1022 to
9+
; avoid an out of bounds access.
1010

1111
define void @test(double* %data) {
1212
; CHECK-LABEL: @test(
@@ -31,13 +31,13 @@ define void @test(double* %data) {
3131
; CHECK-NEXT: store double [[TMP8]], double* [[TMP4]], align 8
3232
; CHECK-NEXT: store double [[TMP9]], double* [[TMP5]], align 8
3333
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
34-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
34+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
3535
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3636
; CHECK: middle.block:
37-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
37+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1022
3838
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3939
; CHECK: scalar.ph:
40-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
40+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
4141
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
4242
; CHECK: for.body:
4343
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ]

0 commit comments

Comments
 (0)