Skip to content

Commit 0e528ac

Browse files
committed
[VPlan] Use start value operand for FindLastIV reduction phis.
Update VPReductionPHIRecipe::execute to use the start value from the start value operand of the recipe. This is needed to make sure we resume from the correct value during epilogue vectorization. At the moment, the start value is set to the sentinel value in adjustRecipesForReductions, as the original start value needs to be used when creating ResumePhi recipes. Fixes a mis-compile introduced by b3cba9b in SPEC2017 on AArch64.
1 parent 0f6d93f commit 0e528ac

File tree

3 files changed

+29
-18
lines changed

3 files changed

+29
-18
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9703,6 +9703,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
97039703
// Convert the reduction phi to operate on bools.
97049704
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
97059705
OrigLoop->getHeader()->getContext())));
9706+
continue;
9707+
}
9708+
9709+
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
9710+
RdxDesc.getRecurrenceKind())) {
9711+
// Adjust the start value for FindLastIV recurrences to use the sentinel
9712+
// value after generating the ResumePhi recipe, which uses the original
9713+
// start value.
9714+
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
97069715
}
97079716
}
97089717

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3404,13 +3404,15 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
34043404
}
34053405
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
34063406
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3407-
// phi. In the exit block, ComputeReductionResult will generate checks to
3408-
// verify if the reduction result is the sentinel value. If the result is
3409-
// the sentinel value, it will be corrected back to the start value.
3407+
// phi or the resume value from the main vector loop when vectorizing the
3408+
// epilogue loop. In the exit block, ComputeReductionResult will generate
3409+
// checks to verify if the reduction result is the sentinel value. If the
3410+
// result is the sentinel value, it will be corrected back to the start
3411+
// value.
34103412
// TODO: The sentinel value is not always necessary. When the start value is
34113413
// a constant, and smaller than the start value of the induction variable,
34123414
// the start value can be directly used to initialize the reduction phi.
3413-
StartV = Iden = RdxDesc.getSentinelValue();
3415+
StartV = Iden = StartV;
34143416
if (!ScalarPHI) {
34153417
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
34163418
Builder.SetInsertPoint(VectorPH->getTerminator());

llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -epilogue-vectorization-force-VF=4 -S < %s | FileCheck %s
33

4-
; FIXME: Currently the reduction in the epilogue vector loop uses an incorrect
5-
; start value.
64
define i64 @select_icmp_const(ptr %a, i64 %n) {
75
; CHECK-LABEL: define i64 @select_icmp_const(
86
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
@@ -48,11 +46,13 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
4846
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
4947
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
5048
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
49+
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
50+
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
5151
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
5252
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
5353
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5454
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
55+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5656
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
5757
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
5858
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
@@ -70,12 +70,12 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
7070
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
7171
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
7272
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
73-
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
74-
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[ITER_CHECK]] ]
73+
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
74+
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[ITER_CHECK]] ]
7575
; CHECK-NEXT: br label %[[LOOP:.*]]
7676
; CHECK: [[LOOP]]:
77-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
78-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
77+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
78+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
7979
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
8080
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
8181
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 3
@@ -105,8 +105,6 @@ exit:
105105
ret i64 %sel
106106
}
107107

108-
; FIXME: Currently the reduction in the epilogue vector loop uses an incorrect
109-
; start value.
110108
define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
111109
; CHECK-LABEL: define i64 @select_fcmp_const_fast(
112110
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
@@ -152,11 +150,13 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
152150
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
153151
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
154152
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
153+
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
154+
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
155155
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
156156
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
157157
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
158158
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
159-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
159+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
160160
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
161161
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
162162
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
@@ -174,12 +174,12 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
174174
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
175175
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
176176
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
177-
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
178-
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[ITER_CHECK]] ]
177+
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
178+
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[ITER_CHECK]] ]
179179
; CHECK-NEXT: br label %[[LOOP:.*]]
180180
; CHECK: [[LOOP]]:
181-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
182-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
181+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
182+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
183183
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
184184
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
185185
; CHECK-NEXT: [[C:%.*]] = fcmp fast ueq float [[L]], 3.000000e+00

0 commit comments

Comments
 (0)