Skip to content

Commit be9d783

Browse files
committed
[LV] Fix FindLastIV reduction for epilogue vectorization.
1 parent 9f231a8 commit be9d783

File tree

5 files changed

+29
-41
lines changed

5 files changed

+29
-41
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -420,15 +420,17 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
420420
PHINode *OrigPhi);
421421

422422
/// Create a reduction of the given vector \p Src for a reduction of the
423-
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
424-
/// operation is described by \p Desc.
425-
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
423+
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The scalar \p
424+
/// StartVal is the reduction phi's incoming value from outside the loop. The
425+
/// reduction operation is described by \p Desc.
426+
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *StartVal,
426427
const RecurrenceDescriptor &Desc);
427428

428429
/// Create a generic reduction using a recurrence descriptor \p Desc.
429430
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
430431
Value *createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
431-
Value *Src, PHINode *OrigPhi = nullptr);
432+
Value *Src, Value *StartVal = nullptr,
433+
PHINode *OrigPhi = nullptr);
432434

433435
/// Create an ordered reduction intrinsic using the given recurrence
434436
/// descriptor \p Desc.

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,11 +1209,12 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
12091209
}
12101210

12111211
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
1212+
Value *StartVal,
12121213
const RecurrenceDescriptor &Desc) {
12131214
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
12141215
Desc.getRecurrenceKind()) &&
12151216
"Unexpected reduction kind");
1216-
Value *StartVal = Desc.getRecurrenceStartValue();
1217+
assert(StartVal && "Null start value");
12171218
Value *Sentinel = Desc.getSentinelValue();
12181219
Value *MaxRdx = Src->getType()->isVectorTy()
12191220
? Builder.CreateIntMaxReduce(Src, true)
@@ -1320,9 +1321,8 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13201321
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
13211322
}
13221323

1323-
Value *llvm::createReduction(IRBuilderBase &B,
1324-
const RecurrenceDescriptor &Desc, Value *Src,
1325-
PHINode *OrigPhi) {
1324+
Value *llvm::createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
1325+
Value *Src, Value *StartVal, PHINode *OrigPhi) {
13261326
// TODO: Support in-order reductions based on the recurrence descriptor.
13271327
// All ops in the reduction inherit fast-math-flags from the recurrence
13281328
// descriptor.
@@ -1333,7 +1333,7 @@ Value *llvm::createReduction(IRBuilderBase &B,
13331333
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
13341334
return createAnyOfReduction(B, Src, Desc, OrigPhi);
13351335
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
1336-
return createFindLastIVReduction(B, Src, Desc);
1336+
return createFindLastIVReduction(B, Src, StartVal, Desc);
13371337

13381338
return createSimpleReduction(B, Src, RK);
13391339
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9706,15 +9706,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
97069706
// Convert the reduction phi to operate on bools.
97079707
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
97089708
OrigLoop->getHeader()->getContext())));
9709-
continue;
9710-
}
9711-
9712-
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
9713-
RdxDesc.getRecurrenceKind())) {
9714-
// Adjust the start value for FindLastIV recurrences to use the sentinel
9715-
// value after generating the ResumePhi recipe, which uses the original
9716-
// start value.
9717-
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
97189709
}
97199710
}
97209711

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
581581
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
582582
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
583583
!PhiR->isInLoop()) {
584+
Value *StartVal = PhiR->getStartValue()->getLiveInIRValue();
584585
ReducedPartRdx =
585-
createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
586+
createReduction(Builder, RdxDesc, ReducedPartRdx, StartVal, OrigPhi);
586587
// If the reduction can be performed in a smaller type, we need to extend
587588
// the reduction to the wider type before we branch to the original loop.
588589
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -3405,15 +3406,13 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
34053406
}
34063407
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
34073408
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3408-
// phi or the resume value from the main vector loop when vectorizing the
3409-
// epilogue loop. In the exit block, ComputeReductionResult will generate
3410-
// checks to verify if the reduction result is the sentinel value. If the
3411-
// result is the sentinel value, it will be corrected back to the start
3412-
// value.
3409+
// phi. In the exit block, ComputeReductionResult will generate checks to
3410+
// verify if the reduction result is the sentinel value. If the result is
3411+
// the sentinel value, it will be corrected back to the start value.
34133412
// TODO: The sentinel value is not always necessary. When the start value is
34143413
// a constant, and smaller than the start value of the induction variable,
34153414
// the start value can be directly used to initialize the reduction phi.
3416-
Iden = StartV;
3415+
StartV = Iden = RdxDesc.getSentinelValue();
34173416
if (!ScalarPHI) {
34183417
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
34193418
Builder.SetInsertPoint(VectorPH->getTerminator());

llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,11 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
4646
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
4747
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
4848
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
49-
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
50-
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
5149
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
5250
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
5351
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5452
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
53+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5654
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
5755
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
5856
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
@@ -66,16 +64,16 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
6664
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
6765
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
6866
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
69-
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 3
67+
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
7068
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
7169
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
7270
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
73-
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
74-
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
71+
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
72+
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
7573
; CHECK-NEXT: br label %[[LOOP:.*]]
7674
; CHECK: [[LOOP]]:
77-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
78-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
75+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
76+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
7977
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
8078
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
8179
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 3
@@ -150,13 +148,11 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
150148
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
151149
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
152150
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
153-
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
154-
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
155151
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
156152
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
157153
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
158154
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
159-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
155+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
160156
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
161157
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
162158
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
@@ -170,16 +166,16 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
170166
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
171167
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
172168
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
173-
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 2
169+
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
174170
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
175171
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
176172
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
177-
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
178-
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
173+
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
174+
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
179175
; CHECK-NEXT: br label %[[LOOP:.*]]
180176
; CHECK: [[LOOP]]:
181-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
182-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
177+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
178+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
183179
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
184180
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
185181
; CHECK-NEXT: [[C:%.*]] = fcmp fast ueq float [[L]], 3.000000e+00

0 commit comments

Comments
 (0)