Skip to content

Commit d253d51

Browse files
committed
Revert "[LV] Fix FindLastIV reduction for epilogue vectorization."
This reverts commit 7e233ba.
1 parent d480045 commit d253d51

File tree

5 files changed

+41
-29
lines changed

5 files changed

+41
-29
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -420,17 +420,15 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
420420
PHINode *OrigPhi);
421421

422422
/// Create a reduction of the given vector \p Src for a reduction of the
423-
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The scalar \p
424-
/// StartVal is the incoming value of reduction phi from outside the loop. The
425-
/// reduction operation is described by \p Desc.
426-
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *StartVal,
423+
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
424+
/// operation is described by \p Desc.
425+
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
427426
const RecurrenceDescriptor &Desc);
428427

429428
/// Create a generic reduction using a recurrence descriptor \p Desc.
430429
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
431430
Value *createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
432-
Value *Src, Value *StartVal = nullptr,
433-
PHINode *OrigPhi = nullptr);
431+
Value *Src, PHINode *OrigPhi = nullptr);
434432

435433
/// Create an ordered reduction intrinsic using the given recurrence
436434
/// descriptor \p Desc.

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,12 +1209,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
12091209
}
12101210

12111211
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
1212-
Value *StartVal,
12131212
const RecurrenceDescriptor &Desc) {
12141213
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
12151214
Desc.getRecurrenceKind()) &&
12161215
"Unexpected reduction kind");
1217-
assert(StartVal && "Null start value");
1216+
Value *StartVal = Desc.getRecurrenceStartValue();
12181217
Value *Sentinel = Desc.getSentinelValue();
12191218
Value *MaxRdx = Src->getType()->isVectorTy()
12201219
? Builder.CreateIntMaxReduce(Src, true)
@@ -1321,8 +1320,9 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13211320
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
13221321
}
13231322

1324-
Value *llvm::createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
1325-
Value *Src, Value *StartVal, PHINode *OrigPhi) {
1323+
Value *llvm::createReduction(IRBuilderBase &B,
1324+
const RecurrenceDescriptor &Desc, Value *Src,
1325+
PHINode *OrigPhi) {
13261326
// TODO: Support in-order reductions based on the recurrence descriptor.
13271327
// All ops in the reduction inherit fast-math-flags from the recurrence
13281328
// descriptor.
@@ -1333,7 +1333,7 @@ Value *llvm::createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
13331333
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
13341334
return createAnyOfReduction(B, Src, Desc, OrigPhi);
13351335
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
1336-
return createFindLastIVReduction(B, Src, StartVal, Desc);
1336+
return createFindLastIVReduction(B, Src, Desc);
13371337

13381338
return createSimpleReduction(B, Src, RK);
13391339
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9794,6 +9794,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
97949794
// Convert the reduction phi to operate on bools.
97959795
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
97969796
OrigLoop->getHeader()->getContext())));
9797+
continue;
9798+
}
9799+
9800+
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
9801+
RdxDesc.getRecurrenceKind())) {
9802+
// Adjust the start value for FindLastIV recurrences to use the sentinel
9803+
// value after generating the ResumePhi recipe, which uses the original
9804+
// start value.
9805+
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
97979806
}
97989807
}
97999808

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -566,9 +566,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
566566
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
567567
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
568568
!PhiR->isInLoop()) {
569-
Value *StartVal = PhiR->getStartValue()->getLiveInIRValue();
570569
ReducedPartRdx =
571-
createReduction(Builder, RdxDesc, ReducedPartRdx, StartVal, OrigPhi);
570+
createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
572571
// If the reduction can be performed in a smaller type, we need to extend
573572
// the reduction to the wider type before we branch to the original loop.
574573
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -3395,13 +3394,15 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
33953394
}
33963395
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
33973396
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3398-
// phi. In the exit block, ComputeReductionResult will generate checks to
3399-
// verify if the reduction result is the sentinel value. If the result is
3400-
// the sentinel value, it will be corrected back to the start value.
3397+
// phi or the resume value from the main vector loop when vectorizing the
3398+
// epilogue loop. In the exit block, ComputeReductionResult will generate
3399+
// checks to verify if the reduction result is the sentinel value. If the
3400+
// result is the sentinel value, it will be corrected back to the start
3401+
// value.
34013402
// TODO: The sentinel value is not always necessary. When the start value is
34023403
// a constant, and smaller than the start value of the induction variable,
34033404
// the start value can be directly used to initialize the reduction phi.
3404-
StartV = Iden = RdxDesc.getSentinelValue();
3405+
Iden = StartV;
34053406
if (!ScalarPHI) {
34063407
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
34073408
Builder.SetInsertPoint(VectorPH->getTerminator());

llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,13 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
4646
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
4747
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
4848
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
49+
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
50+
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
4951
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
5052
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
5153
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5254
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
53-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
55+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
5456
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
5557
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
5658
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
@@ -64,16 +66,16 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
6466
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
6567
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
6668
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
67-
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
69+
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 3
6870
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
6971
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
7072
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
71-
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
72-
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
73+
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
74+
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
7375
; CHECK-NEXT: br label %[[LOOP:.*]]
7476
; CHECK: [[LOOP]]:
75-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
76-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
77+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
78+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
7779
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
7880
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
7981
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 3
@@ -148,11 +150,13 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
148150
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
149151
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
150152
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
153+
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
154+
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
151155
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
152156
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
153157
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
154158
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
155-
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
159+
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
156160
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
157161
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
158162
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
@@ -166,16 +170,16 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
166170
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
167171
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
168172
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
169-
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
173+
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 2
170174
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
171175
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
172176
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
173-
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
174-
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
177+
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
178+
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
175179
; CHECK-NEXT: br label %[[LOOP:.*]]
176180
; CHECK: [[LOOP]]:
177-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
178-
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
181+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
182+
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
179183
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
180184
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
181185
; CHECK-NEXT: [[C:%.*]] = fcmp fast ueq float [[L]], 3.000000e+00

0 commit comments

Comments
 (0)