Skip to content

Commit 3397950

Browse files
Mel-Chen and fhahn authored
[LV] Fix FindLastIV reduction for epilogue vectorization. (#120395)
Following 0e528ac, this patch adjusts the resume value of VPReductionPHIRecipe for FindLastIV reductions by replacing it with: ResumeValue = (ResumeValue == StartValue) ? SentinelValue : ResumeValue; This addresses the correctness issue that arises when the start value might not be less than the minimum value of a monotonically increasing induction variable. Thanks to Florian Hahn for the help. --------- Co-authored-by: Florian Hahn <[email protected]>
1 parent 41a94de commit 3397950

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7691,6 +7691,20 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
76917691
"AnyOf expected to start by comparing main resume value to original "
76927692
"start value");
76937693
MainResumeValue = Cmp->getOperand(0);
7694+
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
7695+
RdxDesc.getRecurrenceKind())) {
7696+
using namespace llvm::PatternMatch;
7697+
Value *Cmp, *OrigResumeV;
7698+
bool IsExpectedPattern =
7699+
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)),
7700+
m_Specific(RdxDesc.getSentinelValue()),
7701+
m_Value(OrigResumeV))) &&
7702+
match(Cmp,
7703+
m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
7704+
m_Specific(RdxDesc.getRecurrenceStartValue())));
7705+
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
7706+
(void)IsExpectedPattern;
7707+
MainResumeValue = OrigResumeV;
76947708
}
76957709
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
76967710

@@ -10413,6 +10427,19 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1041310427
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
1041410428
ResumeV =
1041510429
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
10430+
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
10431+
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
10432+
// to the resume value. The resume value is adjusted to the sentinel
10433+
// value when the final value from the main vector loop equals the start
10434+
// value. This ensures correctness when the start value might not be
10435+
// less than the minimum value of a monotonically increasing induction
10436+
// variable.
10437+
IRBuilder<> Builder(
10438+
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
10439+
Value *Cmp =
10440+
Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue());
10441+
ResumeV =
10442+
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
1041610443
}
1041710444
} else {
1041810445
// Retrieve the induction resume values for wide inductions from

llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
4040
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
4141
; CHECK: [[VEC_EPILOG_PH]]:
4242
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
43-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
43+
; CHECK-NEXT: [[BC_MERGE_RDX1:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
44+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[BC_MERGE_RDX1]], 3
45+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 [[BC_MERGE_RDX1]]
4446
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4
4547
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
4648
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
@@ -144,7 +146,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
144146
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
145147
; CHECK: [[VEC_EPILOG_PH]]:
146148
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
147-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
149+
; CHECK-NEXT: [[BC_MERGE_RDX1:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
150+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[BC_MERGE_RDX1]], 2
151+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 [[BC_MERGE_RDX1]]
148152
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4
149153
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
150154
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0

0 commit comments

Comments
 (0)