Skip to content

Commit d7bfe5b

Browse files
committed
[VPlan] Add ReductionStartVector VPInstruction.
Add a new VPInstruction::ReductionStartVector opcode to create the start values for wide reductions. This more accurately models the start value creation in VPlan and simplifies VPReductionPHIRecipe::execute.
1 parent b68565b commit d7bfe5b

14 files changed

+121
-78
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7501,8 +7501,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
75017501
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
75027502
const RecurrenceDescriptor &RdxDesc =
75037503
EpiRedHeaderPhi->getRecurrenceDescriptor();
7504-
Value *MainResumeValue =
7505-
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
7504+
Value *MainResumeValue;
7505+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
7506+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
7507+
else
7508+
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
75067509
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
75077510
RdxDesc.getRecurrenceKind())) {
75087511
auto *Cmp = cast<ICmpInst>(MainResumeValue);
@@ -8552,6 +8555,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
85528555
// If the PHI is used by a partial reduction, set the scale factor.
85538556
unsigned ScaleFactor =
85548557
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
8558+
85558559
PhiRecipe = new VPReductionPHIRecipe(
85568560
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
85578561
CM.useOrderedReductions(RdxDesc), ScaleFactor);
@@ -9439,7 +9443,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94399443
continue;
94409444

94419445
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9442-
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
9446+
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
94439447
// If tail is folded by masking, introduce selects between the phi
94449448
// and the users outside the vector region of each reduction, at the
94459449
// beginning of the dedicated latch block.
@@ -9569,6 +9573,27 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95699573
// start value.
95709574
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
95719575
}
9576+
RecurKind RK = RdxDesc.getRecurrenceKind();
9577+
if (PhiR->isOrdered() || PhiR->isInLoop() ||
9578+
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
9579+
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
9580+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
9581+
VPBuilder PHBuilder(Plan->getVectorPreheader());
9582+
VPValue *Iden = Plan->getOrAddLiveIn(
9583+
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
9584+
// If the PHI is used by a partial reduction, set the scale factor.
9585+
unsigned ScaleFactor =
9586+
RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
9587+
.value_or(1);
9588+
Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
9589+
auto *ScalarFactorVPV =
9590+
Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
9591+
VPValue *StartV =
9592+
PHBuilder.createNaryOp(VPInstruction::ReductionStartVector,
9593+
{PhiR->getStartValue(), Iden, ScalarFactorVPV},
9594+
RdxDesc.getFastMathFlags());
9595+
PhiR->setOperand(0, StartV);
9596+
}
95729597
}
95739598
for (VPRecipeBase *R : ToDelete)
95749599
R->eraseFromParent();
@@ -10081,6 +10106,12 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1008110106
}
1008210107
assert(ResumeV && "Must have a resume value");
1008310108
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
10109+
if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
10110+
if (auto *VPI = dyn_cast<VPInstruction>(PhiR->getStartValue())) {
10111+
VPI->setOperand(0, StartVal);
10112+
continue;
10113+
}
10114+
}
1008410115
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
1008510116
}
1008610117

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
907907
BranchOnCount,
908908
BranchOnCond,
909909
Broadcast,
910+
/// Creates the start vector for a reduction. Takes 3 operands: the original
911+
/// start value, the identity value for the reduction and an integer
912+
/// indicating the scaling factor.
913+
ReductionStartVector,
910914
ComputeFindLastIVResult,
911915
ComputeReductionResult,
912916
// Extracts the last lane from its operand if it is a vector, or the last
@@ -2225,13 +2229,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
22252229

22262230
/// Returns true, if the phi is part of an in-loop reduction.
22272231
bool isInLoop() const { return IsInLoop; }
2228-
2229-
/// Returns true if the recipe only uses the first lane of operand \p Op.
2230-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2231-
assert(is_contained(operands(), Op) &&
2232-
"Op must be an operand of the recipe");
2233-
return Op == getStartValue();
2234-
}
22352232
};
22362233

22372234
/// A recipe for vectorizing a phi-node as a sequence of mask-based select

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8787
inferScalarType(R->getOperand(1)) &&
8888
"different types inferred for different operands");
8989
return IntegerType::get(Ctx, 1);
90+
case VPInstruction::ReductionStartVector:
91+
return inferScalarType(R->getOperand(0));
9092
case VPInstruction::ComputeFindLastIVResult:
9193
case VPInstruction::ComputeReductionResult: {
9294
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 28 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
604604
return Builder.CreateVectorSplat(
605605
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
606606
}
607+
case VPInstruction::ReductionStartVector: {
608+
if (State.VF.isScalar())
609+
return State.get(getOperand(0), true);
610+
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
611+
Builder.setFastMathFlags(getFastMathFlags());
612+
// If this start vector is scaled then it should produce a vector with fewer
613+
// elements than the VF.
614+
ElementCount VF = State.VF.divideCoefficientBy(
615+
cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
616+
auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
617+
Constant *Zero = Builder.getInt32(0);
618+
return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
619+
Zero);
620+
}
607621
case VPInstruction::ComputeFindLastIVResult: {
608622
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
609623
// and will be removed by breaking up the recipe further.
@@ -892,6 +906,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
892906
case VPInstruction::PtrAdd:
893907
case VPInstruction::WideIVStep:
894908
case VPInstruction::StepVector:
909+
case VPInstruction::ReductionStartVector:
895910
return false;
896911
default:
897912
return true;
@@ -922,6 +937,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
922937
case VPInstruction::CanonicalIVIncrementForPart:
923938
case VPInstruction::BranchOnCount:
924939
case VPInstruction::BranchOnCond:
940+
case VPInstruction::ReductionStartVector:
925941
return true;
926942
case VPInstruction::PtrAdd:
927943
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1023,6 +1039,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10231039
case VPInstruction::FirstActiveLane:
10241040
O << "first-active-lane";
10251041
break;
1042+
case VPInstruction::ReductionStartVector:
1043+
O << "reduction-start-vector";
1044+
break;
10261045
default:
10271046
O << Instruction::getOpcodeName(getOpcode());
10281047
}
@@ -1613,6 +1632,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
16131632
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
16141633
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
16151634
Opcode == VPInstruction::WideIVStep ||
1635+
Opcode == VPInstruction::ReductionStartVector ||
16161636
Opcode == VPInstruction::ComputeReductionResult;
16171637
case OperationType::NonNegOp:
16181638
return Opcode == Instruction::ZExt;
@@ -3843,17 +3863,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
38433863
#endif
38443864

38453865
void VPReductionPHIRecipe::execute(VPTransformState &State) {
3846-
// If this phi is fed by a scaled reduction then it should output a
3847-
// vector with fewer elements than the VF.
3848-
ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3866+
// Reductions do not have to start at zero. They can start with
3867+
// any loop invariant values.
3868+
VPValue *StartVPV = getStartValue();
38493869

38503870
// In order to support recurrences we need to be able to vectorize Phi nodes.
38513871
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
38523872
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
38533873
// this value when we vectorize all of the instructions that use the PHI.
3854-
auto *ScalarTy = State.TypeAnalysis.inferScalarType(this);
3874+
BasicBlock *VectorPH =
3875+
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
38553876
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3856-
Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF);
3877+
Value *StartV = State.get(StartVPV, ScalarPHI);
3878+
Type *VecTy = StartV->getType();
38573879

38583880
BasicBlock *HeaderBB = State.CFG.PrevBB;
38593881
assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
@@ -3862,49 +3884,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
38623884
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
38633885
State.set(this, Phi, IsInLoop);
38643886

3865-
BasicBlock *VectorPH =
3866-
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
3867-
// Create start and identity vector values for the reduction in the preheader.
3868-
// TODO: Introduce recipes in VPlan preheader to create initial values.
3869-
IRBuilderBase::InsertPointGuard IPBuilder(State.Builder);
3870-
State.Builder.SetInsertPoint(VectorPH->getTerminator());
3871-
3872-
// Reductions do not have to start at zero. They can start with
3873-
// any loop invariant values.
3874-
VPValue *StartVPV = getStartValue();
3875-
RecurKind RK = RdxDesc.getRecurrenceKind();
3876-
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
3877-
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
3878-
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3879-
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3880-
// phi or the resume value from the main vector loop when vectorizing the
3881-
// epilogue loop. In the exit block, ComputeReductionResult will generate
3882-
// checks to verify if the reduction result is the sentinel value. If the
3883-
// result is the sentinel value, it will be corrected back to the start
3884-
// value.
3885-
// TODO: The sentinel value is not always necessary. When the start value is
3886-
// a constant, and smaller than the start value of the induction variable,
3887-
// the start value can be directly used to initialize the reduction phi.
3888-
Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH);
3889-
return;
3890-
}
3891-
3892-
Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(),
3893-
RdxDesc.getFastMathFlags());
3894-
unsigned CurrentPart = getUnrollPart(*this);
3895-
Value *StartV = StartVPV->getLiveInIRValue();
3896-
if (!ScalarPHI) {
3897-
if (CurrentPart == 0) {
3898-
Iden = State.Builder.CreateVectorSplat(VF, Iden);
3899-
Constant *Zero = State.Builder.getInt32(0);
3900-
StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero);
3901-
} else {
3902-
Iden = State.Builder.CreateVectorSplat(VF, Iden);
3903-
}
3904-
}
3905-
3906-
Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3907-
Phi->addIncoming(StartVal, VectorPH);
3887+
Phi->addIncoming(StartV, VectorPH);
39083888
}
39093889

39103890
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
11551155
return;
11561156
}
11571157
}
1158+
// Simplify redundant ReductionStartVector recipes after unrolling.
1159+
VPValue *StartV;
1160+
if (match(Def, m_VPInstruction<VPInstruction::ReductionStartVector>(
1161+
m_VPValue(StartV), m_VPValue(), m_VPValue()))) {
1162+
Def->replaceUsesWithIf(StartV, [Def](const VPUser &U, unsigned Idx) {
1163+
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&U);
1164+
return PhiR && Def == PhiR->getOperand(Idx) && PhiR->isInLoop();
1165+
});
1166+
return;
1167+
}
11581168
}
11591169

11601170
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,23 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
223223
Copy->addOperand(R);
224224
Copy->addOperand(getConstantVPV(Part));
225225
} else if (RdxPhi) {
226+
// If the start value is a ReductionStartVector, use the identity value
227+
// (second operand) for unrolled parts. If the scaling factor is > 1,
228+
// create a new ReductionStartVector with the same scale factor and with
229+
// both its start and identity operands set to the identity value.
230+
if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
231+
if (cast<ConstantInt>(VPI->getOperand(2)->getLiveInIRValue())
232+
->getZExtValue() == 1)
233+
Copy->setOperand(0, VPI->getOperand(1));
234+
else {
235+
if (Part == 1) {
236+
auto *C = VPI->clone();
237+
C->setOperand(0, C->getOperand(1));
238+
C->insertAfter(VPI);
239+
addUniformForAllParts(C);
240+
}
241+
}
242+
}
226243
Copy->addOperand(getConstantVPV(Part));
227244
} else {
228245
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
161161
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
162162
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
163163
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
164-
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
165164
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
165+
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
166166
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
167167
; CHECK: vec.epilog.vector.body:
168168
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
153153
; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2
154154
; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]]
155155
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]]
156+
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
156157
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
157158
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
158159
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
159-
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
160160
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
161161
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
162162
; CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
277277
; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
278278
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
279279
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
280+
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
280281
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
281282
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
282-
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
283283
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
284284
; IF-EVL-OUTLOOP: vector.body:
285285
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -581,8 +581,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
581581
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
582582
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
583583
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
584-
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
585584
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
585+
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
586586
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
587587
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
588588
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
@@ -771,8 +771,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
771771
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
772772
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
773773
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
774-
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
775774
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
775+
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
776776
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
777777
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
778778
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
137137
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
138138
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
139139
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
140+
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0
140141
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
141142
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
142-
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0
143143
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
144144
; IF-EVL: vector.body:
145145
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -1220,9 +1220,9 @@ define float @fmul(ptr %a, i64 %n, float %start) {
12201220
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
12211221
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
12221222
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1223+
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0
12231224
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
12241225
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1225-
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0
12261226
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
12271227
; IF-EVL: vector.body:
12281228
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]

0 commit comments

Comments
 (0)