Skip to content

[VPlan] Add ReductionStartVector VPInstruction. #142290

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 9, 2025
42 changes: 39 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7235,8 +7235,14 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc =
EpiRedHeaderPhi->getRecurrenceDescriptor();
Value *MainResumeValue =
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
Value *MainResumeValue;
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
VPI->getOpcode() == VPInstruction::ReductionStartVector) &&
"unexpected start recipe");
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
} else
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
Expand Down Expand Up @@ -9173,7 +9179,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
// beginning of the dedicated latch block.
Expand Down Expand Up @@ -9311,6 +9317,27 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// start value.
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
}
RecurKind RK = RdxDesc.getRecurrenceKind();
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
VPBuilder PHBuilder(Plan->getVectorPreheader());
VPValue *Iden = Plan->getOrAddLiveIn(
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
// If the PHI is used by a partial reduction, set the scale factor.
unsigned ScaleFactor =
RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
.value_or(1);
Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
auto *ScaleFactorVPV =
Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
VPValue *StartV = PHBuilder.createNaryOp(
VPInstruction::ReductionStartVector,
{PhiR->getStartValue(), Iden, ScaleFactorVPV},
PhiTy->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
: FastMathFlags());
PhiR->setOperand(0, StartV);
}
}
for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();
Expand Down Expand Up @@ -9816,6 +9843,15 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
} else {
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (auto *VPI = dyn_cast<VPInstruction>(PhiR->getStartValue())) {
assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
"unexpected start value");
VPI->setOperand(0, StartVal);
continue;
}
}
} else {
// Retrieve the induction resume values for wide inductions from
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Scale the first operand (vector step) by the second operand
/// (scalar-step). Casts both operands to the result type if needed.
WideIVStep,
/// Start vector for reductions with 3 operands: the original start value,
/// the identity value for the reduction and an integer indicating the
/// scaling factor.
ReductionStartVector,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,

Expand Down Expand Up @@ -2231,7 +2235,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getStartValue();
return isOrdered() || isInLoop();
}
};

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
case Instruction::Freeze:
case VPInstruction::ReductionStartVector:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
Expand Down Expand Up @@ -395,6 +396,10 @@ static unsigned getVFScaleFactor(VPRecipeBase *R) {
return RR->getVFScaleFactor();
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
return RR->getVFScaleFactor();
assert(
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
VPInstruction::ReductionStartVector) &&
"getting scaling factor of reduction-start-vector not implemented yet");
return 1;
}

Expand Down
76 changes: 28 additions & 48 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
case VPInstruction::ReductionStartVector: {
if (State.VF.isScalar())
return State.get(getOperand(0), true);
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
Builder.setFastMathFlags(getFastMathFlags());
// If this start vector is scaled then it should produce a vector with fewer
// elements than the VF.
ElementCount VF = State.VF.divideCoefficientBy(
cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
Constant *Zero = Builder.getInt32(0);
return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
Zero);
}
case VPInstruction::ComputeAnyOfResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
Expand Down Expand Up @@ -899,6 +913,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
return false;
default:
return true;
Expand Down Expand Up @@ -929,6 +944,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
case VPInstruction::ReductionStartVector:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
Expand Down Expand Up @@ -1034,6 +1050,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstActiveLane:
O << "first-active-lane";
break;
case VPInstruction::ReductionStartVector:
O << "reduction-start-vector";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down Expand Up @@ -1617,6 +1636,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
Opcode == VPInstruction::WideIVStep ||
Opcode == VPInstruction::ReductionStartVector ||
Opcode == VPInstruction::ComputeReductionResult;
case OperationType::NonNegOp:
return Opcode == Instruction::ZExt;
Expand Down Expand Up @@ -3847,17 +3867,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
// If this phi is fed by a scaled reduction then it should output a
// vector with fewer elements than the VF.
ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
Comment on lines -3850 to -3852
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this no longer needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, both the start value and the backedge value now are narrowed already.

// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VPValue *StartVPV = getStartValue();

// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
auto *ScalarTy = State.TypeAnalysis.inferScalarType(this);
BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF);
Value *StartV = State.get(StartVPV, ScalarPHI);
Type *VecTy = StartV->getType();

BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
Expand All @@ -3866,49 +3888,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
State.set(this, Phi, IsInLoop);

BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
// Create start and identity vector values for the reduction in the preheader.
// TODO: Introduce recipes in VPlan preheader to create initial values.
IRBuilderBase::InsertPointGuard IPBuilder(State.Builder);
State.Builder.SetInsertPoint(VectorPH->getTerminator());

// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VPValue *StartVPV = getStartValue();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
// phi or the resume value from the main vector loop when vectorizing the
// epilogue loop. In the exit block, ComputeReductionResult will generate
// checks to verify if the reduction result is the sentinel value. If the
// result is the sentinel value, it will be corrected back to the start
// value.
// TODO: The sentinel value is not always necessary. When the start value is
// a constant, and smaller than the start value of the induction variable,
// the start value can be directly used to initialize the reduction phi.
Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH);
return;
}

Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(),
RdxDesc.getFastMathFlags());
unsigned CurrentPart = getUnrollPart(*this);
Value *StartV = StartVPV->getLiveInIRValue();
if (!ScalarPHI) {
if (CurrentPart == 0) {
Iden = State.Builder.CreateVectorSplat(VF, Iden);
Constant *Zero = State.Builder.getInt32(0);
StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero);
} else {
Iden = State.Builder.CreateVectorSplat(VF, Iden);
}
}

Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
Phi->addIncoming(StartVal, VectorPH);
Phi->addIncoming(StartV, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
}
// Simplify redundant ReductionStartVector recipes after unrolling.
VPValue *StartV;
if (match(Def, m_VPInstruction<VPInstruction::ReductionStartVector>(
m_VPValue(StartV), m_VPValue(), m_VPValue()))) {
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&U);
return PhiR && PhiR->isInLoop();
});
return;
}
}

void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
Expand Down
19 changes: 17 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;

namespace {

Expand Down Expand Up @@ -223,6 +224,22 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
Copy->addOperand(R);
Copy->addOperand(getConstantVPV(Part));
} else if (RdxPhi) {
// If the start value is a ReductionStartVector, use the identity value
// (second operand) for unrolled parts. If the scaling factor is > 1,
// create a new ReductionStartVector with the scale factor and both
// operands set to the identity value.
if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
"unexpected start VPInstruction");
if (match(VPI->getOperand(2), m_SpecificInt(1))) {
Copy->setOperand(0, VPI->getOperand(1));
} else if (Part == 1) {
auto *C = VPI->clone();
C->setOperand(0, C->getOperand(1));
C->insertAfter(VPI);
addUniformForAllParts(C);
}
}
Copy->addOperand(getConstantVPV(Part));
} else {
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
Expand All @@ -233,7 +250,6 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,

/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
using namespace llvm::VPlanPatternMatch;
if (match(&R, m_BranchOnCond(m_VPValue())) ||
match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
return;
Expand Down Expand Up @@ -301,7 +317,6 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
}

using namespace llvm::VPlanPatternMatch;
void UnrollState::unrollBlock(VPBlockBase *VPB) {
auto *VPR = dyn_cast<VPRegionBlock>(VPB);
if (VPR) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,16 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i32 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF4]]
; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[N_VEC5]] to i8
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT10]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT11]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX6:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT11]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[IV:%.*]] = trunc i32 [[INDEX6]] to i8
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[GEP]], i32 0
Expand All @@ -87,12 +87,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi ir<0>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4)
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4)
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
Expand Down Expand Up @@ -83,11 +84,12 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.ph>:
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4)
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4)
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]>
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
Expand Down
Loading
Loading