Commit 4dc8307

[LV][VPlan] Implement VPlan-based cost for exit condition.
This patch models the cost of exit conditions through the VPlan-based cost model.

* `BranchOnCount` will generate icmp + br. The branch instruction is already accounted for by the VPRegionBlock, so we only need to calculate the cost of the icmp. If the VF is the same as the trip count of the loop, the BranchOnCount is free.

This patch is not quite NFC, for the following reasons:

* Some of the BranchOnCount recipes could be optimized to BranchOnCond, which is free.
* Some of the instructions counted for the exit condition by the legacy cost model will be optimized out.
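For readers skimming the diff below, here is a minimal standalone sketch of the costing rule this patch implements (the `branchOnCountCost` helper and the simplified `ElementCount` struct are illustrative stand-ins, not LLVM API): the exit compare is charged as one icmp, unless a constant trip count matches a fixed VF, in which case the vector loop runs exactly once and the compare is free; the branch itself is costed by the enclosing VPRegionBlock.

```cpp
#include <cstdint>
#include <optional>

// Simplified stand-in for llvm::ElementCount (fixed or scalable VF).
struct ElementCount {
  uint64_t MinVal;
  bool Scalable;
  bool isFixed() const { return !Scalable; }
  uint64_t getFixedValue() const { return MinVal; }
};

// IcmpCost stands in for TTI.getCmpSelInstrCost(Instruction::ICmp, ...).
// The br is not charged here; the enclosing region accounts for it.
uint64_t branchOnCountCost(std::optional<uint64_t> ConstTripCount,
                           ElementCount VF, uint64_t IcmpCost) {
  if (ConstTripCount && VF.isFixed() && *ConstTripCount == VF.getFixedValue())
    return 0; // Vector loop runs exactly once; the exit compare folds away.
  return IcmpCost; // Otherwise charge a single icmp for BranchOnCount.
}

int main() {
  ElementCount VF4{4, /*Scalable=*/false};
  // Trip count 4 with fixed VF 4: the compare is free.
  uint64_t Free = branchOnCountCost(4, VF4, /*IcmpCost=*/1);
  // Unknown trip count: one icmp is charged.
  uint64_t Charged = branchOnCountCost(std::nullopt, VF4, /*IcmpCost=*/1);
  return (Free == 0 && Charged == 1) ? 0 : 1;
}
```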
1 parent 29b7295 commit 4dc8307

15 files changed (+298, -301 lines)

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 39 deletions
@@ -7271,45 +7271,6 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     }
   }
 
-  /// Compute the cost of all exiting conditions of the loop using the legacy
-  /// cost model. This is to match the legacy behavior, which adds the cost of
-  /// all exit conditions. Note that this over-estimates the cost, as there will
-  /// be a single condition to control the vector loop.
-  SmallVector<BasicBlock *> Exiting;
-  CM.TheLoop->getExitingBlocks(Exiting);
-  SetVector<Instruction *> ExitInstrs;
-  // Collect all exit conditions.
-  for (BasicBlock *EB : Exiting) {
-    auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
-    if (!Term)
-      continue;
-    if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
-      ExitInstrs.insert(CondI);
-    }
-  }
-  // Compute the cost of all instructions only feeding the exit conditions.
-  for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
-    Instruction *CondI = ExitInstrs[I];
-    if (!OrigLoop->contains(CondI) ||
-        !CostCtx.SkipCostComputation.insert(CondI).second)
-      continue;
-    InstructionCost CondICost = CostCtx.getLegacyCost(CondI, VF);
-    LLVM_DEBUG({
-      dbgs() << "Cost of " << CondICost << " for VF " << VF
-             << ": exit condition instruction " << *CondI << "\n";
-    });
-    Cost += CondICost;
-    for (Value *Op : CondI->operands()) {
-      auto *OpI = dyn_cast<Instruction>(Op);
-      if (!OpI || any_of(OpI->users(), [&ExitInstrs, this](User *U) {
-            return OrigLoop->contains(cast<Instruction>(U)->getParent()) &&
-                   !ExitInstrs.contains(cast<Instruction>(U));
-          }))
-        continue;
-      ExitInstrs.insert(OpI);
-    }
-  }
-
   // The legacy cost model has special logic to compute the cost of in-loop
   // reductions, which may be smaller than the sum of all instructions involved
   // in the reduction.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 0 deletions
@@ -742,6 +742,18 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
     return Ctx.TTI.getArithmeticReductionCost(
         Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
   }
+  case VPInstruction::BranchOnCount: {
+    // BranchOnCount will generate icmp_eq + br instructions and the
+    // cost of the branch will be calculated in VPRegionBlock.
+    // If the vector loop only executes once, ignore the cost of the cmp.
+    Type *ValTy = Ctx.Types.inferScalarType(getOperand(0));
+    auto TC = dyn_cast_if_present<ConstantInt>(
+        getParent()->getPlan()->getTripCount()->getUnderlyingValue());
+    if (TC && VF.isFixed() && TC->getSExtValue() == VF.getFixedValue())
+      return 0;
+    return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ValTy, nullptr,
+                                      CmpInst::ICMP_EQ, Ctx.CostKind);
+  }
   default:
     // TODO: Compute cost other VPInstructions once the legacy cost model has
     // been retired.

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 42 additions & 38 deletions
@@ -8,39 +8,41 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
-; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 8)
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
-; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
-; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
+; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[TMP11]] to <vscale x 2 x i8>
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 8)
+; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -94,39 +96,41 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
-; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
-; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
-; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
+; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[TMP11]] to <vscale x 2 x i8>
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 42 additions & 12 deletions
@@ -768,30 +768,60 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; DEFAULT-LABEL: define void @multiple_exit_conditions(
 ; DEFAULT-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] {
 ; DEFAULT-NEXT: entry:
-; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP9:%.*]] = mul i64 [[TMP7]], 8
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 257, [[TMP9]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; DEFAULT: vector.ph:
-; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 2048
+; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP3]]
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[TMP10]], 8
+; DEFAULT-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 8
+; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 2
 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
 ; DEFAULT: vector.body:
 ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
 ; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
-; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP1]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+; DEFAULT-NEXT: [[TMP11:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
+; DEFAULT-NEXT: [[TMP12:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
+; DEFAULT-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
+; DEFAULT-NEXT: [[TMP14:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
+; DEFAULT-NEXT: [[TMP15:%.*]] = uitofp <vscale x 2 x i16> [[TMP11]] to <vscale x 2 x double>
+; DEFAULT-NEXT: [[TMP16:%.*]] = uitofp <vscale x 2 x i16> [[TMP12]] to <vscale x 2 x double>
+; DEFAULT-NEXT: [[TMP17:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
+; DEFAULT-NEXT: [[TMP18:%.*]] = uitofp <vscale x 2 x i16> [[TMP14]] to <vscale x 2 x double>
 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
-; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
-; DEFAULT-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; DEFAULT-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2
+; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP21]]
+; DEFAULT-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 4
+; DEFAULT-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP24]]
+; DEFAULT-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 6
+; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP27]]
+; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP15]], ptr [[TMP4]], align 8
+; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP16]], ptr [[TMP22]], align 8
+; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP17]], ptr [[TMP25]], align 8
+; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP18]], ptr [[TMP28]], align 8
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; DEFAULT-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; DEFAULT: middle.block:
-; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 257, [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; DEFAULT: scalar.ph:
 ; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY:%.*]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 512, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; DEFAULT-NEXT: br label [[LOOP:%.*]]
 ; DEFAULT: vector.scevcheck:
 ; DEFAULT-NEXT: unreachable
