Skip to content

Commit c45d230

Browse files
committed
[LV][VPlan] Implement VPlan-based cost for exit condition.
This patch models the cost of exit conditions through the VPlan-based cost model. * `BranchOnCount` will generate icmp + br. The cost of the branch instruction is already accounted for by the VPRegionBlock, so we only need to calculate the cost of the icmp. If the VF is the same as the trip count of the loop, the BranchOnCount is free. This patch is not quite NFC for the following reasons: * Some of the BranchOnCount recipes could be optimized to BranchOnCond, which is free. * Some of the instructions costed as part of the exit condition in the legacy cost model will be optimized out.
1 parent cd4c10a commit c45d230

15 files changed

+338
-336
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7290,46 +7290,6 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72907290
}
72917291
}
72927292

7293-
/// Compute the cost of all exiting conditions of the loop using the legacy
7294-
/// cost model. This is to match the legacy behavior, which adds the cost of
7295-
/// all exit conditions. Note that this over-estimates the cost, as there will
7296-
/// be a single condition to control the vector loop.
7297-
SmallVector<BasicBlock *> Exiting;
7298-
CM.TheLoop->getExitingBlocks(Exiting);
7299-
SetVector<Instruction *> ExitInstrs;
7300-
// Collect all exit conditions.
7301-
for (BasicBlock *EB : Exiting) {
7302-
auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
7303-
if (!Term || CostCtx.skipCostComputation(Term, VF.isVector()))
7304-
continue;
7305-
if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
7306-
ExitInstrs.insert(CondI);
7307-
}
7308-
}
7309-
// Compute the cost of all instructions only feeding the exit conditions.
7310-
for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
7311-
Instruction *CondI = ExitInstrs[I];
7312-
if (!OrigLoop->contains(CondI) ||
7313-
!CostCtx.SkipCostComputation.insert(CondI).second)
7314-
continue;
7315-
InstructionCost CondICost = CostCtx.getLegacyCost(CondI, VF);
7316-
LLVM_DEBUG({
7317-
dbgs() << "Cost of " << CondICost << " for VF " << VF
7318-
<< ": exit condition instruction " << *CondI << "\n";
7319-
});
7320-
Cost += CondICost;
7321-
for (Value *Op : CondI->operands()) {
7322-
auto *OpI = dyn_cast<Instruction>(Op);
7323-
if (!OpI || CostCtx.skipCostComputation(OpI, VF.isVector()) ||
7324-
any_of(OpI->users(), [&ExitInstrs, this](User *U) {
7325-
return OrigLoop->contains(cast<Instruction>(U)->getParent()) &&
7326-
!ExitInstrs.contains(cast<Instruction>(U));
7327-
}))
7328-
continue;
7329-
ExitInstrs.insert(OpI);
7330-
}
7331-
}
7332-
73337293
// The legacy cost model has special logic to compute the cost of in-loop
73347294
// reductions, which may be smaller than the sum of all instructions involved
73357295
// in the reduction.
@@ -7486,6 +7446,11 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
74867446
// comparing against the legacy cost isn't desirable.
74877447
if (isa<VPPartialReductionRecipe>(&R))
74887448
return true;
7449+
7450+
// The legacy cost model will underestimate the cost of BranchOnCount if the exit condition was explicitly constructed in the VPlan.
7451+
if (VPInstruction *VPI = dyn_cast<VPInstruction>(&R); VPI && VPI->getOpcode() == VPInstruction::BranchOnCount)
7452+
return true;
7453+
74897454
if (Instruction *UI = GetInstructionForCost(&R))
74907455
SeenInstrs.insert(UI);
74917456
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
743743
return Ctx.TTI.getArithmeticReductionCost(
744744
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
745745
}
746+
case VPInstruction::BranchOnCount: {
747+
if (getUnderlyingValue())
748+
// BranchOnCount will generate icmp_eq + br instructions and the
749+
// cost of branch will be calculated in VPRegionBlock.
750+
// If the vector loop executes only once, ignore the cost of the cmp.
751+
Type *ValTy = Ctx.Types.inferScalarType(getOperand(0));
752+
auto TC = dyn_cast_if_present<ConstantInt>(
753+
getParent()->getPlan()->getTripCount()->getUnderlyingValue());
754+
if (TC && VF.isFixed() && TC->getSExtValue() == VF.getFixedValue())
755+
return 0;
756+
return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ValTy, nullptr,
757+
CmpInst::ICMP_EQ, Ctx.CostKind);
758+
}
746759
default:
747760
// TODO: Compute cost other VPInstructions once the legacy cost model has
748761
// been retired.

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,41 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
11-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
11+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
1212
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1616
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
17-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
17+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
19-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
22-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
23-
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
24-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
19+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
22+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
23+
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 1)
24+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
2525
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
26-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
27-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
27+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
31-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
32-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
31+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
32+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3333
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
3434
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
35-
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
36-
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
37-
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
35+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 3)
36+
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
37+
; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[TMP11]] to <vscale x 2 x i8>
3838
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
39-
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
39+
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
4040
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
41-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
42-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
43-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
41+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 8)
42+
; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
43+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
44+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
45+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4446
; CHECK: middle.block:
4547
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
4648
; CHECK: scalar.ph:
@@ -94,39 +96,41 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
9496
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9597
; CHECK: vector.ph:
9698
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
97-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
99+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
98100
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
99101
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
100102
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
101103
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
102104
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
103-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
105+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
104106
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
108-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
109-
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
110-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
107+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
108+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
109+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
110+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
111+
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 1)
112+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
111113
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
112-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
113-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
115+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
114116
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115117
; CHECK: vector.body:
116118
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
117-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
118-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
119+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
120+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
119121
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
120122
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
121-
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
122-
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
123-
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
123+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 3)
124+
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
125+
; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[TMP11]] to <vscale x 2 x i8>
124126
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
125-
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
127+
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
126128
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
127-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
128-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
129-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
129+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
130+
; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
131+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
132+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
133+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
130134
; CHECK: middle.block:
131135
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
132136
; CHECK: scalar.ph:

0 commit comments

Comments
 (0)