[LV] Support predicated div/rem operations via safe-divisor select idiom
This patch adds support for vectorizing conditionally executed div/rem operations via a variant of widening. The existing support for predicated div/rem in the vectorizer requires scalarization, which we can't do for scalable vectors.

The basic idea is that we can always divide (or take the remainder) by 1 without executing UB. As such, we can use the active lane mask to conditionally select either the actual divisor for active lanes, or a constant one for inactive lanes. We already account for the cost of the active lane mask, so the only additional cost is a splat of one and the vector select.

This is one of several possible approaches to this problem; see the review thread for discussion of some of the others. This one was chosen mostly because it was straightforward, and none of the others seemed obviously better.

I enabled the new code only for scalable vectors. We could legally enable it for fixed vectors as well, but I haven't thought through the cost tradeoffs between widening and scalarization enough to know whether that's profitable. This will be explored in future patches.

Differential Revision: https://reviews.llvm.org/D130164
1 parent 689895f commit f79214d
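
To make the idiom concrete, here is a minimal hand-written IR sketch using fixed-width types (names hypothetical; the actual transform produces scalable vectors guarded by the active lane mask, as in the updated tests below):

  ; Divide only where %mask is set: inactive lanes get a divisor of 1.
  ; udiv by 1 is always well defined, and consumers (e.g. a masked
  ; store) discard the results computed for inactive lanes.
  define <4 x i32> @safe_divisor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask) {
    %safe.b = select <4 x i1> %mask, <4 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    %div = udiv <4 x i32> %a, %safe.b
    ret <4 x i32> %div
  }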

File tree: 5 files changed, +302 −106 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 118 additions & 75 deletions
@@ -4473,7 +4473,11 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I,
   case Instruction::URem:
     // TODO: We can use the loop-preheader as context point here and get
     // context sensitive reasoning
-    return !isSafeToSpeculativelyExecute(I);
+    // We have the option to use the safe-divisor idiom to avoid predication.
+    // At the moment this is only used for scalable (which legally can't
+    // scalarize), but long term we want to make a cost based decision
+    // for fixed length vectors as well.
+    return !VF.isScalable() && !isSafeToSpeculativelyExecute(I);
   }
 }

@@ -7059,33 +7063,60 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    // If we have a predicated instruction, it may not be executed for each
-    // vector lane. Get the scalarization cost and scale this amount by the
-    // probability of executing the predicated block. If the instruction is not
-    // predicated, we fall through to the next case.
-    if (VF.isVector() && isScalarWithPredication(I, VF)) {
+    if (VF.isVector() && blockNeedsPredicationForAnyReason(I->getParent()) &&
+        !isSafeToSpeculativelyExecute(I)) {
+      // If we're speculating lanes, we have two options - scalarization and
+      // guarded widening.
+      if (isScalarWithPredication(I, VF)) {
+        // Get the scalarization cost and scale this amount by the probability
+        // of executing the predicated block. If the instruction is not
+        // predicated, we fall through to the next case.
+        InstructionCost Cost = 0;
+
+        // These instructions have a non-void type, so account for the phi
+        // nodes that we will create. This cost is likely to be zero. The phi
+        // node cost, if any, should be scaled by the block probability
+        // because it models a copy at the end of each predicated block.
+        Cost += VF.getKnownMinValue() *
+                TTI.getCFInstrCost(Instruction::PHI, CostKind);
+
+        // The cost of the non-predicated instruction.
+        Cost += VF.getKnownMinValue() *
+                TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
+
+        // The cost of insertelement and extractelement instructions needed
+        // for scalarization.
+        Cost += getScalarizationOverhead(I, VF);
+
+        // Scale the cost by the probability of executing the predicated
+        // blocks. This assumes the predicated block for each vector lane is
+        // equally likely.
+        return Cost / getReciprocalPredBlockProb();
+      }
       InstructionCost Cost = 0;

-      // These instructions have a non-void type, so account for the phi nodes
-      // that we will create. This cost is likely to be zero. The phi node
-      // cost, if any, should be scaled by the block probability because it
-      // models a copy at the end of each predicated block.
-      Cost += VF.getKnownMinValue() *
-              TTI.getCFInstrCost(Instruction::PHI, CostKind);
-
-      // The cost of the non-predicated instruction.
-      Cost += VF.getKnownMinValue() *
-              TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
-
-      // The cost of insertelement and extractelement instructions needed for
-      // scalarization.
-      Cost += getScalarizationOverhead(I, VF);
-
-      // Scale the cost by the probability of executing the predicated blocks.
-      // This assumes the predicated block for each vector lane is equally
-      // likely.
-      return Cost / getReciprocalPredBlockProb();
+      // The cost of the select guard to ensure all lanes are well defined
+      // after we speculate above any internal control flow.
+      Cost += TTI.getCmpSelInstrCost(
+          Instruction::Select, ToVectorTy(I->getType(), VF),
+          ToVectorTy(Type::getInt1Ty(I->getContext()), VF),
+          CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+      // Certain instructions can be cheaper to vectorize if they have a
+      // constant second vector operand. One example of this are shifts on
+      // x86.
+      Value *Op2 = I->getOperand(1);
+      auto Op2Info = TTI.getOperandInfo(Op2);
+      if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+          Legal->isUniform(Op2))
+        Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
+
+      SmallVector<const Value *, 4> Operands(I->operand_values());
+      Cost += TTI.getArithmeticInstrCost(
+          I->getOpcode(), VectorTy, CostKind,
+          {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+          Op2Info, Operands, I);
+      return Cost;
     }
+    // We've proven all lanes safe to speculate, fall through.
     [[fallthrough]];
   case Instruction::Add:
   case Instruction::FAdd:
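
Spelled out, the two estimates computed above are roughly the following (a sketch of the arithmetic only; getReciprocalPredBlockProb() currently returns 2, i.e. a predicated block is assumed to execute on half the iterations):

$$C_{\text{scalarize}} = \frac{\mathrm{VF} \cdot (C_{\text{PHI}} + C_{\text{op}}) + C_{\text{insert/extract}}}{2}, \qquad C_{\text{widen}} = C_{\text{select}} + C_{\text{op,vec}}$$

Note the code does not yet pick the cheaper of the two per VF: the scalarization estimate is used when isScalarWithPredication() holds, and the guarded-widening estimate otherwise. A genuine cost-based choice for fixed-width vectors is left to future patches, per the commit message.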
@@ -8323,55 +8354,66 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
                                                   Range);
 }

-VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
-                                           ArrayRef<VPValue *> Operands) const {
-  auto IsVectorizableOpcode = [](unsigned Opcode) {
-    switch (Opcode) {
-    case Instruction::Add:
-    case Instruction::And:
-    case Instruction::AShr:
-    case Instruction::BitCast:
-    case Instruction::FAdd:
-    case Instruction::FCmp:
-    case Instruction::FDiv:
-    case Instruction::FMul:
-    case Instruction::FNeg:
-    case Instruction::FPExt:
-    case Instruction::FPToSI:
-    case Instruction::FPToUI:
-    case Instruction::FPTrunc:
-    case Instruction::FRem:
-    case Instruction::FSub:
-    case Instruction::ICmp:
-    case Instruction::IntToPtr:
-    case Instruction::LShr:
-    case Instruction::Mul:
-    case Instruction::Or:
-    case Instruction::PtrToInt:
-    case Instruction::SDiv:
-    case Instruction::Select:
-    case Instruction::SExt:
-    case Instruction::Shl:
-    case Instruction::SIToFP:
-    case Instruction::SRem:
-    case Instruction::Sub:
-    case Instruction::Trunc:
-    case Instruction::UDiv:
-    case Instruction::UIToFP:
-    case Instruction::URem:
-    case Instruction::Xor:
-    case Instruction::ZExt:
-    case Instruction::Freeze:
-      return true;
+VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
+                                          ArrayRef<VPValue *> Operands,
+                                          VPBasicBlock *VPBB, VPlanPtr &Plan) {
+  switch (I->getOpcode()) {
+  default:
+    return nullptr;
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem: {
+    // If not provably safe, use a select to form a safe divisor before
+    // widening the div/rem operation itself. Otherwise fall through to
+    // general handling below.
+    if (CM.blockNeedsPredicationForAnyReason(I->getParent()) &&
+        !isSafeToSpeculativelyExecute(I)) {
+      SmallVector<VPValue *> Ops(Operands.begin(), Operands.end());
+      VPValue *Mask = createBlockInMask(I->getParent(), Plan);
+      VPValue *One =
+          Plan->getOrAddExternalDef(ConstantInt::get(I->getType(), 1u, false));
+      auto *SafeRHS =
+          new VPInstruction(Instruction::Select, {Mask, Ops[1], One},
+                            I->getDebugLoc());
+      VPBB->appendRecipe(SafeRHS);
+      Ops[1] = SafeRHS;
+      return new VPWidenRecipe(*I, make_range(Ops.begin(), Ops.end()));
     }
-    return false;
+    LLVM_FALLTHROUGH;
+  }
+  case Instruction::Add:
+  case Instruction::And:
+  case Instruction::AShr:
+  case Instruction::BitCast:
+  case Instruction::FAdd:
+  case Instruction::FCmp:
+  case Instruction::FDiv:
+  case Instruction::FMul:
+  case Instruction::FNeg:
+  case Instruction::FPExt:
+  case Instruction::FPToSI:
+  case Instruction::FPToUI:
+  case Instruction::FPTrunc:
+  case Instruction::FRem:
+  case Instruction::FSub:
+  case Instruction::ICmp:
+  case Instruction::IntToPtr:
+  case Instruction::LShr:
+  case Instruction::Mul:
+  case Instruction::Or:
+  case Instruction::PtrToInt:
+  case Instruction::Select:
+  case Instruction::SExt:
+  case Instruction::Shl:
+  case Instruction::SIToFP:
+  case Instruction::Sub:
+  case Instruction::Trunc:
+  case Instruction::UIToFP:
+  case Instruction::Xor:
+  case Instruction::ZExt:
+  case Instruction::Freeze:
+    return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
   };
-
-  if (!IsVectorizableOpcode(I->getOpcode()))
-    return nullptr;
-
-  // Success: widen this instruction.
-  return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
 }

 void VPRecipeBuilder::fixHeaderPhis() {
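
Putting the recipe construction together, the select guard plus widened div that these recipes lower to looks roughly like this scalable-vector sketch (hand-written; value names are hypothetical, and %one stands for the splat-of-1 constant the vectorizer materializes, shown in the sve-tail-folding.ll checks below):

  define void @guarded_udiv_sketch(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
                                   <vscale x 4 x i1> %mask, <vscale x 4 x i32> %one,
                                   <vscale x 4 x i32>* %dst) {
    ; Guard: lanes outside %mask divide by 1, which cannot trap.
    %safe.b = select <vscale x 4 x i1> %mask, <vscale x 4 x i32> %b, <vscale x 4 x i32> %one
    %div = udiv <vscale x 4 x i32> %a, %safe.b
    ; Results computed for inactive lanes are dropped by the masked store.
    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %div, <vscale x 4 x i32>* %dst, i32 4, <vscale x 4 x i1> %mask)
    ret void
  }
  declare void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)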
@@ -8506,7 +8548,8 @@ VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe,
 VPRecipeOrVPValueTy
 VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
                                         ArrayRef<VPValue *> Operands,
-                                        VFRange &Range, VPlanPtr &Plan) {
+                                        VFRange &Range, VPBasicBlock *VPBB,
+                                        VPlanPtr &Plan) {
   // First, check for specific widening recipes that deal with inductions, Phi
   // nodes, calls and memory operations.
   VPRecipeBase *Recipe;
@@ -8584,7 +8627,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
         *SI, make_range(Operands.begin(), Operands.end()), InvariantCond));
   }

-  return toVPRecipeResult(tryToWiden(Instr, Operands));
+  return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
 }

 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8855,7 +8898,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
       continue;

     if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
-            Instr, Operands, Range, Plan)) {
+            Instr, Operands, Range, VPBB, Plan)) {
       // If Instr can be simplified to an existing VPValue, use it.
       if (RecipeOrValue.is<VPValue *>()) {
         auto *VPV = RecipeOrValue.get<VPValue *>();

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 4 additions & 2 deletions
@@ -100,7 +100,8 @@ class VPRecipeBuilder {
   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
   /// if it can. The function should only be called if the cost-model indicates
   /// that widening should be performed.
-  VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands) const;
+  VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
+                           VPBasicBlock *VPBB, VPlanPtr &Plan);

   /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be
   /// used to force the use as VPRecipeBase* for recipe sub-types that also
   /// inherit from VPValue.
   VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }
@@ -119,7 +120,8 @@ class VPRecipeBuilder {
   /// VPRecipeOrVPValueTy with nullptr.
   VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr,
                                              ArrayRef<VPValue *> Operands,
-                                             VFRange &Range, VPlanPtr &Plan);
+                                             VFRange &Range, VPBasicBlock *VPBB,
+                                             VPlanPtr &Plan);

   /// Set the recipe created for given ingredient. This operation is a no-op for
   /// ingredients that were not marked using a nullptr entry in the map.

llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll

Lines changed: 1 addition & 5 deletions
@@ -11,13 +11,9 @@ target triple = "aarch64-unknown-linux-gnu"
 ;   a[i] /= b[i];
 ; }

-; Scalarizing the division cannot be done for scalable vectors at the moment
-; when the loop needs predication
-; Future implementation of llvm.vp could allow this to happen
-
 define void @predication_in_loop(i32* %a, i32* %b, i32* %cond) #0 {
 ; CHECK-LABEL: @predication_in_loop
-; CHECK-NOT: sdiv <vscale x 4 x i32>
+; CHECK: sdiv <vscale x 4 x i32>
 ;
 entry:
   br label %for.body
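
For reference, tests in this directory are driven by a RUN line of roughly the following shape (the file's actual RUN line is not part of this diff, so the exact flags here are an assumption):

  ; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s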

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll

Lines changed: 51 additions & 9 deletions
@@ -694,25 +694,67 @@ while.end.loopexit: ; preds = %while.body
   ret void
 }

-; Negative tests where we don't expect tail-folding
-
-; Integer divides can throw exceptions and since we can't scalarize conditional
-; divides for scalable vectors we just don't bother vectorizing.
+; Integer divides can throw exceptions; if we vectorize, we must ensure
+; that speculated lanes don't fault.
 define void @simple_idiv(i32* noalias %dst, i32* noalias %src, i64 %n) #0 {
 ; CHECK-LABEL: @simple_idiv(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>*
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <vscale x 4 x i32>*
+; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = udiv <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP14]] to <vscale x 4 x i32>*
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x i32>* [[TMP18]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4
+; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP20]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT3]], i64 [[UMAX]])
+; CHECK-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0
+; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
 ; CHECK: while.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[GEP1]], align 4
 ; CHECK-NEXT: [[VAL2:%.*]] = load i32, i32* [[GEP2]], align 4
 ; CHECK-NEXT: [[RES:%.*]] = udiv i32 [[VAL1]], [[VAL2]]
 ; CHECK-NEXT: store i32 [[RES]], i32* [[GEP2]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP21:![0-9]+]]
 ; CHECK: while.end.loopexit:
 ; CHECK-NEXT: ret void
 ;
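
One detail worth calling out in the checks above: scalable vectors have no literal splat syntax, so the constant-1 divisor appears as an insertelement/shufflevector constant expression (the long operand of the select feeding the udiv). The equivalent instruction form, as a standalone sketch:

  ; Splat i32 1 across a scalable vector: insert into lane 0, then
  ; broadcast lane 0 via a zeroinitializer shuffle mask.
  define <vscale x 4 x i32> @splat_one() {
    %ins = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
    %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
    ret <vscale x 4 x i32> %splat
  }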
