Skip to content

Commit 83b5806

Browse files
committed
[LV][EVL] Support icmp/fcmp instruction with EVL-vectorization
1 parent 7ad63c0 commit 83b5806

File tree

7 files changed

+166
-18
lines changed

7 files changed

+166
-18
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11711,7 +11711,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
1171111711
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
1171211712
FMF = FPMO->getFastMathFlags();
1171311713
IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
11714-
FMF);
11714+
FMF, II);
1171511715
InstructionCost IntrCost =
1171611716
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
1171711717
InstructionCost CallCost = TTI->getCallInstrCost(

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1457,6 +1457,34 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
14571457
void VPWidenEVLRecipe::execute(VPTransformState &State) {
14581458
unsigned Opcode = getOpcode();
14591459
// TODO: Support other opcodes
1460+
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
1461+
Value *Op1 = State.get(getOperand(0), 0);
1462+
Value *Op2 = State.get(getOperand(1), 0);
1463+
auto &Ctx = State.Builder.getContext();
1464+
Value *Pred = MetadataAsValue::get(
1465+
Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
1466+
1467+
IRBuilderBase &BuilderIR = State.Builder;
1468+
VectorBuilder Builder(BuilderIR);
1469+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1470+
Builder.setMask(Mask).setEVL(State.get(getEVL(), /*NeedsScalar=*/true));
1471+
1472+
VectorType *DataType = VectorType::get(Type::getInt1Ty(Ctx), State.VF);
1473+
1474+
Value *VPInst = Builder.createVectorInstruction(Opcode, DataType,
1475+
{Op1, Op2, Pred}, "vp.op");
1476+
// if (isa<FPMathOperator>(VPInst))
1477+
// setFlags(cast<Instruction>(VPInst));
1478+
if (VPInst) {
1479+
if (auto *VecOp = dyn_cast<CastInst>(VPInst))
1480+
VecOp->copyIRFlags(getUnderlyingInstr());
1481+
}
1482+
State.set(this, VPInst, 0);
1483+
State.addMetadata(VPInst,
1484+
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1485+
return;
1486+
}
1487+
14601488
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
14611489
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
14621490

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1476,7 +1476,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
14761476
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
14771477
unsigned Opcode = W->getOpcode();
14781478
if (!Instruction::isBinaryOp(Opcode) &&
1479-
!Instruction::isUnaryOp(Opcode))
1479+
!Instruction::isUnaryOp(Opcode) &&
1480+
Opcode != Instruction::ICmp && Opcode != Instruction::FCmp)
14801481
return nullptr;
14811482
return new VPWidenEVLRecipe(*W, EVL);
14821483
})

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
282282
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
283283
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
284284
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
285-
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
285+
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"sgt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
286286
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer
287287
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]])
288288
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -45,18 +45,18 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
4545
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]]
4646
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
4747
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
48-
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
49-
; IF-EVL-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
50-
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
51-
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0
52-
; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
53-
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
54-
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
55-
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
56-
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
48+
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, metadata !"ne", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
49+
; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VP_OP]], <vscale x 4 x i1> zeroinitializer
50+
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
51+
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0
52+
; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
53+
; IF-EVL-NEXT: [[VP_OP4:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
54+
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP4]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
55+
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64
56+
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
5757
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
58-
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
59-
; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
58+
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
59+
; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6060
; IF-EVL: middle.block:
6161
; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6262
; IF-EVL: scalar.ph:
@@ -65,13 +65,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
6565
; IF-EVL: for.body:
6666
; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6767
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]]
68-
; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
69-
; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP23]], 0
68+
; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
69+
; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP22]], 0
7070
; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
7171
; IF-EVL: if.then:
7272
; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]]
73-
; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
74-
; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], [[TMP24]]
73+
; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
74+
; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP22]], [[TMP23]]
7575
; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4
7676
; IF-EVL-NEXT: br label [[FOR_INC]]
7777
; IF-EVL: for.inc:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
166166
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
167167
; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
168168
; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
169+
169170
; IF-EVL: middle.block:
170171
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
171172
; IF-EVL: scalar.ph:
Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,118 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
4+
; RUN: -force-tail-folding-style=data-with-evl \
5+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
6+
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
7+
8+
define void @vp_icmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
9+
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
10+
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
11+
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
12+
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
13+
14+
; IF-EVL: vector.ph:
15+
; IF-EVL-NEXT: Successor(s): vector loop
16+
17+
; IF-EVL: <x1> vector loop: {
18+
; IF-EVL-NEXT: vector.body:
19+
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
20+
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
21+
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
22+
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
23+
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
24+
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
25+
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
26+
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
27+
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
28+
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
29+
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
30+
; IF-EVL-NEXT: WIDEN ir<[[ICMP:%.+]]> = vp.icmp sgt ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>
31+
; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[ICMP]]> to i32
32+
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
33+
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
34+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
35+
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
36+
; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
37+
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
38+
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
39+
; IF-EVL-NEXT: No successors
40+
; IF-EVL-NEXT: }
41+
42+
entry:
43+
%cmp12 = icmp sgt i64 %N, 0
44+
br i1 %cmp12, label %for.body, label %for.cond.cleanup
45+
46+
for.cond.cleanup: ; preds = %for.body, %entry
47+
ret void
48+
49+
for.body: ; preds = %entry, %for.body
50+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
51+
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
52+
%0 = load i32, ptr %arrayidx, align 4
53+
%arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
54+
%1 = load i32, ptr %arrayidx3, align 4
55+
%cmp4 = icmp sgt i32 %0, %1
56+
%conv5 = zext i1 %cmp4 to i32
57+
%arrayidx7 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
58+
store i32 %conv5, ptr %arrayidx7, align 4
59+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
60+
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
61+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
62+
}
63+
64+
define void @vp_fcmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
65+
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
66+
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
67+
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
68+
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
69+
70+
; IF-EVL: vector.ph:
71+
; IF-EVL-NEXT: Successor(s): vector loop
72+
73+
; IF-EVL: <x1> vector loop: {
74+
; IF-EVL-NEXT: vector.body:
75+
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
76+
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
77+
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
78+
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
79+
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
80+
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
81+
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
82+
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
83+
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
84+
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
85+
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
86+
; IF-EVL-NEXT: WIDEN ir<[[FCMP:%.+]]> = vp.fcmp ogt ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>
87+
; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[FCMP]]> to float
88+
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
89+
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
90+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
91+
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
92+
; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
93+
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
94+
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
95+
; IF-EVL-NEXT: No successors
96+
; IF-EVL-NEXT: }
97+
98+
entry:
99+
%cmp13 = icmp sgt i64 %N, 0
100+
br i1 %cmp13, label %for.body, label %for.cond.cleanup
101+
102+
for.cond.cleanup:
103+
ret void
104+
105+
for.body:
106+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
107+
%arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
108+
%0 = load float, ptr %arrayidx, align 4
109+
%arrayidx3 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
110+
%1 = load float, ptr %arrayidx3, align 4
111+
%cmp4 = fcmp ogt float %0, %1
112+
%conv6 = uitofp i1 %cmp4 to float
113+
%arrayidx8 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
114+
store float %conv6, ptr %arrayidx8, align 4
115+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
116+
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
117+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
118+
}

0 commit comments

Comments
 (0)