Skip to content

Commit 74ff22f

Browse files
committed
Force to predicate a reduction operation when fold by EVL.
1 parent 95e9f9b commit 74ff22f

7 files changed

+125
-75
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,17 @@ class LoopVectorizationCostModel {
15151515
return InLoopReductions.contains(Phi);
15161516
}
15171517

1518+
/// Returns true if the predicated reduction select should be used.
1519+
bool usePredicatedReductionSelect(unsigned Opcode, Type *PhiTy) const {
1520+
// Force to use predicated reduction select since the EVL of the
1521+
// second-to-last iteration might not be VF*UF.
1522+
if (foldTailWithEVL())
1523+
return true;
1524+
return PreferPredicatedReductionSelect ||
1525+
TTI.preferPredicatedReductionSelect(
1526+
Opcode, PhiTy, TargetTransformInfo::ReductionFlags());
1527+
}
1528+
15181529
/// Estimate cost of an intrinsic call instruction CI if it were vectorized
15191530
/// with factor VF. Return the cost of the instruction, including
15201531
/// scalarization overhead if it's needed.
@@ -9344,10 +9355,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93449355
cast<VPInstruction>(&U)->getOpcode() ==
93459356
VPInstruction::ComputeReductionResult;
93469357
});
9347-
if (PreferPredicatedReductionSelect ||
9348-
TTI.preferPredicatedReductionSelect(
9349-
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
9350-
TargetTransformInfo::ReductionFlags()))
9358+
if (CM.usePredicatedReductionSelect(
9359+
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy))
93519360
PhiR->setOperand(1, NewExitingVPV);
93529361
}
93539362

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -136,16 +136,16 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
136136
; IF-EVL-OUTLOOP: vector.body:
137137
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
138138
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
139-
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
139+
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
140140
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
141141
; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 4, i1 true)
142142
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
143143
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
144144
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
145145
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP9]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
146146
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
147-
; IF-EVL-OUTLOOP-NEXT: [[TMP11]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP10]]
148-
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP6]])
147+
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP10]]
148+
; IF-EVL-OUTLOOP-NEXT: [[TMP12]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP6]])
149149
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
150150
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
151151
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -350,19 +350,60 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
350350
;
351351
; IF-EVL-OUTLOOP-LABEL: @smin(
352352
; IF-EVL-OUTLOOP-NEXT: entry:
353+
; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
354+
; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
355+
; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
356+
; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
357+
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
358+
; IF-EVL-OUTLOOP: vector.ph:
359+
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
360+
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
361+
; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
362+
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
363+
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
364+
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
365+
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
366+
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
367+
; IF-EVL-OUTLOOP-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
368+
; IF-EVL-OUTLOOP-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
369+
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
370+
; IF-EVL-OUTLOOP: vector.body:
371+
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
372+
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
373+
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
374+
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
375+
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
376+
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
377+
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
378+
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
379+
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
380+
; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
381+
; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]]
382+
; IF-EVL-OUTLOOP-NEXT: [[TMP16]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
383+
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
384+
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
385+
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
386+
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
387+
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
388+
; IF-EVL-OUTLOOP: middle.block:
389+
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP16]])
390+
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
391+
; IF-EVL-OUTLOOP: scalar.ph:
392+
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
393+
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
353394
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
354395
; IF-EVL-OUTLOOP: for.body:
355-
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
356-
; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ]
357-
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
358-
; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
359-
; IF-EVL-OUTLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP0]], [[RDX]]
360-
; IF-EVL-OUTLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP0]], i32 [[RDX]]
396+
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
397+
; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ]
398+
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
399+
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
400+
; IF-EVL-OUTLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP20]], [[RDX]]
401+
; IF-EVL-OUTLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP20]], i32 [[RDX]]
361402
; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
362-
; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
363-
; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
403+
; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
404+
; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
364405
; IF-EVL-OUTLOOP: for.end:
365-
; IF-EVL-OUTLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ]
406+
; IF-EVL-OUTLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
366407
; IF-EVL-OUTLOOP-NEXT: ret i32 [[SMIN_LCSSA]]
367408
;
368409
; IF-EVL-INLOOP-LABEL: @smin(

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
5353
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP11]])
5454
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
5555
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
56-
; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = add <vscale x 4 x i32> [[TMP19]], [[VEC_PHI]]
57-
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
56+
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = add <vscale x 4 x i32> [[TMP19]], [[VEC_PHI]]
57+
; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
5858
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
5959
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]]
6060
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]]
6161
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N_VEC]]
6262
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6363
; IF-EVL-OUTLOOP: middle.block:
64-
; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP21]])
64+
; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP20]])
6565
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
6666
; IF-EVL-OUTLOOP: scalar.ph:
6767
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
@@ -305,15 +305,15 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
305305
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[VP_OP_LOAD]]
306306
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
307307
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer
308-
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]]
309-
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
308+
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]]
309+
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
310310
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
311311
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
312312
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
313313
; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
314314
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
315315
; IF-EVL-OUTLOOP: middle.block:
316-
; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP22]])
316+
; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]])
317317
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
318318
; IF-EVL-OUTLOOP: scalar.ph:
319319
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,16 +1804,16 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
18041804
; IF-EVL: vector.body:
18051805
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
18061806
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
1807-
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
1807+
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
18081808
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
18091809
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
18101810
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
18111811
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
18121812
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
18131813
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
18141814
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
1815-
; IF-EVL-NEXT: [[TMP15]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
1816-
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
1815+
; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
1816+
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
18171817
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
18181818
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
18191819
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
@@ -1931,16 +1931,16 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
19311931
; IF-EVL: vector.body:
19321932
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
19331933
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
1934-
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
1934+
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
19351935
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
19361936
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
19371937
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
19381938
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
19391939
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
19401940
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
19411941
; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
1942-
; IF-EVL-NEXT: [[TMP15]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
1943-
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
1942+
; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
1943+
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
19441944
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
19451945
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
19461946
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]

0 commit comments

Comments
 (0)