Skip to content

Commit ab25633

Browse files
committed
[LoopVectorize] Ensure fairness when selecting epilogue VFs
Whilst rebasing PR llvm#116247 I discovered an issue where PR llvm#108190 seems to have unintentionally introduced an unfairness in selecting epilogue VFs by making potentially better choices for fixed-width VFs compared to scalable VFs. When considering whether epilogue vectorisation is profitable or not, the latest algorithm appears to be:

  bool IsProfitable = false;
  if (VF.isFixed())
    IsProfitable = (IC * VF.getFixedValue()) >= EpilogueVectorizationMinVF;
  else
    IsProfitable = (getVScaleForTuning() * VF.getKnownMinValue()) >= EpilogueVectorizationMinVF;

Instead, the estimate for the number of scalar iterations processed in the main vector loop should be (IC * estimatedRuntimeVF).
1 parent 6a12b43 commit ab25633

File tree

4 files changed

+154
-39
lines changed

4 files changed

+154
-39
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4731,7 +4731,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47314731

47324732
unsigned Multiplier = IC;
47334733
if (MainLoopVF.isScalable())
4734-
Multiplier = getVScaleForTuning(OrigLoop, TTI).value_or(1);
4734+
Multiplier *= getVScaleForTuning(OrigLoop, TTI).value_or(1);
47354735

47364736
if (!CM.isEpilogueVectorizationProfitable(MainLoopVF, Multiplier)) {
47374737
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,15 @@ exit:
302302
define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
303303
; DEFAULT-LABEL: define i16 @reduce_udiv(
304304
; DEFAULT-SAME: ptr [[SRC:%.*]], i16 [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
305-
; DEFAULT-NEXT: entry:
305+
; DEFAULT-NEXT: iter.check:
306306
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
307307
; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
308-
; DEFAULT-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
308+
; DEFAULT-NEXT: [[TMP8:%.*]] = mul i64 [[TMP1]], 2
309+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP8]]
310+
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[ENTRY:%.*]]
311+
; DEFAULT: vector.main.loop.iter.check:
312+
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
313+
; DEFAULT-NEXT: [[TMP2:%.*]] = mul i64 [[TMP9]], 8
309314
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
310315
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
311316
; DEFAULT: vector.ph:
@@ -336,28 +341,63 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
336341
; DEFAULT-NEXT: [[TMP22]] = or <vscale x 4 x i16> [[TMP20]], [[VEC_PHI1]]
337342
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
338343
; DEFAULT-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
339-
; DEFAULT-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
344+
; DEFAULT-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
340345
; DEFAULT: middle.block:
341346
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i16> [[TMP22]], [[TMP21]]
342347
; DEFAULT-NEXT: [[TMP24:%.*]] = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> [[BIN_RDX]])
343348
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
344-
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
345-
; DEFAULT: scalar.ph:
346-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
349+
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
350+
; DEFAULT: vec.epilog.iter.check:
351+
; DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
352+
; DEFAULT-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
353+
; DEFAULT-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 2
354+
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP36]]
355+
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]]
356+
; DEFAULT: vec.epilog.ph:
347357
; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
358+
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
359+
; DEFAULT-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
360+
; DEFAULT-NEXT: [[TMP38:%.*]] = mul i64 [[TMP37]], 2
361+
; DEFAULT-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], [[TMP38]]
362+
; DEFAULT-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
363+
; DEFAULT-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
364+
; DEFAULT-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2
365+
; DEFAULT-NEXT: [[TMP27:%.*]] = insertelement <vscale x 2 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0
366+
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0
367+
; DEFAULT-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT9]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
348368
; DEFAULT-NEXT: br label [[LOOP:%.*]]
369+
; DEFAULT: vec.epilog.vector.body:
370+
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT11:%.*]], [[LOOP]] ]
371+
; DEFAULT-NEXT: [[VEC_PHI7:%.*]] = phi <vscale x 2 x i16> [ [[TMP27]], [[SCALAR_PH]] ], [ [[TMP32:%.*]], [[LOOP]] ]
372+
; DEFAULT-NEXT: [[TMP28:%.*]] = add i64 [[INDEX6]], 0
373+
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP28]]
374+
; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr i16, ptr [[TMP29]], i32 0
375+
; DEFAULT-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x i16>, ptr [[TMP30]], align 2
376+
; DEFAULT-NEXT: [[TMP31:%.*]] = udiv <vscale x 2 x i16> [[WIDE_LOAD8]], [[BROADCAST_SPLAT10]]
377+
; DEFAULT-NEXT: [[TMP32]] = or <vscale x 2 x i16> [[TMP31]], [[VEC_PHI7]]
378+
; DEFAULT-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX6]], [[TMP26]]
379+
; DEFAULT-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]]
380+
; DEFAULT-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
381+
; DEFAULT: vec.epilog.middle.block:
382+
; DEFAULT-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> [[TMP32]])
383+
; DEFAULT-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
384+
; DEFAULT-NEXT: br i1 [[CMP_N12]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
385+
; DEFAULT: vec.epilog.scalar.ph:
386+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ]
387+
; DEFAULT-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i16 [ [[TMP34]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
388+
; DEFAULT-NEXT: br label [[LOOP1:%.*]]
349389
; DEFAULT: loop:
350-
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
351-
; DEFAULT-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ]
390+
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ]
391+
; DEFAULT-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX13]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP1]] ]
352392
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
353393
; DEFAULT-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
354394
; DEFAULT-NEXT: [[DIV:%.*]] = udiv i16 [[L]], [[X]]
355395
; DEFAULT-NEXT: [[RED_NEXT]] = or i16 [[DIV]], [[RED]]
356396
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
357397
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
358-
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
398+
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP6:![0-9]+]]
359399
; DEFAULT: exit:
360-
; DEFAULT-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i16 [ [[RED_NEXT]], [[LOOP]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
400+
; DEFAULT-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i16 [ [[RED_NEXT]], [[LOOP1]] ], [ [[TMP24]], [[MIDDLE_BLOCK1]] ], [ [[TMP34]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
361401
; DEFAULT-NEXT: ret i16 [[RED_NEXT_LCSSA]]
362402
;
363403
; PRED-LABEL: define i16 @reduce_udiv(
@@ -445,7 +485,8 @@ attributes #0 = { "target-features"="+sve" }
445485
; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
446486
; DEFAULT: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
447487
; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
448-
; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
488+
; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
489+
; DEFAULT: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
449490
;.
450491
; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
451492
; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -281,10 +281,15 @@ for.cond.cleanup: ; preds = %for.inc, %entry
281281

282282
define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
283283
; CHECK-LABEL: @gather_nxv4i32_ind64_stride2(
284-
; CHECK-NEXT: entry:
284+
; CHECK-NEXT: iter.check:
285285
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
286-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
287-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[N:%.*]], [[TMP1]]
286+
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP0]], 1
287+
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[N:%.*]], [[TMP8]]
288+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT]], label [[ENTRY:%.*]], label [[VEC_EPILOG_SCALAR_PH:%.*]]
289+
; CHECK: vector.main.loop.iter.check:
290+
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
291+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP17]], 3
292+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[N]], [[TMP1]]
288293
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
289294
; CHECK: vector.ph:
290295
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -320,22 +325,59 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
320325
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
321326
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
322327
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]]
323-
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
328+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
324329
; CHECK: middle.block:
325-
; CHECK-NEXT: br label [[SCALAR_PH]]
326-
; CHECK: scalar.ph:
327-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
330+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
331+
; CHECK: vec.epilog.iter.check:
332+
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
333+
; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP21]], 1
334+
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[TMP6]], [[TMP22]]
335+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT]], label [[SCALAR_PH]], label [[VEC_EPILOG_SCALAR_PH]]
336+
; CHECK: vec.epilog.ph:
337+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
338+
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
339+
; CHECK-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 1
340+
; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[TMP24]], -1
341+
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = and i64 [[N]], [[TMP25]]
342+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[N_MOD_VF4]], 0
343+
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP24]], i64 [[N_MOD_VF4]]
344+
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[TMP27]]
345+
; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
346+
; CHECK-NEXT: [[TMP29:%.*]] = shl nuw nsw i64 [[TMP28]], 1
347+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
348+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
349+
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
350+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP30]]
351+
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP29]], i64 0
352+
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT8]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
328353
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
354+
; CHECK: vec.epilog.vector.body:
355+
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT10:%.*]], [[FOR_BODY]] ]
356+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[SCALAR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
357+
; CHECK-NEXT: [[TMP31:%.*]] = shl <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
358+
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 2 x i64> [[TMP31]]
359+
; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> [[TMP32]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x float> poison)
360+
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX7]]
361+
; CHECK-NEXT: store <vscale x 2 x float> [[WIDE_MASKED_GATHER1]], ptr [[TMP33]], align 4
362+
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
363+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT9]]
364+
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
365+
; CHECK-NEXT: br i1 [[TMP34]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
366+
; CHECK: vec.epilog.middle.block:
367+
; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]]
368+
; CHECK: vec.epilog.scalar.ph:
369+
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ]
370+
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
329371
; CHECK: for.body:
330-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
372+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ]
331373
; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3
332374
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]]
333375
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
334376
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
335377
; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4
336378
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
337379
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
338-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
380+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY1]], !llvm.loop [[LOOP13:![0-9]+]]
339381
; CHECK: for.cond.cleanup:
340382
; CHECK-NEXT: ret void
341383
;

0 commit comments

Comments
 (0)