Skip to content

Commit deb1fd8

Browse files
committed
[Test] Add test to set the flags for castInst and selectInst with the EVLRecipe
1 parent 0294dab commit deb1fd8

6 files changed

+1115
-140
lines changed

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 219 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -538,3 +538,222 @@ for.body:
538538
for.end:
539539
ret i32 %smin
540540
}
541+
542+
; Scalar floating-point sum reduction whose fadd carries the `fast` flag.
; The loop loads b[iv], adds it to the running value (seeded with %start),
; and exits once iv + 1 == N; the loop body runs before the exit test.
; %a is not referenced anywhere in the body.
; In the expected output below, the `fast` flag appears on the vectorized
; fadd, vector.reduce.fadd, vp.fadd and vp.reduce.fadd operations of the
; respective configurations.
define nofpclass(nan inf) float @vp_reduction_with_fastflags(ptr %a, ptr %b, i64 %N, float %start) {
543+
; OUTLOOP-LABEL: @vp_reduction_with_fastflags(
544+
; OUTLOOP-NEXT: entry:
545+
; OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
546+
; OUTLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
547+
; OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
548+
; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
549+
; OUTLOOP: vector.ph:
550+
; OUTLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
551+
; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
552+
; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
553+
; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
554+
; OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
555+
; OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
556+
; OUTLOOP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> zeroinitializer, float [[START:%.*]], i32 0
557+
; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
558+
; OUTLOOP: vector.body:
559+
; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
560+
; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
561+
; OUTLOOP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
562+
; OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP7]]
563+
; OUTLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
564+
; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
565+
; OUTLOOP-NEXT: [[TMP10]] = fadd fast <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
566+
; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
567+
; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
568+
; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
569+
; OUTLOOP: middle.block:
570+
; OUTLOOP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP10]])
571+
; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
572+
; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
573+
; OUTLOOP: scalar.ph:
574+
; OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
575+
; OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
576+
; OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
577+
; OUTLOOP: for.body:
578+
; OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
579+
; OUTLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
580+
; OUTLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
581+
; OUTLOOP-NEXT: [[TMP13:%.*]] = load float, ptr [[GEP]], align 4
582+
; OUTLOOP-NEXT: [[ADD]] = fadd fast float [[TMP13]], [[RDX]]
583+
; OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
584+
; OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
585+
; OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
586+
; OUTLOOP: for.end:
587+
; OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
588+
; OUTLOOP-NEXT: ret float [[ADD_LCSSA]]
589+
;
590+
; INLOOP-LABEL: @vp_reduction_with_fastflags(
591+
; INLOOP-NEXT: entry:
592+
; INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
593+
; INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
594+
; INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
595+
; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
596+
; INLOOP: vector.ph:
597+
; INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
598+
; INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
599+
; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
600+
; INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
601+
; INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
602+
; INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
603+
; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
604+
; INLOOP: vector.body:
605+
; INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
606+
; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
607+
; INLOOP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
608+
; INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP6]]
609+
; INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
610+
; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
611+
; INLOOP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[WIDE_LOAD]])
612+
; INLOOP-NEXT: [[TMP10]] = fadd fast float [[TMP9]], [[VEC_PHI]]
613+
; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
614+
; INLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
615+
; INLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
616+
; INLOOP: middle.block:
617+
; INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
618+
; INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
619+
; INLOOP: scalar.ph:
620+
; INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
621+
; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
622+
; INLOOP-NEXT: br label [[FOR_BODY:%.*]]
623+
; INLOOP: for.body:
624+
; INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
625+
; INLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
626+
; INLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
627+
; INLOOP-NEXT: [[TMP12:%.*]] = load float, ptr [[GEP]], align 4
628+
; INLOOP-NEXT: [[ADD]] = fadd fast float [[TMP12]], [[RDX]]
629+
; INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
630+
; INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
631+
; INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
632+
; INLOOP: for.end:
633+
; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
634+
; INLOOP-NEXT: ret float [[ADD_LCSSA]]
635+
;
636+
; IF-EVL-OUTLOOP-LABEL: @vp_reduction_with_fastflags(
637+
; IF-EVL-OUTLOOP-NEXT: entry:
638+
; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
639+
; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
640+
; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
641+
; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
642+
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
643+
; IF-EVL-OUTLOOP: vector.ph:
644+
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
645+
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
646+
; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
647+
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
648+
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
649+
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
650+
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
651+
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
652+
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> zeroinitializer, float [[START:%.*]], i32 0
653+
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
654+
; IF-EVL-OUTLOOP: vector.body:
655+
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
656+
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
657+
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
658+
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
659+
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
660+
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
661+
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]]
662+
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
663+
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
664+
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
665+
; IF-EVL-OUTLOOP-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
666+
; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
667+
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
668+
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
669+
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
670+
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
671+
; IF-EVL-OUTLOOP: middle.block:
672+
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP14]])
673+
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
674+
; IF-EVL-OUTLOOP: scalar.ph:
675+
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
676+
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
677+
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
678+
; IF-EVL-OUTLOOP: for.body:
679+
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
680+
; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
681+
; IF-EVL-OUTLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
682+
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = load float, ptr [[GEP]], align 4
683+
; IF-EVL-OUTLOOP-NEXT: [[ADD]] = fadd fast float [[TMP18]], [[RDX]]
684+
; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
685+
; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
686+
; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
687+
; IF-EVL-OUTLOOP: for.end:
688+
; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
689+
; IF-EVL-OUTLOOP-NEXT: ret float [[ADD_LCSSA]]
690+
;
691+
; IF-EVL-INLOOP-LABEL: @vp_reduction_with_fastflags(
692+
; IF-EVL-INLOOP-NEXT: entry:
693+
; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
694+
; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
695+
; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
696+
; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
697+
; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
698+
; IF-EVL-INLOOP: vector.ph:
699+
; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
700+
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
701+
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
702+
; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
703+
; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
704+
; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
705+
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
706+
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
707+
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
708+
; IF-EVL-INLOOP: vector.body:
709+
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
710+
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
711+
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
712+
; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
713+
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
714+
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
715+
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP10]]
716+
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
717+
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
718+
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
719+
; IF-EVL-INLOOP-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]]
720+
; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64
721+
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
722+
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
723+
; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
724+
; IF-EVL-INLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
725+
; IF-EVL-INLOOP: middle.block:
726+
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
727+
; IF-EVL-INLOOP: scalar.ph:
728+
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
729+
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
730+
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
731+
; IF-EVL-INLOOP: for.body:
732+
; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
733+
; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
734+
; IF-EVL-INLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
735+
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = load float, ptr [[GEP]], align 4
736+
; IF-EVL-INLOOP-NEXT: [[ADD]] = fadd fast float [[TMP17]], [[RDX]]
737+
; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
738+
; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
739+
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
740+
; IF-EVL-INLOOP: for.end:
741+
; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
742+
; IF-EVL-INLOOP-NEXT: ret float [[ADD_LCSSA]]
743+
;
744+
entry:
745+
br label %for.body
746+
747+
for.body:
748+
; Induction variable: 0 on entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
749+
; Running reduction value: %start on entry, the previous %add afterwards.
%rdx = phi float [ %start, %entry ], [ %add, %for.body ]
750+
%gep = getelementptr inbounds nuw float, ptr %b, i64 %iv
751+
%0 = load float, ptr %gep, align 4
752+
; The reduction step; this is the only instruction in the loop carrying `fast`.
%add = fadd fast float %0, %rdx
753+
%iv.next = add nuw nsw i64 %iv, 1
754+
; Exit when the incremented induction variable equals %N.
%exitcond.not = icmp eq i64 %iv.next, %N
755+
br i1 %exitcond.not, label %for.end, label %for.body
756+
757+
for.end:
758+
; Returns the last %add computed by the loop.
ret float %add
759+
}

0 commit comments

Comments
 (0)