@@ -538,3 +538,222 @@ for.body:
538
538
for.end:
539
539
ret i32 %smin
540
540
}
541
+
542
+ define nofpclass(nan inf) float @vp_reduction_with_fastflags (ptr %a , ptr %b , i64 %N , float %start ) { ; scalar float sum over %b seeded with %start, using 'fadd fast'; %a is not referenced in the body; the expectations below show 'fast' on the generated reduce/vp calls
543
+ ; OUTLOOP-LABEL: @vp_reduction_with_fastflags(
544
+ ; OUTLOOP-NEXT: entry:
545
+ ; OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
546
+ ; OUTLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
547
+ ; OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
548
+ ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
549
+ ; OUTLOOP: vector.ph:
550
+ ; OUTLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
551
+ ; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
552
+ ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
553
+ ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
554
+ ; OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
555
+ ; OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
556
+ ; OUTLOOP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> zeroinitializer, float [[START:%.*]], i32 0
557
+ ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
558
+ ; OUTLOOP: vector.body:
559
+ ; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
560
+ ; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP6]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
561
+ ; OUTLOOP-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
562
+ ; OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP7]]
563
+ ; OUTLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
564
+ ; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
565
+ ; OUTLOOP-NEXT: [[TMP10]] = fadd fast <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
566
+ ; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
567
+ ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
568
+ ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
569
+ ; OUTLOOP: middle.block:
570
+ ; OUTLOOP-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP10]])
571
+ ; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
572
+ ; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
573
+ ; OUTLOOP: scalar.ph:
574
+ ; OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
575
+ ; OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
576
+ ; OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
577
+ ; OUTLOOP: for.body:
578
+ ; OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
579
+ ; OUTLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
580
+ ; OUTLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
581
+ ; OUTLOOP-NEXT: [[TMP13:%.*]] = load float, ptr [[GEP]], align 4
582
+ ; OUTLOOP-NEXT: [[ADD]] = fadd fast float [[TMP13]], [[RDX]]
583
+ ; OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
584
+ ; OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
585
+ ; OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
586
+ ; OUTLOOP: for.end:
587
+ ; OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
588
+ ; OUTLOOP-NEXT: ret float [[ADD_LCSSA]]
589
+ ;
590
+ ; INLOOP-LABEL: @vp_reduction_with_fastflags(
591
+ ; INLOOP-NEXT: entry:
592
+ ; INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
593
+ ; INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
594
+ ; INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
595
+ ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
596
+ ; INLOOP: vector.ph:
597
+ ; INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
598
+ ; INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
599
+ ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
600
+ ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
601
+ ; INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
602
+ ; INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
603
+ ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
604
+ ; INLOOP: vector.body:
605
+ ; INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
606
+ ; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
607
+ ; INLOOP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
608
+ ; INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP6]]
609
+ ; INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
610
+ ; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
611
+ ; INLOOP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[WIDE_LOAD]])
612
+ ; INLOOP-NEXT: [[TMP10]] = fadd fast float [[TMP9]], [[VEC_PHI]]
613
+ ; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
614
+ ; INLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
615
+ ; INLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
616
+ ; INLOOP: middle.block:
617
+ ; INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
618
+ ; INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
619
+ ; INLOOP: scalar.ph:
620
+ ; INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
621
+ ; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
622
+ ; INLOOP-NEXT: br label [[FOR_BODY:%.*]]
623
+ ; INLOOP: for.body:
624
+ ; INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
625
+ ; INLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
626
+ ; INLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
627
+ ; INLOOP-NEXT: [[TMP12:%.*]] = load float, ptr [[GEP]], align 4
628
+ ; INLOOP-NEXT: [[ADD]] = fadd fast float [[TMP12]], [[RDX]]
629
+ ; INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
630
+ ; INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
631
+ ; INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
632
+ ; INLOOP: for.end:
633
+ ; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
634
+ ; INLOOP-NEXT: ret float [[ADD_LCSSA]]
635
+ ;
636
+ ; IF-EVL-OUTLOOP-LABEL: @vp_reduction_with_fastflags(
637
+ ; IF-EVL-OUTLOOP-NEXT: entry:
638
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
639
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
640
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
641
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
642
+ ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
643
+ ; IF-EVL-OUTLOOP: vector.ph:
644
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
645
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
646
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
647
+ ; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
648
+ ; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
649
+ ; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
650
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
651
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
652
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> zeroinitializer, float [[START:%.*]], i32 0
653
+ ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
654
+ ; IF-EVL-OUTLOOP: vector.body:
655
+ ; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
656
+ ; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
657
+ ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
658
+ ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
659
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
660
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
661
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]]
662
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
663
+ ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
664
+ ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
665
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
666
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
667
+ ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
668
+ ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
669
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
670
+ ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
671
+ ; IF-EVL-OUTLOOP: middle.block:
672
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP14]])
673
+ ; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
674
+ ; IF-EVL-OUTLOOP: scalar.ph:
675
+ ; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
676
+ ; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
677
+ ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
678
+ ; IF-EVL-OUTLOOP: for.body:
679
+ ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
680
+ ; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
681
+ ; IF-EVL-OUTLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
682
+ ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = load float, ptr [[GEP]], align 4
683
+ ; IF-EVL-OUTLOOP-NEXT: [[ADD]] = fadd fast float [[TMP18]], [[RDX]]
684
+ ; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
685
+ ; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
686
+ ; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
687
+ ; IF-EVL-OUTLOOP: for.end:
688
+ ; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
689
+ ; IF-EVL-OUTLOOP-NEXT: ret float [[ADD_LCSSA]]
690
+ ;
691
+ ; IF-EVL-INLOOP-LABEL: @vp_reduction_with_fastflags(
692
+ ; IF-EVL-INLOOP-NEXT: entry:
693
+ ; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
694
+ ; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
695
+ ; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
696
+ ; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
697
+ ; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
698
+ ; IF-EVL-INLOOP: vector.ph:
699
+ ; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
700
+ ; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
701
+ ; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
702
+ ; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
703
+ ; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
704
+ ; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
705
+ ; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
706
+ ; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
707
+ ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
708
+ ; IF-EVL-INLOOP: vector.body:
709
+ ; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
710
+ ; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
711
+ ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
712
+ ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
713
+ ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
714
+ ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
715
+ ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP10]]
716
+ ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
717
+ ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
718
+ ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
719
+ ; IF-EVL-INLOOP-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]]
720
+ ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64
721
+ ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
722
+ ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
723
+ ; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
724
+ ; IF-EVL-INLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
725
+ ; IF-EVL-INLOOP: middle.block:
726
+ ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
727
+ ; IF-EVL-INLOOP: scalar.ph:
728
+ ; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
729
+ ; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
730
+ ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
731
+ ; IF-EVL-INLOOP: for.body:
732
+ ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
733
+ ; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
734
+ ; IF-EVL-INLOOP-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV]]
735
+ ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = load float, ptr [[GEP]], align 4
736
+ ; IF-EVL-INLOOP-NEXT: [[ADD]] = fadd fast float [[TMP17]], [[RDX]]
737
+ ; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
738
+ ; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
739
+ ; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
740
+ ; IF-EVL-INLOOP: for.end:
741
+ ; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
742
+ ; IF-EVL-INLOOP-NEXT: ret float [[ADD_LCSSA]]
743
+ ;
744
+ entry:
745
+ br label %for.body
746
+
747
+ for.body:
748
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ] ; element index: 0 on entry, %iv.next on the back edge
749
+ %rdx = phi float [ %start , %entry ], [ %add , %for.body ] ; running sum, seeded with %start
750
+ %gep = getelementptr inbounds nuw float , ptr %b , i64 %iv ; address of element %iv of %b
751
+ %0 = load float , ptr %gep , align 4
752
+ %add = fadd fast float %0 , %rdx ; reduction step; this 'fast' flag is what the expectations above look for on the vectorized reduce/vp calls
753
+ %iv.next = add nuw nsw i64 %iv , 1
754
+ %exitcond.not = icmp eq i64 %iv.next , %N ; compared after the increment, so the body executes at least once
755
+ br i1 %exitcond.not , label %for.end , label %for.body
756
+
757
+ for.end:
758
+ ret float %add ; sum after the last iteration
759
+ }
0 commit comments