@@ -487,6 +487,54 @@ for.end: ; preds = %for.body, %entry
487
487
ret void
488
488
}
489
489
490
; Strip-mined saxpy-style loop written with RVV intrinsics: each iteration
; loads a chunk from %x and from %y, applies vfmacc with the scalar %a, and
; stores the result back through the %y pointer.
; The entry vsetvli passes (i64 2, i64 3) and the vsetvli at the bottom of the
; loop passes (i64 1, i64 2); the expected assembly below prints them as
; e32/m8 and e16/m4 respectively. The IR only consumes the i64 returned by the
; loop vsetvli (through the %1 phi and the exit compare), while every vector
; operation in the body is typed nxv16f32.
; NOTE(review): presumably this checks that vsetvli insertion keeps the e16/m4
; instruction as the producer of the element count while still emitting the
; e32/m8 state the body needs -- confirm against the change that added it.
+ define void @saxpy_vec_demanded_fields (i64 %n , float %a , ptr nocapture readonly %x , ptr nocapture %y ) {
491
+ ; CHECK-LABEL: saxpy_vec_demanded_fields:
492
+ ; CHECK: # %bb.0: # %entry
493
+ ; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
494
+ ; CHECK-NEXT: beqz a3, .LBB9_2
495
+ ; CHECK-NEXT: .LBB9_1: # %for.body
496
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
497
+ ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
498
+ ; CHECK-NEXT: vle32.v v8, (a1)
499
+ ; CHECK-NEXT: vle32.v v16, (a2)
500
+ ; CHECK-NEXT: slli a4, a3, 2
501
+ ; CHECK-NEXT: add a1, a1, a4
502
+ ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
503
+ ; CHECK-NEXT: vfmacc.vf v16, fa0, v8
504
+ ; CHECK-NEXT: vse32.v v16, (a2)
505
+ ; CHECK-NEXT: sub a0, a0, a3
506
+ ; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma
507
+ ; CHECK-NEXT: add a2, a2, a4
508
+ ; CHECK-NEXT: bnez a3, .LBB9_1
509
+ ; CHECK-NEXT: .LBB9_2: # %for.end
510
+ ; CHECK-NEXT: ret
511
+ entry:
512
+ %0 = tail call i64 @llvm.riscv.vsetvli.i64 (i64 %n , i64 2 , i64 3 ) ; expected asm prints this as e32, m8
513
+ %cmp.not13 = icmp eq i64 %0 , 0 ; true when the first vsetvli returned 0
514
+ br i1 %cmp.not13 , label %for.end , label %for.body ; skip the loop entirely in that case
515
+
516
+ for.body: ; preds = %for.body, %entry
517
+ %1 = phi i64 [ %7 , %for.body ], [ %0 , %entry ] ; element count for this iteration, taken from the most recent vsetvli
518
+ %n.addr.016 = phi i64 [ %sub , %for.body ], [ %n , %entry ] ; elements still to be processed
519
+ %x.addr.015 = phi ptr [ %add.ptr , %for.body ], [ %x , %entry ] ; current position in x
520
+ %y.addr.014 = phi ptr [ %add.ptr1 , %for.body ], [ %y , %entry ] ; current position in y
521
+ %2 = bitcast ptr %x.addr.015 to ptr ; source and result types are both ptr, so the value is unchanged
522
+ %3 = tail call <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float > undef , ptr %2 , i64 %1 ) ; load the x chunk; %1 is passed as the trailing i64 operand
523
+ %add.ptr = getelementptr inbounds float , ptr %x.addr.015 , i64 %1 ; advance x by %1 floats
524
+ %4 = bitcast ptr %y.addr.014 to ptr ; same-type cast, as for %2
525
+ %5 = tail call <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float > undef , ptr %4 , i64 %1 ) ; load the y chunk
526
+ %6 = tail call <vscale x 16 x float > @llvm.riscv.vfmacc.nxv16f32.f32.i64 (<vscale x 16 x float > %5 , float %a , <vscale x 16 x float > %3 , i64 7 , i64 %1 , i64 0 ) ; NOTE(review): i64 7 / i64 0 look like the rounding-mode and policy operands (expected asm switches to "tu, ma" here) -- confirm
527
+ tail call void @llvm.riscv.vse.nxv16f32.i64 (<vscale x 16 x float > %6 , ptr %4 , i64 %1 ) ; store the result through the y pointer that was loaded from
528
+ %add.ptr1 = getelementptr inbounds float , ptr %y.addr.014 , i64 %1 ; advance y by %1 floats
529
+ %sub = sub i64 %n.addr.016 , %1 ; remaining element count after this iteration
530
+ %7 = tail call i64 @llvm.riscv.vsetvli.i64 (i64 %sub , i64 1 , i64 2 ) ; expected asm prints this as e16, m4; only its i64 result is used
531
+ %cmp.not = icmp eq i64 %7 , 0 ; true when the loop vsetvli returned 0
532
+ br i1 %cmp.not , label %for.end , label %for.body ; leave the loop in that case, otherwise iterate again
533
+
534
+ for.end: ; preds = %for.body, %entry
535
+ ret void
536
+ }
537
+
490
538
declare i64 @llvm.riscv.vsetvli.i64 (i64 , i64 immarg, i64 immarg) ; returns i64; the two trailing operands are immarg, so they must be immediate constants
491
539
declare <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float >, ptr nocapture , i64 ) ; NOTE(review): operands look like (passthru, source pointer, element count) -- confirm against the intrinsic definition
492
540
declare <vscale x 16 x float > @llvm.riscv.vfmacc.nxv16f32.f32.i64 (<vscale x 16 x float >, float , <vscale x 16 x float >, i64 , i64 , i64 ) ; three trailing i64 operands; saxpy_vec_demanded_fields passes (7, element count, 0)
@@ -501,12 +549,12 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32>
501
549
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
502
550
; CHECK-NEXT: vle32.v v9, (a0)
503
551
; CHECK-NEXT: andi a3, a3, 1
504
- ; CHECK-NEXT: beqz a3, .LBB9_2
552
+ ; CHECK-NEXT: beqz a3, .LBB10_2
505
553
; CHECK-NEXT: # %bb.1: # %if
506
554
; CHECK-NEXT: vle16.v v10, (a1)
507
555
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
508
556
; CHECK-NEXT: vwcvt.x.x.v v8, v10
509
- ; CHECK-NEXT: .LBB9_2 : # %if.end
557
+ ; CHECK-NEXT: .LBB10_2 : # %if.end
510
558
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
511
559
; CHECK-NEXT: vadd.vv v8, v9, v8
512
560
; CHECK-NEXT: ret
@@ -540,19 +588,19 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl,
540
588
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
541
589
; CHECK-NEXT: vle32.v v9, (a0)
542
590
; CHECK-NEXT: andi a4, a4, 1
543
- ; CHECK-NEXT: beqz a4, .LBB10_2
591
+ ; CHECK-NEXT: beqz a4, .LBB11_2
544
592
; CHECK-NEXT: # %bb.1: # %if
545
593
; CHECK-NEXT: vle16.v v10, (a1)
546
594
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
547
595
; CHECK-NEXT: vwadd.wv v9, v9, v10
548
- ; CHECK-NEXT: .LBB10_2 : # %if.end
596
+ ; CHECK-NEXT: .LBB11_2 : # %if.end
549
597
; CHECK-NEXT: andi a5, a5, 1
550
- ; CHECK-NEXT: beqz a5, .LBB10_4
598
+ ; CHECK-NEXT: beqz a5, .LBB11_4
551
599
; CHECK-NEXT: # %bb.3: # %if2
552
600
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
553
601
; CHECK-NEXT: vle16.v v10, (a2)
554
602
; CHECK-NEXT: vwadd.wv v9, v9, v10
555
- ; CHECK-NEXT: .LBB10_4 : # %if2.end
603
+ ; CHECK-NEXT: .LBB11_4 : # %if2.end
556
604
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
557
605
; CHECK-NEXT: vadd.vv v8, v9, v8
558
606
; CHECK-NEXT: ret
@@ -586,11 +634,11 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
586
634
; CHECK-LABEL: vlmax:
587
635
; CHECK: # %bb.0: # %entry
588
636
; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
589
- ; CHECK-NEXT: blez a0, .LBB11_3
637
+ ; CHECK-NEXT: blez a0, .LBB12_3
590
638
; CHECK-NEXT: # %bb.1: # %for.body.preheader
591
639
; CHECK-NEXT: li a5, 0
592
640
; CHECK-NEXT: slli a4, a6, 3
593
- ; CHECK-NEXT: .LBB11_2 : # %for.body
641
+ ; CHECK-NEXT: .LBB12_2 : # %for.body
594
642
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
595
643
; CHECK-NEXT: vle64.v v8, (a2)
596
644
; CHECK-NEXT: vle64.v v9, (a3)
@@ -600,8 +648,8 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
600
648
; CHECK-NEXT: add a1, a1, a4
601
649
; CHECK-NEXT: add a3, a3, a4
602
650
; CHECK-NEXT: add a2, a2, a4
603
- ; CHECK-NEXT: blt a5, a0, .LBB11_2
604
- ; CHECK-NEXT: .LBB11_3 : # %for.end
651
+ ; CHECK-NEXT: blt a5, a0, .LBB12_2
652
+ ; CHECK-NEXT: .LBB12_3 : # %for.end
605
653
; CHECK-NEXT: ret
606
654
entry:
607
655
%0 = tail call i64 @llvm.riscv.vsetvlimax.i64 (i64 3 , i64 0 )
@@ -633,18 +681,18 @@ define void @vector_init_vlmax(i64 %N, ptr %c) {
633
681
; CHECK-LABEL: vector_init_vlmax:
634
682
; CHECK: # %bb.0: # %entry
635
683
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
636
- ; CHECK-NEXT: blez a0, .LBB12_3
684
+ ; CHECK-NEXT: blez a0, .LBB13_3
637
685
; CHECK-NEXT: # %bb.1: # %for.body.preheader
638
686
; CHECK-NEXT: li a3, 0
639
687
; CHECK-NEXT: slli a4, a2, 3
640
688
; CHECK-NEXT: vmv.v.i v8, 0
641
- ; CHECK-NEXT: .LBB12_2 : # %for.body
689
+ ; CHECK-NEXT: .LBB13_2 : # %for.body
642
690
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
643
691
; CHECK-NEXT: vse64.v v8, (a1)
644
692
; CHECK-NEXT: add a3, a3, a2
645
693
; CHECK-NEXT: add a1, a1, a4
646
- ; CHECK-NEXT: blt a3, a0, .LBB12_2
647
- ; CHECK-NEXT: .LBB12_3 : # %for.end
694
+ ; CHECK-NEXT: blt a3, a0, .LBB13_2
695
+ ; CHECK-NEXT: .LBB13_3 : # %for.end
648
696
; CHECK-NEXT: ret
649
697
entry:
650
698
%0 = tail call i64 @llvm.riscv.vsetvlimax.i64 (i64 3 , i64 0 )
@@ -669,20 +717,20 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
669
717
; CHECK-LABEL: vector_init_vsetvli_N:
670
718
; CHECK: # %bb.0: # %entry
671
719
; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, ma
672
- ; CHECK-NEXT: blez a0, .LBB13_3
720
+ ; CHECK-NEXT: blez a0, .LBB14_3
673
721
; CHECK-NEXT: # %bb.1: # %for.body.preheader
674
722
; CHECK-NEXT: li a3, 0
675
723
; CHECK-NEXT: slli a4, a2, 3
676
724
; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
677
725
; CHECK-NEXT: vmv.v.i v8, 0
678
- ; CHECK-NEXT: .LBB13_2 : # %for.body
726
+ ; CHECK-NEXT: .LBB14_2 : # %for.body
679
727
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
680
728
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
681
729
; CHECK-NEXT: vse64.v v8, (a1)
682
730
; CHECK-NEXT: add a3, a3, a2
683
731
; CHECK-NEXT: add a1, a1, a4
684
- ; CHECK-NEXT: blt a3, a0, .LBB13_2
685
- ; CHECK-NEXT: .LBB13_3 : # %for.end
732
+ ; CHECK-NEXT: blt a3, a0, .LBB14_2
733
+ ; CHECK-NEXT: .LBB14_3 : # %for.end
686
734
; CHECK-NEXT: ret
687
735
entry:
688
736
%0 = tail call i64 @llvm.riscv.vsetvli (i64 %N , i64 3 , i64 0 )
@@ -711,13 +759,13 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
711
759
; CHECK-NEXT: slli a4, a3, 3
712
760
; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
713
761
; CHECK-NEXT: vmv.v.i v8, 0
714
- ; CHECK-NEXT: .LBB14_1 : # %for.body
762
+ ; CHECK-NEXT: .LBB15_1 : # %for.body
715
763
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
716
764
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
717
765
; CHECK-NEXT: vse64.v v8, (a1)
718
766
; CHECK-NEXT: add a2, a2, a3
719
767
; CHECK-NEXT: add a1, a1, a4
720
- ; CHECK-NEXT: blt a2, a0, .LBB14_1
768
+ ; CHECK-NEXT: blt a2, a0, .LBB15_1
721
769
; CHECK-NEXT: # %bb.2: # %for.end
722
770
; CHECK-NEXT: ret
723
771
entry:
@@ -745,13 +793,13 @@ define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
745
793
; CHECK-NEXT: li a2, 0
746
794
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
747
795
; CHECK-NEXT: vmv.v.i v8, 0
748
- ; CHECK-NEXT: .LBB15_1 : # %for.body
796
+ ; CHECK-NEXT: .LBB16_1 : # %for.body
749
797
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
750
798
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
751
799
; CHECK-NEXT: vse64.v v8, (a1)
752
800
; CHECK-NEXT: addi a2, a2, 4
753
801
; CHECK-NEXT: addi a1, a1, 32
754
- ; CHECK-NEXT: blt a2, a0, .LBB15_1
802
+ ; CHECK-NEXT: blt a2, a0, .LBB16_1
755
803
; CHECK-NEXT: # %bb.2: # %for.end
756
804
; CHECK-NEXT: ret
757
805
entry:
@@ -779,13 +827,13 @@ define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
779
827
; CHECK-NEXT: li a2, 0
780
828
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
781
829
; CHECK-NEXT: vmv.v.i v8, 0
782
- ; CHECK-NEXT: .LBB16_1 : # %for.body
830
+ ; CHECK-NEXT: .LBB17_1 : # %for.body
783
831
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
784
832
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
785
833
; CHECK-NEXT: vse64.v v8, (a1)
786
834
; CHECK-NEXT: addi a2, a2, 4
787
835
; CHECK-NEXT: addi a1, a1, 32
788
- ; CHECK-NEXT: blt a2, a0, .LBB16_1
836
+ ; CHECK-NEXT: blt a2, a0, .LBB17_1
789
837
; CHECK-NEXT: # %bb.2: # %for.end
790
838
; CHECK-NEXT: ret
791
839
entry:
@@ -861,10 +909,10 @@ define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x d
861
909
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
862
910
; CHECK-NEXT: vfadd.vv v8, v8, v9
863
911
; CHECK-NEXT: vs1r.v v8, (a1)
864
- ; CHECK-NEXT: beqz a0, .LBB19_2
912
+ ; CHECK-NEXT: beqz a0, .LBB20_2
865
913
; CHECK-NEXT: # %bb.1: # %if.then
866
914
; CHECK-NEXT: vse32.v v10, (a2)
867
- ; CHECK-NEXT: .LBB19_2 : # %if.end
915
+ ; CHECK-NEXT: .LBB20_2 : # %if.end
868
916
; CHECK-NEXT: ret
869
917
entry:
870
918
%res = fadd <vscale x 1 x double > %a , %b
@@ -886,16 +934,16 @@ define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nou
886
934
; CHECK-LABEL: test_ratio_only_vmv_s_x:
887
935
; CHECK: # %bb.0: # %entry
888
936
; CHECK-NEXT: andi a2, a2, 1
889
- ; CHECK-NEXT: beqz a2, .LBB20_2
937
+ ; CHECK-NEXT: beqz a2, .LBB21_2
890
938
; CHECK-NEXT: # %bb.1: # %if
891
939
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
892
940
; CHECK-NEXT: vle16.v v9, (a1)
893
941
; CHECK-NEXT: vwcvt.x.x.v v8, v9
894
- ; CHECK-NEXT: j .LBB20_3
895
- ; CHECK-NEXT: .LBB20_2 :
942
+ ; CHECK-NEXT: j .LBB21_3
943
+ ; CHECK-NEXT: .LBB21_2 :
896
944
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
897
945
; CHECK-NEXT: vle32.v v8, (a0)
898
- ; CHECK-NEXT: .LBB20_3 : # %if.end
946
+ ; CHECK-NEXT: .LBB21_3 : # %if.end
899
947
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
900
948
; CHECK-NEXT: vmv.s.x v8, zero
901
949
; CHECK-NEXT: ret
@@ -918,16 +966,16 @@ define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) no
918
966
; CHECK-LABEL: test_ratio_only_vmv_s_x2:
919
967
; CHECK: # %bb.0: # %entry
920
968
; CHECK-NEXT: andi a2, a2, 1
921
- ; CHECK-NEXT: beqz a2, .LBB21_2
969
+ ; CHECK-NEXT: beqz a2, .LBB22_2
922
970
; CHECK-NEXT: # %bb.1: # %if
923
971
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
924
972
; CHECK-NEXT: vle32.v v8, (a0)
925
- ; CHECK-NEXT: j .LBB21_3
926
- ; CHECK-NEXT: .LBB21_2 :
973
+ ; CHECK-NEXT: j .LBB22_3
974
+ ; CHECK-NEXT: .LBB22_2 :
927
975
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
928
976
; CHECK-NEXT: vle16.v v9, (a1)
929
977
; CHECK-NEXT: vwcvt.x.x.v v8, v9
930
- ; CHECK-NEXT: .LBB21_3 : # %if.end
978
+ ; CHECK-NEXT: .LBB22_3 : # %if.end
931
979
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
932
980
; CHECK-NEXT: vmv.s.x v8, zero
933
981
; CHECK-NEXT: ret
@@ -953,13 +1001,13 @@ define void @pre_over_vle(ptr %A) {
953
1001
; CHECK: # %bb.0: # %entry
954
1002
; CHECK-NEXT: addi a1, a0, 800
955
1003
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
956
- ; CHECK-NEXT: .LBB22_1 : # %vector.body
1004
+ ; CHECK-NEXT: .LBB23_1 : # %vector.body
957
1005
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
958
1006
; CHECK-NEXT: vle8.v v8, (a0)
959
1007
; CHECK-NEXT: vsext.vf4 v9, v8
960
1008
; CHECK-NEXT: vse32.v v9, (a0)
961
1009
; CHECK-NEXT: addi a0, a0, 8
962
- ; CHECK-NEXT: bne a0, a1, .LBB22_1
1010
+ ; CHECK-NEXT: bne a0, a1, .LBB23_1
963
1011
; CHECK-NEXT: # %bb.2: # %exit
964
1012
; CHECK-NEXT: ret
965
1013
entry:
0 commit comments