@@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind {
   %v9 = alloca <vscale x 8 x i64>
   ret void
 }
+
+define void @lmul_16_align() nounwind {
+; NOZBA-LABEL: lmul_16_align:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    addi sp, sp, -144
+; NOZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    addi s0, sp, 144
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    li a1, 24
+; NOZBA-NEXT:    mul a0, a0, a1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    andi sp, sp, -128
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    add a0, sp, a0
+; NOZBA-NEXT:    addi a0, a0, 128
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    csrr a1, vlenb
+; NOZBA-NEXT:    slli a1, a1, 3
+; NOZBA-NEXT:    add a0, a0, a1
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    addi a0, sp, 128
+; NOZBA-NEXT:    vs1r.v v8, (a0)
+; NOZBA-NEXT:    addi sp, s0, -144
+; NOZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    addi sp, sp, 144
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul_16_align:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    addi sp, sp, -144
+; ZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    addi s0, sp, 144
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 3
+; ZBA-NEXT:    sh1add a0, a0, a0
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    andi sp, sp, -128
+; ZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    add a0, sp, a0
+; ZBA-NEXT:    addi a0, a0, 128
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    csrr a1, vlenb
+; ZBA-NEXT:    sh3add a0, a1, a0
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    addi a0, sp, 128
+; ZBA-NEXT:    vs1r.v v8, (a0)
+; ZBA-NEXT:    addi sp, s0, -144
+; ZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    addi sp, sp, 144
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul_16_align:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    addi sp, sp, -144
+; NOMUL-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    addi s0, sp, 144
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 3
+; NOMUL-NEXT:    mv a1, a0
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    andi sp, sp, -128
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    add a0, sp, a0
+; NOMUL-NEXT:    addi a0, a0, 128
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    csrr a1, vlenb
+; NOMUL-NEXT:    slli a1, a1, 3
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    addi a0, sp, 128
+; NOMUL-NEXT:    vs1r.v v8, (a0)
+; NOMUL-NEXT:    addi sp, s0, -144
+; NOMUL-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    addi sp, sp, 144
+; NOMUL-NEXT:    ret
+  %v1 = alloca <vscale x 16 x i64>
+  %v2 = alloca <vscale x 1 x i64>
+  store <vscale x 16 x i64> zeroinitializer, ptr %v1
+  store <vscale x 1 x i64> zeroinitializer, ptr %v2
+  ret void
+}