@@ -801,3 +801,144 @@ entry:
801
801
%out = shufflevector <16 x i8 > %src1 , <16 x i8 > undef , <16 x i32 > <i32 0 , i32 0 , i32 2 , i32 2 , i32 4 , i32 4 , i32 6 , i32 6 , i32 8 , i32 8 , i32 10 , i32 10 , i32 12 , i32 12 , i32 14 , i32 14 >
802
802
ret <16 x i8 > %out
803
803
}
804
+
805
+
806
; Interleave-then-truncate where the SECOND shuffle operand is undef: the odd
; result lanes come from %c2 (a vreinterpret of undef), so only the even lanes
; are defined and the whole sequence folds away — both endiannesses emit just
; "bx lr".
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef2(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  ; Mask <0,4,1,5,2,6,3,7>: even result lanes from %c1, odd lanes from %c2.
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}
821
+
822
; Interleave-then-truncate where the FIRST shuffle operand is undef: the
; defined data is in the odd result lanes, which matches the VMOVNT pattern
; (narrow and insert into the top half of each destination element), so
; little-endian codegen selects a single "vmovnt.i32 q0, q0".  Big-endian
; needs vrev64.16 wrappers to fix up lane order around the MVE operation.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef1(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  ; Mask <0,4,1,5,2,6,3,7>: even result lanes from undef %c1, odd from %a.
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}
841
+
842
; Direct lane-select shuffle (no trunc) with the second operand undef: mask
; <0,9,2,11,4,13,6,15> keeps even lanes of %c1 (= %a) in place and takes the
; odd lanes from undef %c2, so the result is %a itself.  Little-endian folds
; to "bx lr"; big-endian is left with a vrev64.8/vrev64.16 pair from the
; v16i8<->v8i16 reinterpret lane reordering.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}
858
+
859
; Mirror of vmovn16b_undef2 with the FIRST operand undef: mask lanes 9,11,13,15
; pull the odd lanes of %c2 (= %a) into the same odd positions, and the even
; lanes are undef, so again the result is just %a.  Little-endian: "bx lr";
; big-endian keeps the reinterpret lane fix-ups (vrev64.8 + vrev64.16).
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}
875
+
876
; Negative test: the shuffle mask <4,0,5,1,6,1,7,2> is NOT a regular
; interleave — source lane 1 is used twice (result lanes 3 and 5) and the
; defined lanes are not a strided pattern — so it must not be matched to a
; VMOVN/VMOVNT.  Codegen falls back to building the result lane by lane with
; scalar vmov transfers (note r0 from s1 inserted into both q1[3] and q1[5]).
define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_badlanes:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_badlanes:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov r0, s4
; CHECKBE-NEXT:    vmov.16 q2[1], r0
; CHECKBE-NEXT:    vmov r0, s5
; CHECKBE-NEXT:    vmov.16 q2[3], r0
; CHECKBE-NEXT:    vmov.16 q2[5], r0
; CHECKBE-NEXT:    vmov r0, s6
; CHECKBE-NEXT:    vmov.16 q2[7], r0
; CHECKBE-NEXT:    vrev64.16 q0, q2
; CHECKBE-NEXT:    bx lr
entry:
  ; Lanes 4-7 index the undef second operand; lanes 0,1,1,2 of %src1 land in
  ; the odd result positions, with lane 1 duplicated.
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 1, i32 7, i32 2>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}
906
+
907
; i16->i8 variant of vmovn32trunct_undef2: the odd lanes of the interleave
; come from a vreinterpret of undef, so everything folds away and both
; endiannesses emit only "bx lr".
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  ; Even result lanes from %c1, odd lanes from undef %c2.
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}
922
+
923
; i16->i8 variant of vmovn32trunct_undef1: the defined data sits in the odd
; result lanes, matching the top-half-insert form, so little-endian selects a
; single "vmovnt.i16 q0, q0".  Big-endian wraps it in vrev64.8 lane fix-ups.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  ; Even result lanes from undef %c1, odd lanes from %a via %c2.
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}
942
+
943
; MVE vreinterpretq intrinsics used by the tests above (bitcast-like
; reinterpretation that is lane-order aware under big-endian).
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)