@@ -744,3 +744,200 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
744
744
%s = shufflevector <64 x i8 > %wide.vec , <64 x i8 > poison, <8 x i32 > <i32 0 , i32 8 , i32 16 , i32 24 , i32 32 , i32 40 , i32 48 , i32 56 >
745
745
ret <8 x i8 > %s
746
746
}
747
+
748
+ ; Single-source compress on <8 x i8>: result lanes 0-4 take source elements
+ ; 0, 2, 4, 5, 7; lanes 5-7 are don't-care (spelled poison, not legacy undef).
+ ; The checks expect an index vector loaded from the constant pool and a
+ ; vrgather.vv.
+ define <8 x i8 > @shuffle_compress_singlesrc_e8 (<8 x i8 > %v ) {
749
+ ; CHECK-LABEL: shuffle_compress_singlesrc_e8:
750
+ ; CHECK: # %bb.0:
751
+ ; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
752
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
753
+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
754
+ ; CHECK-NEXT: vle8.v v10, (a0)
755
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
756
+ ; CHECK-NEXT: vmv1r.v v8, v9
757
+ ; CHECK-NEXT: ret
758
+ %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 poison , i32 poison , i32 poison >
759
+ ret <8 x i8 > %out
760
+ }
761
+
762
+ ; Single-source compress on <8 x i16>: result lanes 0-4 take source elements
+ ; 0, 2, 4, 5, 7; lanes 5-7 are don't-care (spelled poison, not legacy undef).
+ ; The checks expect an index vector loaded from the constant pool and a
+ ; vrgather.vv.
+ define <8 x i16 > @shuffle_compress_singlesrc_e16 (<8 x i16 > %v ) {
763
+ ; CHECK-LABEL: shuffle_compress_singlesrc_e16:
764
+ ; CHECK: # %bb.0:
765
+ ; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
766
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
767
+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
768
+ ; CHECK-NEXT: vle16.v v10, (a0)
769
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
770
+ ; CHECK-NEXT: vmv.v.v v8, v9
771
+ ; CHECK-NEXT: ret
772
+ %out = shufflevector <8 x i16 > %v , <8 x i16 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 poison , i32 poison , i32 poison >
773
+ ret <8 x i16 > %out
774
+ }
775
+
776
+ ; Single-source compress on <8 x i32>: result lanes 0-4 take source elements
+ ; 0, 1, 4, 5, 6; lanes 5-7 are don't-care (spelled poison, not legacy undef).
+ ; The checks expect 16-bit indices loaded from the constant pool (vle16.v) and
+ ; a vrgatherei16.vv.
+ define <8 x i32 > @shuffle_compress_singlesrc_e32 (<8 x i32 > %v ) {
777
+ ; CHECK-LABEL: shuffle_compress_singlesrc_e32:
778
+ ; CHECK: # %bb.0:
779
+ ; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
780
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
781
+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
782
+ ; CHECK-NEXT: vle16.v v12, (a0)
783
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
784
+ ; CHECK-NEXT: vmv.v.v v8, v10
785
+ ; CHECK-NEXT: ret
786
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 poison , i32 poison , i32 poison >
787
+ ret <8 x i32 > %out
788
+ }
789
+
790
+ ; Single-source compress on <8 x i64>: result lanes 0-4 take source elements
+ ; 0, 2, 4, 5, 7; lanes 5-7 are don't-care (spelled poison, not legacy undef).
+ ; The checks expect 16-bit indices loaded from the constant pool (vle16.v) and
+ ; a vrgatherei16.vv.
+ define <8 x i64 > @shuffle_compress_singlesrc_e64 (<8 x i64 > %v ) {
791
+ ; CHECK-LABEL: shuffle_compress_singlesrc_e64:
792
+ ; CHECK: # %bb.0:
793
+ ; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
794
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
795
+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
796
+ ; CHECK-NEXT: vle16.v v16, (a0)
797
+ ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
798
+ ; CHECK-NEXT: vmv.v.v v8, v12
799
+ ; CHECK-NEXT: ret
800
+ %out = shufflevector <8 x i64 > %v , <8 x i64 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 poison , i32 poison , i32 poison >
801
+ ret <8 x i64 > %out
802
+ }
803
+
804
+ ; Compress-style mask with a gap on <8 x i32>: result lanes 0, 2, 3, 4 take
+ ; source elements 0, 4, 5, 7; lane 1 and lanes 5-7 are don't-care (spelled
+ ; poison, not legacy undef). The checks expect 16-bit indices loaded from the
+ ; constant pool (vle16.v) and a vrgatherei16.vv.
+ define <8 x i32 > @shuffle_compress_singlesrc_gaps_e32 (<8 x i32 > %v ) {
805
+ ; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
806
+ ; CHECK: # %bb.0:
807
+ ; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
808
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
809
+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
810
+ ; CHECK-NEXT: vle16.v v12, (a0)
811
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
812
+ ; CHECK-NEXT: vmv.v.v v8, v10
813
+ ; CHECK-NEXT: ret
814
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 poison , i32 4 , i32 5 , i32 7 , i32 poison , i32 poison , i32 poison >
815
+ ret <8 x i32 > %out
816
+ }
817
+
818
+ ; Stride-2 decompress on <8 x i32>: source elements 0-3 land in result lanes
+ ; 0, 2, 4, 6; the odd lanes are don't-care (spelled poison, not legacy undef).
+ ; The checks expect a vwaddu.vv / vwmaccu.vx pair with no constant-pool load.
+ define <8 x i32 > @shuffle_decompress2_singlesrc_e32 (<8 x i32 > %v ) {
819
+ ; CHECK-LABEL: shuffle_decompress2_singlesrc_e32:
820
+ ; CHECK: # %bb.0:
821
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
822
+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
823
+ ; CHECK-NEXT: li a0, -1
824
+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
825
+ ; CHECK-NEXT: vmv2r.v v8, v10
826
+ ; CHECK-NEXT: ret
827
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 poison , i32 1 , i32 poison , i32 2 , i32 poison , i32 3 , i32 poison >
828
+ ret <8 x i32 > %out
829
+ }
830
+
831
+ ; Stride-3 decompress on <8 x i32>: source elements 0, 1, 2 land in result
+ ; lanes 0, 3, 6; every other lane is don't-care (spelled poison, not legacy
+ ; undef). The expected code differs per target: the RV32 checks load 16-bit
+ ; indices from the constant pool, while the RV64 checks build them from a
+ ; scalar (lui/slli) splatted with vmv.v.x. Both finish with vrgatherei16.vv.
+ define <8 x i32 > @shuffle_decompress3_singlesrc_e32 (<8 x i32 > %v ) {
832
+ ; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
833
+ ; RV32: # %bb.0:
834
+ ; RV32-NEXT: lui a0, %hi(.LCPI55_0)
835
+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
836
+ ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
837
+ ; RV32-NEXT: vle16.v v12, (a0)
838
+ ; RV32-NEXT: vrgatherei16.vv v10, v8, v12
839
+ ; RV32-NEXT: vmv.v.v v8, v10
840
+ ; RV32-NEXT: ret
841
+ ;
842
+ ; RV64-LABEL: shuffle_decompress3_singlesrc_e32:
843
+ ; RV64: # %bb.0:
844
+ ; RV64-NEXT: lui a0, 32769
845
+ ; RV64-NEXT: slli a0, a0, 21
846
+ ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
847
+ ; RV64-NEXT: vmv.v.x v12, a0
848
+ ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
849
+ ; RV64-NEXT: vrgatherei16.vv v10, v8, v12
850
+ ; RV64-NEXT: vmv.v.v v8, v10
851
+ ; RV64-NEXT: ret
852
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 poison , i32 poison , i32 1 , i32 poison , i32 poison , i32 2 , i32 poison >
853
+ ret <8 x i32 > %out
854
+ }
855
+
856
+ ; Stride-4 decompress on <8 x i32>: source elements 0 and 1 land in result
+ ; lanes 0 and 4; every other lane is don't-care (spelled poison, not legacy
+ ; undef). The checks currently expect indices computed as vid.v >> 2 and a
+ ; vrgatherei16.vv.
+ ; TODO: This should be a single vslideup.vi
856
+ define <8 x i32 > @shuffle_decompress4_singlesrc_e32 (<8 x i32 > %v ) {
857
+ ; CHECK-LABEL: shuffle_decompress4_singlesrc_e32:
858
+ ; CHECK: # %bb.0:
859
+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
860
+ ; CHECK-NEXT: vid.v v10
861
+ ; CHECK-NEXT: vsrl.vi v12, v10, 2
862
+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
863
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
864
+ ; CHECK-NEXT: vmv.v.v v8, v10
865
+ ; CHECK-NEXT: ret
866
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 poison , i32 poison , i32 poison , i32 1 , i32 poison , i32 poison , i32 poison >
867
+ ret <8 x i32 > %out
868
+ }
870
+
871
+ ; Stride-4 decompress on <8 x i8>: source elements 0 and 1 land in result
+ ; lanes 0 and 4; every other lane is don't-care (spelled poison, not legacy
+ ; undef). The checks currently expect indices computed as vid.v >> 2 and a
+ ; vrgather.vv.
+ ; TODO: This should be either a single vslideup.vi or two widening interleaves.
871
+ define <8 x i8 > @shuffle_decompress4_singlesrc_e8 (<8 x i8 > %v ) {
872
+ ; CHECK-LABEL: shuffle_decompress4_singlesrc_e8:
873
+ ; CHECK: # %bb.0:
874
+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
875
+ ; CHECK-NEXT: vid.v v9
876
+ ; CHECK-NEXT: vsrl.vi v10, v9, 2
877
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
878
+ ; CHECK-NEXT: vmv1r.v v8, v9
879
+ ; CHECK-NEXT: ret
880
+ %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 poison , i32 poison , i32 poison , i32 1 , i32 poison , i32 poison , i32 poison >
881
+ ret <8 x i8 > %out
882
+ }
884
+
885
+ ; Irregular decompress on <8 x i32>: source elements 0, 1, 3, 4 land in result
+ ; lanes 0, 2, 4, 7; lanes 1, 3, 5, 6 are don't-care (spelled poison, not legacy
+ ; undef). The checks expect 16-bit indices loaded from the constant pool
+ ; (vle16.v) and a vrgatherei16.vv.
+ define <8 x i32 > @shuffle_decompress_singlesrc_e32 (<8 x i32 > %v ) {
886
+ ; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
887
+ ; CHECK: # %bb.0:
888
+ ; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
889
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
890
+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
891
+ ; CHECK-NEXT: vle16.v v12, (a0)
892
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
893
+ ; CHECK-NEXT: vmv.v.v v8, v10
894
+ ; CHECK-NEXT: ret
895
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 poison , i32 1 , i32 poison , i32 3 , i32 poison , i32 poison , i32 4 >
896
+ ret <8 x i32 > %out
897
+ }
898
+
899
+ ; Repeat-by-2 on <8 x i32>: each of source elements 0-3 is duplicated into two
+ ; adjacent result lanes (mask 0,0,1,1,2,2,3,3). The checks expect a
+ ; vwaddu.vv / vwmaccu.vx pair with no constant-pool load.
+ define <8 x i32 > @shuffle_repeat2_singlesrc_e32 (<8 x i32 > %v ) {
900
+ ; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
901
+ ; CHECK: # %bb.0:
902
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
903
+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
904
+ ; CHECK-NEXT: li a0, -1
905
+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
906
+ ; CHECK-NEXT: vmv2r.v v8, v10
907
+ ; CHECK-NEXT: ret
908
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 1 , i32 1 , i32 2 , i32 2 , i32 3 , i32 3 >
909
+ ret <8 x i32 > %out
910
+ }
911
+
912
+ ; Repeat-by-3 on <8 x i32>: source elements 0 and 1 are each written to three
+ ; adjacent result lanes and element 2 to the last two (mask 0,0,0,1,1,1,2,2).
+ ; The checks expect the 16-bit index vector to be assembled in registers
+ ; (splat of 1, then two masked vmerge.vim steps inserting 0 and 2) and then
+ ; consumed by vrgatherei16.vv.
+ define <8 x i32 > @shuffle_repeat3_singlesrc_e32 (<8 x i32 > %v ) {
913
+ ; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
914
+ ; CHECK: # %bb.0:
915
+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
916
+ ; CHECK-NEXT: vmv.v.i v0, 7
917
+ ; CHECK-NEXT: vmv.v.i v11, 1
918
+ ; CHECK-NEXT: li a0, 192
919
+ ; CHECK-NEXT: vmv.s.x v10, a0
920
+ ; CHECK-NEXT: vmerge.vim v11, v11, 0, v0
921
+ ; CHECK-NEXT: vmv.v.v v0, v10
922
+ ; CHECK-NEXT: vmerge.vim v12, v11, 2, v0
923
+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
924
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
925
+ ; CHECK-NEXT: vmv.v.v v8, v10
926
+ ; CHECK-NEXT: ret
927
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 0 , i32 1 , i32 1 , i32 1 , i32 2 , i32 2 >
928
+ ret <8 x i32 > %out
929
+ }
930
+
931
+ ; Repeat-by-4 on <8 x i32>: source element 0 fills result lanes 0-3 and source
+ ; element 1 fills lanes 4-7 (mask 0,0,0,0,1,1,1,1). The checks expect indices
+ ; computed as vid.v >> 2 and a vrgatherei16.vv.
+ define <8 x i32 > @shuffle_repeat4_singlesrc_e32 (<8 x i32 > %v ) {
932
+ ; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
933
+ ; CHECK: # %bb.0:
934
+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
935
+ ; CHECK-NEXT: vid.v v10
936
+ ; CHECK-NEXT: vsrl.vi v12, v10, 2
937
+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
938
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
939
+ ; CHECK-NEXT: vmv.v.v v8, v10
940
+ ; CHECK-NEXT: ret
941
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 , i32 1 , i32 1 , i32 1 , i32 1 >
942
+ ret <8 x i32 > %out
943
+ }
0 commit comments