@@ -810,8 +810,8 @@ define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
810
810
ret <8 x i32 > %out
811
811
}
812
812
813
- define <8 x i32 > @shuffle_decompress2_singlesrc_e32 (<8 x i32 > %v ) {
814
- ; CHECK-LABEL: shuffle_decompress2_singlesrc_e32 :
813
+ define <8 x i32 > @shuffle_spread2_singlesrc_e32 (<8 x i32 > %v ) {
814
+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32 :
815
815
; CHECK: # %bb.0:
816
816
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
817
817
; CHECK-NEXT: vwaddu.vv v10, v8, v8
@@ -823,18 +823,46 @@ define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) {
823
823
ret <8 x i32 > %out
824
824
}
825
825
826
- define <8 x i32 > @shuffle_decompress3_singlesrc_e32 (<8 x i32 > %v ) {
827
- ; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
826
+ define <8 x i32 > @shuffle_spread2_singlesrc_e32_index1 (<8 x i32 > %v ) { ; Single-source spread by a factor of 2, starting at result lane 1 instead of lane 0.
827
+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1:
828
+ ; CHECK: # %bb.0:
829
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
830
+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
831
+ ; CHECK-NEXT: li a0, -1
832
+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
833
+ ; CHECK-NEXT: vmv2r.v v8, v10
834
+ ; CHECK-NEXT: ret
835
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 undef , i32 0 , i32 undef , i32 1 , i32 undef , i32 2 , i32 undef , i32 3 > ; mask: result lane 2k+1 takes %v lane k (k = 0..3); even result lanes are undef
836
+ ret <8 x i32 > %out
837
+ }
838
+
839
+ define <8 x i32 > @shuffle_spread2_singlesrc_e32_index2 (<8 x i32 > %v ) { ; Single-source spread by a factor of 2, starting at result lane 2; the expected codegen below is a vrgatherei16.vv with computed indices.
840
+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2:
841
+ ; CHECK: # %bb.0:
842
+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
843
+ ; CHECK-NEXT: vid.v v10
844
+ ; CHECK-NEXT: vsrl.vi v10, v10, 1
845
+ ; CHECK-NEXT: vadd.vi v12, v10, -1
846
+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
847
+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
848
+ ; CHECK-NEXT: vmv.v.v v8, v10
849
+ ; CHECK-NEXT: ret
850
+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 undef , i32 undef , i32 0 , i32 undef , i32 1 , i32 undef , i32 2 , i32 undef > ; mask: result lanes 2, 4, 6 take %v lanes 0, 1, 2; every other result lane is undef
851
+ ret <8 x i32 > %out
852
+ }
853
+
854
+ define <8 x i32 > @shuffle_spread3_singlesrc_e32 (<8 x i32 > %v ) {
855
+ ; RV32-LABEL: shuffle_spread3_singlesrc_e32:
828
856
; RV32: # %bb.0:
829
- ; RV32-NEXT: lui a0, %hi(.LCPI55_0 )
830
- ; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0 )
857
+ ; RV32-NEXT: lui a0, %hi(.LCPI57_0 )
858
+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0 )
831
859
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
832
860
; RV32-NEXT: vle16.v v12, (a0)
833
861
; RV32-NEXT: vrgatherei16.vv v10, v8, v12
834
862
; RV32-NEXT: vmv.v.v v8, v10
835
863
; RV32-NEXT: ret
836
864
;
837
- ; RV64-LABEL: shuffle_decompress3_singlesrc_e32 :
865
+ ; RV64-LABEL: shuffle_spread3_singlesrc_e32 :
838
866
; RV64: # %bb.0:
839
867
; RV64-NEXT: lui a0, 32769
840
868
; RV64-NEXT: slli a0, a0, 21
@@ -849,8 +877,8 @@ define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) {
849
877
}
850
878
851
879
; TODO: This should be a single vslideup.vi
852
- define <8 x i32 > @shuffle_decompress4_singlesrc_e32 (<8 x i32 > %v ) {
853
- ; CHECK-LABEL: shuffle_decompress4_singlesrc_e32 :
880
+ define <8 x i32 > @shuffle_spread4_singlesrc_e32 (<8 x i32 > %v ) {
881
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e32 :
854
882
; CHECK: # %bb.0:
855
883
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
856
884
; CHECK-NEXT: vid.v v10
@@ -864,8 +892,8 @@ define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) {
864
892
}
865
893
866
894
; TODO: This should be either a single vslideup.vi or two widening interleaves.
867
- define <8 x i8 > @shuffle_decompress4_singlesrc_e8 (<8 x i8 > %v ) {
868
- ; CHECK-LABEL: shuffle_decompress4_singlesrc_e8 :
895
+ define <8 x i8 > @shuffle_spread4_singlesrc_e8 (<8 x i8 > %v ) {
896
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8 :
869
897
; CHECK: # %bb.0:
870
898
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
871
899
; CHECK-NEXT: vid.v v9
@@ -877,11 +905,25 @@ define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) {
877
905
ret <8 x i8 > %out
878
906
}
879
907
908
+ define <32 x i8 > @shuffle_spread8_singlesrc_e8 (<32 x i8 > %v ) { ; Single-source spread by a factor of 8 on i8 elements; the expected codegen below is a vrgather.vv whose indices are vid >> 3.
909
+ ; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
910
+ ; CHECK: # %bb.0:
911
+ ; CHECK-NEXT: li a0, 32
912
+ ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
913
+ ; CHECK-NEXT: vid.v v10
914
+ ; CHECK-NEXT: vsrl.vi v12, v10, 3
915
+ ; CHECK-NEXT: vrgather.vv v10, v8, v12
916
+ ; CHECK-NEXT: vmv.v.v v8, v10
917
+ ; CHECK-NEXT: ret
918
+ %out = shufflevector <32 x i8 > %v , <32 x i8 > poison, <32 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 3 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; mask: result lane 8k takes %v lane k (k = 0..3); the other 28 result lanes are undef
919
+ ret <32 x i8 > %out
920
+ }
921
+
880
922
define <8 x i32 > @shuffle_decompress_singlesrc_e32 (<8 x i32 > %v ) {
881
923
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
882
924
; CHECK: # %bb.0:
883
- ; CHECK-NEXT: lui a0, %hi(.LCPI58_0 )
884
- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0 )
925
+ ; CHECK-NEXT: lui a0, %hi(.LCPI61_0 )
926
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0 )
885
927
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
886
928
; CHECK-NEXT: vle16.v v12, (a0)
887
929
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
0 commit comments