@@ -874,27 +874,30 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: reverse_v32i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    addi a1, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v12, a1
-; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    vrsub.vx v10, v10, a2
+; CHECK-NEXT:    lui a2, 16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
-; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
+; CHECK-NEXT:    vrgatherei16.vv v15, v8, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v12, v10
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    addi a2, a2, -1
+; CHECK-NEXT:    vrsub.vi v10, v10, 15
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v17, v13, v10
+; CHECK-NEXT:    vrgather.vv v16, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vmv.s.x v0, a2
 ; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vrsub.vi v12, v8, 15
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i8> %res
@@ -943,23 +946,25 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: reverse_v16i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x i16> %res
@@ -1024,24 +1029,27 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, <2 x i32> %b) {
 define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: reverse_v8i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i32> %res
@@ -1197,23 +1205,25 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: reverse_v16f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x half> %res
@@ -1269,24 +1279,27 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
 define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: reverse_v8f32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x float> %res