@@ -168,12 +168,11 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
 define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
 ; CHECK-LABEL: m2_splat_into_slide_two_source:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 12
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v13, v10, 1
+; CHECK-NEXT:    vslideup.vi v13, v11, 1
 ; CHECK-NEXT:    vrgather.vi v12, v8, 0
-; CHECK-NEXT:    vslideup.vi v12, v10, 1, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
   ret <4 x i64> %res
@@ -183,18 +182,17 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 252
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    li a0, 175
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vsrl.vi v8, v8, 1
-; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v8, v8, 1
-; CHECK-NEXT:    vrgather.vv v11, v9, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vle32.v v11, (a0)
+; CHECK-NEXT:    vmv.v.i v0, 5
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vsrl.vi v10, v10, 1
+; CHECK-NEXT:    vadd.vi v10, v10, 1
+; CHECK-NEXT:    vrgather.vv v9, v11, v10, v0.t
 ; CHECK-NEXT:    addi a0, a1, 672
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -211,15 +209,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a0, -97
-; CHECK-NEXT:    vadd.vv v9, v9, v9
-; CHECK-NEXT:    vrsub.vi v9, v9, 4
-; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v13
+; CHECK-NEXT:    vadd.vv v13, v13, v13
+; CHECK-NEXT:    vmv.v.i v0, 6
+; CHECK-NEXT:    vrsub.vi v13, v13, 4
+; CHECK-NEXT:    vrgather.vv v9, v12, v13, v0.t
 ; CHECK-NEXT:    ret
   %b = extractelement <4 x float> %a, i32 2
   %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
@@ -231,16 +229,15 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
 define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
 ; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vmv.v.i v16, 0
-; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vmv.v.i v0, 1
 ; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vrgather.vi v16, v8, 15, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vrgather.vi v18, v15, 1, v0.t
+; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
 ; RV32-NEXT:    vslidedown.vx v8, v16, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    ret
@@ -258,13 +255,14 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 ; RV64-NEXT:    addi s0, sp, 256
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    li a1, -17
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 1
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    vrgather.vi v16, v8, 15
-; RV64-NEXT:    vmerge.vim v8, v16, 0, v0
+; RV64-NEXT:    vmv.v.i v16, 0
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT:    vrgather.vi v18, v15, 1, v0.t
 ; RV64-NEXT:    mv s2, sp
-; RV64-NEXT:    vs8r.v v8, (s2)
+; RV64-NEXT:    vs8r.v v16, (s2)
 ; RV64-NEXT:    andi a0, a0, 15
 ; RV64-NEXT:    li a1, 8
 ; RV64-NEXT:    call __muldi3
@@ -290,21 +288,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffles_add:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmv1r.v v13, v10
+; CHECK-NEXT:    vslideup.vi v13, v11, 1
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vrgather.vi v12, v9, 0
+; CHECK-NEXT:    vmv1r.v v9, v11
+; CHECK-NEXT:    vrgather.vi v9, v10, 1, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vrgather.vi v12, v8, 2
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vid.v v14
-; CHECK-NEXT:    vmv.v.i v0, 12
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vrgather.vi v16, v8, 3
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vadd.vv v8, v14, v14
-; CHECK-NEXT:    vadd.vi v9, v8, -4
-; CHECK-NEXT:    vadd.vi v8, v8, -3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vrgatherei16.vv v12, v10, v9, v0.t
-; CHECK-NEXT:    vrgatherei16.vv v16, v10, v8, v0.t
-; CHECK-NEXT:    vfadd.vv v8, v12, v16
+; CHECK-NEXT:    vfadd.vv v8, v12, v8
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
   %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>