@@ -168,11 +168,12 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_slide_two_source:
; CHECK:       # %bb.0:
- ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
- ; CHECK-NEXT:    vslidedown.vi v13, v10, 1
- ; CHECK-NEXT:    vslideup.vi v13, v11, 1
+ ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+ ; CHECK-NEXT:    vmv.v.i v0, 12
+ ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vrgather.vi v12, v8, 0
- ; CHECK-NEXT:    vmv2r.v v8, v12
+ ; CHECK-NEXT:    vslideup.vi v12, v10, 1, v0.t
+ ; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
  ret <4 x i64> %res
@@ -182,17 +183,18 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK-LABEL: shuffle1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 252
- ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
- ; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
- ; CHECK-NEXT:    vid.v v10
+ ; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
- ; CHECK-NEXT:    vle32.v v11, (a0)
- ; CHECK-NEXT:    vmv.v.i v0, 5
- ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
- ; CHECK-NEXT:    vsrl.vi v10, v10, 1
- ; CHECK-NEXT:    vadd.vi v10, v10, 1
- ; CHECK-NEXT:    vrgather.vv v9, v11, v10, v0.t
+ ; CHECK-NEXT:    vle32.v v9, (a0)
+ ; CHECK-NEXT:    li a0, 175
+ ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT:    vsrl.vi v8, v8, 1
+ ; CHECK-NEXT:    vmv.s.x v0, a0
+ ; CHECK-NEXT:    vadd.vi v8, v8, 1
+ ; CHECK-NEXT:    vrgather.vv v11, v9, v8
+ ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+ ; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
; CHECK-NEXT:    addi a0, a1, 672
; CHECK-NEXT:    vs2r.v v8, (a0)
; CHECK-NEXT:    ret
@@ -209,15 +211,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
; CHECK-LABEL: shuffle2:
; CHECK:       # %bb.0:
+ ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT:    vid.v v9
+ ; CHECK-NEXT:    li a0, -97
+ ; CHECK-NEXT:    vadd.vv v9, v9, v9
+ ; CHECK-NEXT:    vrsub.vi v9, v9, 4
+ ; CHECK-NEXT:    vmv.s.x v0, a0
+ ; CHECK-NEXT:    vrgather.vv v13, v8, v9
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
- ; CHECK-NEXT:    vmv1r.v v12, v8
- ; CHECK-NEXT:    vmv.v.i v8, 0
- ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
- ; CHECK-NEXT:    vid.v v13
- ; CHECK-NEXT:    vadd.vv v13, v13, v13
- ; CHECK-NEXT:    vmv.v.i v0, 6
- ; CHECK-NEXT:    vrsub.vi v13, v13, 4
- ; CHECK-NEXT:    vrgather.vv v9, v12, v13, v0.t
+ ; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
; CHECK-NEXT:    ret
  %b = extractelement <4 x float> %a, i32 2
  %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
@@ -229,15 +231,16 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
; RV32:       # %bb.0:
- ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+ ; RV32-NEXT:    li a1, 16
+ ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT:    vmv.v.i v16, 0
- ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
- ; RV32-NEXT:    vmv.v.i v0, 1
+ ; RV32-NEXT:    vmv.s.x v0, a1
; RV32-NEXT:    li a1, 32
- ; RV32-NEXT:    vrgather.vi v18, v15, 1, v0.t
- ; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
+ ; RV32-NEXT:    vrgather.vi v16, v8, 15, v0.t
+ ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v16, a0
; RV32-NEXT:    vmv.x.s a0, v8
+ ; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
@@ -255,14 +258,13 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
- ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
- ; RV64-NEXT:    vmv.v.i v0, 1
+ ; RV64-NEXT:    li a1, -17
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
- ; RV64-NEXT:    vmv.v.i v16, 0
- ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
- ; RV64-NEXT:    vrgather.vi v18, v15, 1, v0.t
+ ; RV64-NEXT:    vmv.s.x v0, a1
+ ; RV64-NEXT:    vrgather.vi v16, v8, 15
+ ; RV64-NEXT:    vmerge.vim v8, v16, 0, v0
; RV64-NEXT:    mv s2, sp
- ; RV64-NEXT:    vs8r.v v16, (s2)
+ ; RV64-NEXT:    vs8r.v v8, (s2)
; RV64-NEXT:    andi a0, a0, 15
; RV64-NEXT:    li a1, 8
; RV64-NEXT:    call __muldi3
@@ -288,16 +290,21 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
; CHECK-LABEL: shuffles_add:
; CHECK:       # %bb.0:
- ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
- ; CHECK-NEXT:    vmv1r.v v13, v10
- ; CHECK-NEXT:    vslideup.vi v13, v11, 1
- ; CHECK-NEXT:    vmv1r.v v8, v9
- ; CHECK-NEXT:    vmv.v.i v0, 1
- ; CHECK-NEXT:    vrgather.vi v12, v9, 0
- ; CHECK-NEXT:    vmv1r.v v9, v11
- ; CHECK-NEXT:    vrgather.vi v9, v10, 1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
- ; CHECK-NEXT:    vfadd.vv v8, v12, v8
+ ; CHECK-NEXT:    vrgather.vi v12, v8, 2
+ ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+ ; CHECK-NEXT:    vid.v v14
+ ; CHECK-NEXT:    vmv.v.i v0, 12
+ ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+ ; CHECK-NEXT:    vrgather.vi v16, v8, 3
+ ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+ ; CHECK-NEXT:    vadd.vv v8, v14, v14
+ ; CHECK-NEXT:    vadd.vi v9, v8, -4
+ ; CHECK-NEXT:    vadd.vi v8, v8, -3
+ ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+ ; CHECK-NEXT:    vrgatherei16.vv v12, v10, v9, v0.t
+ ; CHECK-NEXT:    vrgatherei16.vv v16, v10, v8, v0.t
+ ; CHECK-NEXT:    vfadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>