@@ -194,28 +194,13 @@ define void @test_revhv32i16(ptr %a) #0 {
194
194
define void @test_rev_elts_fail (ptr %a ) #1 {
195
195
; CHECK-LABEL: test_rev_elts_fail:
196
196
; CHECK: // %bb.0:
197
- ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
198
- ; CHECK-NEXT: sub x9, sp, #48
199
- ; CHECK-NEXT: mov x29, sp
200
- ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
201
- ; CHECK-NEXT: .cfi_def_cfa w29, 16
202
- ; CHECK-NEXT: .cfi_offset w30, -8
203
- ; CHECK-NEXT: .cfi_offset w29, -16
204
197
; CHECK-NEXT: ptrue p0.d
198
+ ; CHECK-NEXT: adrp x8, .LCPI11_0
199
+ ; CHECK-NEXT: add x8, x8, :lo12:.LCPI11_0
205
200
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
206
- ; CHECK-NEXT: mov z1.d, z0.d[2]
207
- ; CHECK-NEXT: mov z2.d, z0.d[3]
208
- ; CHECK-NEXT: mov x9, v0.d[1]
209
- ; CHECK-NEXT: fmov x8, d1
210
- ; CHECK-NEXT: fmov x10, d2
211
- ; CHECK-NEXT: stp x10, x8, [sp, #16]
212
- ; CHECK-NEXT: fmov x8, d0
213
- ; CHECK-NEXT: stp x9, x8, [sp]
214
- ; CHECK-NEXT: mov x8, sp
215
- ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
201
+ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8]
202
+ ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d
216
203
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
217
- ; CHECK-NEXT: mov sp, x29
218
- ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
219
204
; CHECK-NEXT: ret
220
205
%tmp1 = load <4 x i64 >, ptr %a
221
206
%tmp2 = shufflevector <4 x i64 > %tmp1 , <4 x i64 > undef , <4 x i32 > <i32 1 , i32 0 , i32 3 , i32 2 >
@@ -260,39 +245,26 @@ define void @test_revdv4f64_sve2p1(ptr %a) #2 {
260
245
261
246
; With sve-vector-bits-min=256 and sve-vector-bits-max not set, a REV instruction can't be generated.
262
247
define void @test_revv8i32 (ptr %a ) #0 {
263
- ; CHECK-LABEL: test_revv8i32:
264
- ; CHECK: // %bb.0:
265
- ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
266
- ; CHECK-NEXT: sub x9, sp, #48
267
- ; CHECK-NEXT: mov x29, sp
268
- ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
269
- ; CHECK-NEXT: .cfi_def_cfa w29, 16
270
- ; CHECK-NEXT: .cfi_offset w30, -8
271
- ; CHECK-NEXT: .cfi_offset w29, -16
272
- ; CHECK-NEXT: ptrue p0.s, vl8
273
- ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
274
- ; CHECK-NEXT: mov w8, v0.s[1]
275
- ; CHECK-NEXT: mov w9, v0.s[2]
276
- ; CHECK-NEXT: mov w10, v0.s[3]
277
- ; CHECK-NEXT: fmov w11, s0
278
- ; CHECK-NEXT: mov z1.s, z0.s[4]
279
- ; CHECK-NEXT: mov z2.s, z0.s[5]
280
- ; CHECK-NEXT: mov z3.s, z0.s[6]
281
- ; CHECK-NEXT: mov z0.s, z0.s[7]
282
- ; CHECK-NEXT: stp w8, w11, [sp, #24]
283
- ; CHECK-NEXT: fmov w8, s1
284
- ; CHECK-NEXT: stp w10, w9, [sp, #16]
285
- ; CHECK-NEXT: fmov w9, s2
286
- ; CHECK-NEXT: stp w9, w8, [sp, #8]
287
- ; CHECK-NEXT: fmov w8, s3
288
- ; CHECK-NEXT: fmov w9, s0
289
- ; CHECK-NEXT: stp w9, w8, [sp]
290
- ; CHECK-NEXT: mov x8, sp
291
- ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
292
- ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
293
- ; CHECK-NEXT: mov sp, x29
294
- ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
295
- ; CHECK-NEXT: ret
248
+ ; VBITS_GE_256-LABEL: test_revv8i32:
249
+ ; VBITS_GE_256: // %bb.0:
250
+ ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
251
+ ; VBITS_GE_256-NEXT: index z0.s, #7, #-1
252
+ ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
253
+ ; VBITS_GE_256-NEXT: tbl z0.s, { z1.s }, z0.s
254
+ ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
255
+ ; VBITS_GE_256-NEXT: ret
256
+ ;
257
+ ; VBITS_GE_512-LABEL: test_revv8i32:
258
+ ; VBITS_GE_512: // %bb.0:
259
+ ; VBITS_GE_512-NEXT: ptrue p0.s, vl8
260
+ ; VBITS_GE_512-NEXT: adrp x8, .LCPI14_0
261
+ ; VBITS_GE_512-NEXT: add x8, x8, :lo12:.LCPI14_0
262
+ ; VBITS_GE_512-NEXT: ptrue p1.s, vl16
263
+ ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
264
+ ; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x8]
265
+ ; VBITS_GE_512-NEXT: tbl z0.s, { z0.s }, z1.s
266
+ ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
267
+ ; VBITS_GE_512-NEXT: ret
296
268
%tmp1 = load <8 x i32 >, ptr %a
297
269
%tmp2 = shufflevector <8 x i32 > %tmp1 , <8 x i32 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
298
270
store <8 x i32 > %tmp2 , ptr %a
@@ -379,60 +351,13 @@ define void @test_revv8i32v8i32(ptr %a, ptr %b) #1 {
379
351
define void @test_rev_fail (ptr %a ) #1 {
380
352
; CHECK-LABEL: test_rev_fail:
381
353
; CHECK: // %bb.0:
382
- ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
383
- ; CHECK-NEXT: sub x9, sp, #48
384
- ; CHECK-NEXT: mov x29, sp
385
- ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
386
- ; CHECK-NEXT: .cfi_def_cfa w29, 16
387
- ; CHECK-NEXT: .cfi_offset w30, -8
388
- ; CHECK-NEXT: .cfi_offset w29, -16
389
354
; CHECK-NEXT: ptrue p0.h
355
+ ; CHECK-NEXT: adrp x8, .LCPI20_0
356
+ ; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0
390
357
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
391
- ; CHECK-NEXT: mov z1.h, z0.h[8]
392
- ; CHECK-NEXT: fmov w8, s0
393
- ; CHECK-NEXT: mov z2.h, z0.h[9]
394
- ; CHECK-NEXT: mov z3.h, z0.h[10]
395
- ; CHECK-NEXT: mov z4.h, z0.h[11]
396
- ; CHECK-NEXT: strh w8, [sp, #14]
397
- ; CHECK-NEXT: fmov w8, s1
398
- ; CHECK-NEXT: mov z1.h, z0.h[12]
399
- ; CHECK-NEXT: fmov w9, s2
400
- ; CHECK-NEXT: mov z2.h, z0.h[13]
401
- ; CHECK-NEXT: strh w8, [sp, #30]
402
- ; CHECK-NEXT: fmov w8, s3
403
- ; CHECK-NEXT: mov z3.h, z0.h[14]
404
- ; CHECK-NEXT: strh w9, [sp, #28]
405
- ; CHECK-NEXT: fmov w9, s4
406
- ; CHECK-NEXT: mov z4.h, z0.h[15]
407
- ; CHECK-NEXT: fmov w10, s2
408
- ; CHECK-NEXT: strh w8, [sp, #26]
409
- ; CHECK-NEXT: fmov w8, s1
410
- ; CHECK-NEXT: fmov w11, s3
411
- ; CHECK-NEXT: strh w9, [sp, #24]
412
- ; CHECK-NEXT: umov w9, v0.h[1]
413
- ; CHECK-NEXT: fmov w12, s4
414
- ; CHECK-NEXT: strh w10, [sp, #20]
415
- ; CHECK-NEXT: umov w10, v0.h[3]
416
- ; CHECK-NEXT: strh w8, [sp, #22]
417
- ; CHECK-NEXT: umov w8, v0.h[2]
418
- ; CHECK-NEXT: strh w11, [sp, #18]
419
- ; CHECK-NEXT: umov w11, v0.h[4]
420
- ; CHECK-NEXT: strh w12, [sp, #16]
421
- ; CHECK-NEXT: umov w12, v0.h[5]
422
- ; CHECK-NEXT: strh w9, [sp, #12]
423
- ; CHECK-NEXT: umov w9, v0.h[6]
424
- ; CHECK-NEXT: strh w8, [sp, #10]
425
- ; CHECK-NEXT: umov w8, v0.h[7]
426
- ; CHECK-NEXT: strh w10, [sp, #8]
427
- ; CHECK-NEXT: strh w11, [sp, #6]
428
- ; CHECK-NEXT: strh w12, [sp, #4]
429
- ; CHECK-NEXT: strh w9, [sp, #2]
430
- ; CHECK-NEXT: strh w8, [sp]
431
- ; CHECK-NEXT: mov x8, sp
432
- ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
358
+ ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x8]
359
+ ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
433
360
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
434
- ; CHECK-NEXT: mov sp, x29
435
- ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
436
361
; CHECK-NEXT: ret
437
362
%tmp1 = load <16 x i16 >, ptr %a
438
363
%tmp2 = shufflevector <16 x i16 > %tmp1 , <16 x i16 > undef , <16 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 >
0 commit comments