@@ -319,34 +319,27 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
319
319
; CHECK-NEXT: .cfi_offset w29, -32
320
320
; CHECK-NEXT: addvl sp, sp, #-18
321
321
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
322
+ ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
323
+ ; CHECK-NEXT: ptrue pn8.b
322
324
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
325
+ ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
326
+ ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
323
327
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
328
+ ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
329
+ ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
324
330
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
331
+ ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
332
+ ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
325
333
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
334
+ ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
326
335
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
327
336
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
328
337
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
329
- ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
330
338
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
331
339
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
332
340
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
333
341
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
334
- ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
335
- ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
336
- ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
337
- ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
338
- ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
339
- ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
340
- ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
341
- ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
342
- ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
343
- ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
344
- ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
345
- ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
346
- ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
347
- ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
348
- ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
349
- ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
342
+ ; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
350
343
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
351
344
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
352
345
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -368,23 +361,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
368
361
; CHECK-NEXT: .cfi_restore vg
369
362
; CHECK-NEXT: addvl sp, sp, #1
370
363
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
371
- ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
372
- ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
373
- ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
374
- ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
375
- ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
376
- ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
377
- ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
378
- ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
379
- ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
380
- ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
381
- ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
382
- ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
383
- ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
384
- ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
385
- ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
386
- ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
364
+ ; CHECK-NEXT: ptrue pn8.b
387
365
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
366
+ ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
367
+ ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
368
+ ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
369
+ ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
370
+ ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
371
+ ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
372
+ ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
373
+ ; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
388
374
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
389
375
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
390
376
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -428,34 +414,27 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
428
414
; FP-CHECK-NEXT: .cfi_offset w30, -40
429
415
; FP-CHECK-NEXT: .cfi_offset w29, -48
430
416
; FP-CHECK-NEXT: addvl sp, sp, #-18
417
+ ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
418
+ ; FP-CHECK-NEXT: ptrue pn8.b
431
419
; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
420
+ ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
421
+ ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
432
422
; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
423
+ ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
424
+ ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
433
425
; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
426
+ ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
427
+ ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
434
428
; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
429
+ ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
435
430
; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
436
431
; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
437
432
; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
438
- ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
439
433
; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
440
434
; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
441
435
; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
442
436
; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
443
- ; FP-CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
444
- ; FP-CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
445
- ; FP-CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
446
- ; FP-CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
447
- ; FP-CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
448
- ; FP-CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
449
- ; FP-CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
450
- ; FP-CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
451
- ; FP-CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
452
- ; FP-CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
453
- ; FP-CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
454
- ; FP-CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
455
- ; FP-CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
456
- ; FP-CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
457
- ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
458
- ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
437
+ ; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
459
438
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
460
439
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
461
440
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -475,23 +454,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
475
454
; FP-CHECK-NEXT: smstart sm
476
455
; FP-CHECK-NEXT: .cfi_restore vg
477
456
; FP-CHECK-NEXT: addvl sp, sp, #1
478
- ; FP-CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
479
- ; FP-CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
480
- ; FP-CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
481
- ; FP-CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
482
- ; FP-CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
483
- ; FP-CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
484
- ; FP-CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
485
- ; FP-CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
486
- ; FP-CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
487
- ; FP-CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
488
- ; FP-CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
489
- ; FP-CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
490
- ; FP-CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
491
- ; FP-CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
492
- ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
493
- ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
457
+ ; FP-CHECK-NEXT: ptrue pn8.b
494
458
; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
459
+ ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
460
+ ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
461
+ ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
462
+ ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
463
+ ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
464
+ ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
465
+ ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
466
+ ; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
495
467
; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
496
468
; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
497
469
; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
0 commit comments