@@ -423,45 +423,45 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
423
423
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
424
424
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
425
425
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
426
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
426
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b
427
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
427
428
; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
428
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
429
429
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
430
430
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
431
431
; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
432
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h
433
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h
434
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
432
435
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
433
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
434
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
435
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
436
436
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
437
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
438
437
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
439
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
440
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
441
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
442
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
443
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
438
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
439
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s
440
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s
441
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
442
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s
443
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s
444
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s
445
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s
446
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s
447
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z30.d, z4.s
444
448
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
445
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
449
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z31.d, z2.s
450
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
451
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z8.d, z3.s
452
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z5.s
446
453
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
447
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
448
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
449
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
450
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
451
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
452
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
453
- ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
454
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
455
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
454
+ ; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
455
+ ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
456
456
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
457
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
458
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
459
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
460
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
461
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
457
+ ; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
458
+ ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
459
+ ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
460
+ ; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
461
+ ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
462
462
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
463
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
464
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
463
+ ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
464
+ ; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
465
465
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
466
466
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
467
467
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
@@ -556,45 +556,45 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
556
556
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
557
557
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
558
558
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
559
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
559
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b
560
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
560
561
; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
561
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
562
562
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
563
563
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
564
564
; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
565
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h
566
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h
567
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
565
568
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
566
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
567
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
568
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
569
569
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
570
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
571
570
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
572
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
573
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
574
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
575
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
576
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
571
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
572
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s
573
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s
574
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
575
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s
576
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s
577
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s
578
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s
579
+ ; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s
580
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z30.d, z4.s
577
581
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
578
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
582
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z31.d, z2.s
583
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
584
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z8.d, z3.s
585
+ ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z5.s
579
586
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
580
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
581
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
582
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
583
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
584
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
585
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
586
- ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
587
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
588
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
587
+ ; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
588
+ ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
589
589
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
590
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
591
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
592
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
593
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
594
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
590
+ ; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
591
+ ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
592
+ ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
593
+ ; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
594
+ ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
595
595
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
596
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
597
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
596
+ ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
597
+ ; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
598
598
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
599
599
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
600
600
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
0 commit comments