@@ -606,7 +606,7 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 {
606
606
;
607
607
; VBITS_GE_256-LABEL: srem_v16i32:
608
608
; VBITS_GE_256: // %bb.0:
609
- ; VBITS_GE_256-NEXT: mov x8, #8
609
+ ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
610
610
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
611
611
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
612
612
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
@@ -680,13 +680,13 @@ define void @srem_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
680
680
define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 {
681
681
; CHECK-LABEL: srem_v1i64:
682
682
; CHECK: // %bb.0:
683
- ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
684
- ; CHECK-NEXT: ptrue p0.d, vl1
685
683
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
684
+ ; CHECK-NEXT: ptrue p0.d, vl1
685
+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
686
686
; CHECK-NEXT: movprfx z2, z0
687
687
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
688
- ; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
689
- ; CHECK-NEXT: sub d0, d0, d1
688
+ ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
689
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
690
690
; CHECK-NEXT: ret
691
691
%res = srem <1 x i64> %op1, %op2
692
692
ret <1 x i64> %res
@@ -697,13 +697,13 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
697
697
define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 {
698
698
; CHECK-LABEL: srem_v2i64:
699
699
; CHECK: // %bb.0:
700
- ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
701
- ; CHECK-NEXT: ptrue p0.d, vl2
702
700
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
701
+ ; CHECK-NEXT: ptrue p0.d, vl2
702
+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
703
703
; CHECK-NEXT: movprfx z2, z0
704
704
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
705
- ; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
706
- ; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
705
+ ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
706
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
707
707
; CHECK-NEXT: ret
708
708
%res = srem <2 x i64> %op1, %op2
709
709
ret <2 x i64> %res
@@ -730,34 +730,32 @@ define void @srem_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
730
730
define void @srem_v8i64(ptr %a, ptr %b) #0 {
731
731
; VBITS_GE_128-LABEL: srem_v8i64:
732
732
; VBITS_GE_128: // %bb.0:
733
- ; VBITS_GE_128-NEXT: ldp q4, q5, [x1]
734
- ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
735
- ; VBITS_GE_128-NEXT: ldp q7, q6, [x1, #32]
736
733
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
737
- ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
738
- ; VBITS_GE_128-NEXT: movprfx z16, z3
739
- ; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z5.d
740
- ; VBITS_GE_128-NEXT: movprfx z17, z2
741
- ; VBITS_GE_128-NEXT: sdiv z17.d, p0/m, z17.d, z4.d
742
- ; VBITS_GE_128-NEXT: mul z5.d, p0/m, z5.d, z16.d
734
+ ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
735
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x1, #32]
743
736
; VBITS_GE_128-NEXT: movprfx z16, z1
737
+ ; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z3.d
738
+ ; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z3.d
739
+ ; VBITS_GE_128-NEXT: movprfx z3, z0
740
+ ; VBITS_GE_128-NEXT: sdiv z3.d, p0/m, z3.d, z2.d
741
+ ; VBITS_GE_128-NEXT: mls z0.d, p0/m, z3.d, z2.d
742
+ ; VBITS_GE_128-NEXT: ldp q4, q5, [x0]
743
+ ; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
744
+ ; VBITS_GE_128-NEXT: movprfx z16, z5
744
745
; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z6.d
745
- ; VBITS_GE_128-NEXT: mul z4.d, p0/m, z4.d, z17.d
746
- ; VBITS_GE_128-NEXT: movprfx z17, z0
747
- ; VBITS_GE_128-NEXT: sdiv z17.d, p0/m, z17.d, z7.d
748
- ; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z16.d
749
- ; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z17.d
750
- ; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v7.2d
751
- ; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v6.2d
752
- ; VBITS_GE_128-NEXT: sub v2.2d, v2.2d, v4.2d
746
+ ; VBITS_GE_128-NEXT: movprfx z2, z4
747
+ ; VBITS_GE_128-NEXT: sdiv z2.d, p0/m, z2.d, z7.d
753
748
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
754
- ; VBITS_GE_128-NEXT: sub v0.2d, v3.2d, v5.2d
755
- ; VBITS_GE_128-NEXT: stp q2, q0, [x0]
749
+ ; VBITS_GE_128-NEXT: movprfx z0, z4
750
+ ; VBITS_GE_128-NEXT: mls z0.d, p0/m, z2.d, z7.d
751
+ ; VBITS_GE_128-NEXT: movprfx z1, z5
752
+ ; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z6.d
753
+ ; VBITS_GE_128-NEXT: stp q0, q1, [x0]
756
754
; VBITS_GE_128-NEXT: ret
757
755
;
758
756
; VBITS_GE_256-LABEL: srem_v8i64:
759
757
; VBITS_GE_256: // %bb.0:
760
- ; VBITS_GE_256-NEXT: mov x8, #4
758
+ ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
761
759
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
762
760
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
763
761
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
@@ -1426,7 +1424,7 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 {
1426
1424
;
1427
1425
; VBITS_GE_256-LABEL: urem_v16i32:
1428
1426
; VBITS_GE_256: // %bb.0:
1429
- ; VBITS_GE_256-NEXT: mov x8, #8
1427
+ ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1430
1428
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1431
1429
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1432
1430
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
@@ -1500,13 +1498,13 @@ define void @urem_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1500
1498
define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 {
1501
1499
; CHECK-LABEL: urem_v1i64:
1502
1500
; CHECK: // %bb.0:
1503
- ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1504
- ; CHECK-NEXT: ptrue p0.d, vl1
1505
1501
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1502
+ ; CHECK-NEXT: ptrue p0.d, vl1
1503
+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1506
1504
; CHECK-NEXT: movprfx z2, z0
1507
1505
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
1508
- ; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
1509
- ; CHECK-NEXT: sub d0, d0, d1
1506
+ ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
1507
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1510
1508
; CHECK-NEXT: ret
1511
1509
%res = urem <1 x i64> %op1, %op2
1512
1510
ret <1 x i64> %res
@@ -1517,13 +1515,13 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
1517
1515
define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 {
1518
1516
; CHECK-LABEL: urem_v2i64:
1519
1517
; CHECK: // %bb.0:
1520
- ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1521
- ; CHECK-NEXT: ptrue p0.d, vl2
1522
1518
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1519
+ ; CHECK-NEXT: ptrue p0.d, vl2
1520
+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1523
1521
; CHECK-NEXT: movprfx z2, z0
1524
1522
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
1525
- ; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
1526
- ; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
1523
+ ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
1524
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1527
1525
; CHECK-NEXT: ret
1528
1526
%res = urem <2 x i64> %op1, %op2
1529
1527
ret <2 x i64> %res
@@ -1550,34 +1548,32 @@ define void @urem_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1550
1548
define void @urem_v8i64(ptr %a, ptr %b) #0 {
1551
1549
; VBITS_GE_128-LABEL: urem_v8i64:
1552
1550
; VBITS_GE_128: // %bb.0:
1553
- ; VBITS_GE_128-NEXT: ldp q4, q5, [x1]
1554
- ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
1555
- ; VBITS_GE_128-NEXT: ldp q7, q6, [x1, #32]
1556
1551
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
1557
- ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
1558
- ; VBITS_GE_128-NEXT: movprfx z16, z3
1559
- ; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z5.d
1560
- ; VBITS_GE_128-NEXT: movprfx z17, z2
1561
- ; VBITS_GE_128-NEXT: udiv z17.d, p0/m, z17.d, z4.d
1562
- ; VBITS_GE_128-NEXT: mul z5.d, p0/m, z5.d, z16.d
1552
+ ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
1553
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x1, #32]
1563
1554
; VBITS_GE_128-NEXT: movprfx z16, z1
1555
+ ; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z3.d
1556
+ ; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z3.d
1557
+ ; VBITS_GE_128-NEXT: movprfx z3, z0
1558
+ ; VBITS_GE_128-NEXT: udiv z3.d, p0/m, z3.d, z2.d
1559
+ ; VBITS_GE_128-NEXT: mls z0.d, p0/m, z3.d, z2.d
1560
+ ; VBITS_GE_128-NEXT: ldp q4, q5, [x0]
1561
+ ; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
1562
+ ; VBITS_GE_128-NEXT: movprfx z16, z5
1564
1563
; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z6.d
1565
- ; VBITS_GE_128-NEXT: mul z4.d, p0/m, z4.d, z17.d
1566
- ; VBITS_GE_128-NEXT: movprfx z17, z0
1567
- ; VBITS_GE_128-NEXT: udiv z17.d, p0/m, z17.d, z7.d
1568
- ; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z16.d
1569
- ; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z17.d
1570
- ; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v7.2d
1571
- ; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v6.2d
1572
- ; VBITS_GE_128-NEXT: sub v2.2d, v2.2d, v4.2d
1564
+ ; VBITS_GE_128-NEXT: movprfx z2, z4
1565
+ ; VBITS_GE_128-NEXT: udiv z2.d, p0/m, z2.d, z7.d
1573
1566
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
1574
- ; VBITS_GE_128-NEXT: sub v0.2d, v3.2d, v5.2d
1575
- ; VBITS_GE_128-NEXT: stp q2, q0, [x0]
1567
+ ; VBITS_GE_128-NEXT: movprfx z0, z4
1568
+ ; VBITS_GE_128-NEXT: mls z0.d, p0/m, z2.d, z7.d
1569
+ ; VBITS_GE_128-NEXT: movprfx z1, z5
1570
+ ; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z6.d
1571
+ ; VBITS_GE_128-NEXT: stp q0, q1, [x0]
1576
1572
; VBITS_GE_128-NEXT: ret
1577
1573
;
1578
1574
; VBITS_GE_256-LABEL: urem_v8i64:
1579
1575
; VBITS_GE_256: // %bb.0:
1580
- ; VBITS_GE_256-NEXT: mov x8, #4
1576
+ ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1581
1577
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1582
1578
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1583
1579
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
0 commit comments