@@ -1570,150 +1570,132 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
1570
1570
; CHECK-NEXT: sub sp, #4
1571
1571
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
1572
1572
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
1573
- ; CHECK-NEXT: .pad #48
1574
- ; CHECK-NEXT: sub sp, #48
1575
- ; CHECK-NEXT: ldrd r12, r10, [r0]
1576
- ; CHECK-NEXT: @ implicit-def: $s2
1573
+ ; CHECK-NEXT: .pad #16
1574
+ ; CHECK-NEXT: sub sp, #16
1575
+ ; CHECK-NEXT: ldrd r6, r9, [r0]
1577
1576
; CHECK-NEXT: and r7, r3, #3
1578
- ; CHECK-NEXT: ldr.w r9 , [r0, #8]
1579
- ; CHECK-NEXT: lsrs r0 , r3, #2
1580
- ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
1577
+ ; CHECK-NEXT: ldr r0 , [r0, #8]
1578
+ ; CHECK-NEXT: lsrs r3 , r3, #2
1579
+ ; CHECK-NEXT: @ implicit-def: $r12
1581
1580
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
1582
- ; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
1581
+ ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
1582
+ ; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
1583
1583
; CHECK-NEXT: b .LBB19_3
1584
1584
; CHECK-NEXT: .LBB19_1: @ in Loop: Header=BB19_3 Depth=1
1585
- ; CHECK-NEXT: vmov.f32 s14, s7
1586
- ; CHECK-NEXT: vmov.f32 s4, s3
1587
- ; CHECK-NEXT: vmov.f32 s7, s6
1585
+ ; CHECK-NEXT: mov r3, r8
1586
+ ; CHECK-NEXT: mov r7, r5
1587
+ ; CHECK-NEXT: mov r4, r11
1588
+ ; CHECK-NEXT: mov r8, r10
1588
1589
; CHECK-NEXT: .LBB19_2: @ %if.end69
1589
1590
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1590
- ; CHECK-NEXT: ldr r2, [sp, #28 ] @ 4 -byte Reload
1591
- ; CHECK-NEXT: subs.w r12, r12, #1
1592
- ; CHECK-NEXT: vstr s1, [r10 ]
1593
- ; CHECK-NEXT: add.w r9, r9, #128
1594
- ; CHECK-NEXT: vstr s4, [r10 , #4 ]
1595
- ; CHECK-NEXT: vstr s14, [r10 , #8]
1591
+ ; CHECK-NEXT: ldrd r2, r6, [sp, #8 ] @ 8 -byte Folded Reload
1592
+ ; CHECK-NEXT: adds r0, #128
1593
+ ; CHECK-NEXT: strd r7, r4, [r9 ]
1594
+ ; CHECK-NEXT: subs r6, #1
1595
+ ; CHECK-NEXT: strd r3, r8, [r9 , #8 ]
1596
+ ; CHECK-NEXT: add.w r9, r9 , #16
1596
1597
; CHECK-NEXT: mov r1, r2
1597
- ; CHECK-NEXT: vstr s7, [r10, #12]
1598
- ; CHECK-NEXT: add.w r10, r10, #16
1599
1598
; CHECK-NEXT: beq.w .LBB19_13
1600
1599
; CHECK-NEXT: .LBB19_3: @ %do.body
1601
1600
; CHECK-NEXT: @ =>This Loop Header: Depth=1
1602
1601
; CHECK-NEXT: @ Child Loop BB19_5 Depth 2
1603
- ; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
1604
- ; CHECK-NEXT: mov r5, r2
1605
- ; CHECK-NEXT: vldr s1, [r10]
1606
- ; CHECK-NEXT: vldr s3, [r10, #4]
1607
- ; CHECK-NEXT: vldr s7, [r10, #8]
1608
- ; CHECK-NEXT: vldr s6, [r10, #12]
1609
- ; CHECK-NEXT: wls lr, r0, .LBB19_6
1602
+ ; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill
1603
+ ; CHECK-NEXT: mov r6, r2
1604
+ ; CHECK-NEXT: ldrd r5, r11, [r9]
1605
+ ; CHECK-NEXT: ldrd r8, r10, [r9, #8]
1606
+ ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
1607
+ ; CHECK-NEXT: wls lr, r2, .LBB19_6
1610
1608
; CHECK-NEXT: @ %bb.4: @ %while.body.lr.ph
1611
1609
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1612
- ; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
1610
+ ; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
1611
+ ; CHECK-NEXT: mov r4, r11
1612
+ ; CHECK-NEXT: mov r3, r5
1613
1613
; CHECK-NEXT: .LBB19_5: @ %while.body
1614
1614
; CHECK-NEXT: @ Parent Loop BB19_3 Depth=1
1615
1615
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
1616
- ; CHECK-NEXT: vmov r7, s7
1617
- ; CHECK-NEXT: vldr s0, [r1, #12]
1618
- ; CHECK-NEXT: vmov r11, s6
1619
- ; CHECK-NEXT: vldrw.u32 q1, [r9, #112]
1620
- ; CHECK-NEXT: vmov r3, s3
1621
- ; CHECK-NEXT: vldr s3, [r1, #8]
1622
- ; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
1623
- ; CHECK-NEXT: vldrw.u32 q1, [r9]
1624
- ; CHECK-NEXT: vmov r8, s0
1625
- ; CHECK-NEXT: vldrw.u32 q2, [r9, #16]
1626
- ; CHECK-NEXT: ldr r6, [r1, #4]
1627
- ; CHECK-NEXT: vldrw.u32 q7, [r9, #32]
1628
- ; CHECK-NEXT: vmul.f32 q1, q1, r8
1629
- ; CHECK-NEXT: vmov r0, s3
1630
- ; CHECK-NEXT: vfma.f32 q1, q2, r0
1631
- ; CHECK-NEXT: vldrw.u32 q3, [r9, #48]
1632
- ; CHECK-NEXT: ldr r0, [r1], #16
1633
- ; CHECK-NEXT: vfma.f32 q1, q7, r6
1634
- ; CHECK-NEXT: vmov r4, s1
1635
- ; CHECK-NEXT: vldrw.u32 q6, [r9, #64]
1636
- ; CHECK-NEXT: vmov.f32 s1, s0
1637
- ; CHECK-NEXT: vfma.f32 q1, q3, r0
1638
- ; CHECK-NEXT: vmov.f32 s2, s0
1639
- ; CHECK-NEXT: vldrw.u32 q5, [r9, #80]
1640
- ; CHECK-NEXT: vfma.f32 q1, q6, r4
1641
- ; CHECK-NEXT: vldrw.u32 q4, [r9, #96]
1642
- ; CHECK-NEXT: vfma.f32 q1, q5, r3
1643
- ; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
1644
- ; CHECK-NEXT: vfma.f32 q1, q4, r7
1645
- ; CHECK-NEXT: vfma.f32 q1, q2, r11
1646
- ; CHECK-NEXT: vstrb.8 q1, [r5], #16
1616
+ ; CHECK-NEXT: ldr r5, [r1, #12]
1617
+ ; CHECK-NEXT: vldrw.u32 q2, [r0]
1618
+ ; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
1619
+ ; CHECK-NEXT: ldm.w r1, {r2, r7, r11}
1620
+ ; CHECK-NEXT: vmul.f32 q2, q2, r5
1621
+ ; CHECK-NEXT: vldrw.u32 q7, [r0, #32]
1622
+ ; CHECK-NEXT: vfma.f32 q2, q6, r11
1623
+ ; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
1624
+ ; CHECK-NEXT: vfma.f32 q2, q7, r7
1625
+ ; CHECK-NEXT: vldrw.u32 q5, [r0, #64]
1626
+ ; CHECK-NEXT: vfma.f32 q2, q4, r2
1627
+ ; CHECK-NEXT: vldrw.u32 q3, [r0, #80]
1628
+ ; CHECK-NEXT: vfma.f32 q2, q5, r3
1629
+ ; CHECK-NEXT: vldrw.u32 q1, [r0, #96]
1630
+ ; CHECK-NEXT: vfma.f32 q2, q3, r4
1631
+ ; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
1632
+ ; CHECK-NEXT: vfma.f32 q2, q1, r8
1633
+ ; CHECK-NEXT: adds r1, #16
1634
+ ; CHECK-NEXT: vfma.f32 q2, q0, r10
1635
+ ; CHECK-NEXT: mov r4, r11
1636
+ ; CHECK-NEXT: vmov r10, r8, d5
1637
+ ; CHECK-NEXT: vstrb.8 q2, [r6], #16
1638
+ ; CHECK-NEXT: mov r3, r5
1639
+ ; CHECK-NEXT: mov r12, r5
1647
1640
; CHECK-NEXT: le lr, .LBB19_5
1648
1641
; CHECK-NEXT: .LBB19_6: @ %while.end
1649
1642
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1650
- ; CHECK-NEXT: ldr r7 , [sp, #4] @ 4-byte Reload
1651
- ; CHECK-NEXT: cmp r7 , #0
1643
+ ; CHECK-NEXT: ldr r2 , [sp, #4] @ 4-byte Reload
1644
+ ; CHECK-NEXT: cmp r2 , #0
1652
1645
; CHECK-NEXT: beq .LBB19_1
1653
1646
; CHECK-NEXT: @ %bb.7: @ %if.then
1654
1647
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1655
- ; CHECK-NEXT: vldrw.u32 q2, [r9, #96]
1656
- ; CHECK-NEXT: vmov lr, s6
1657
- ; CHECK-NEXT: vldr s6, [r1, #12]
1658
- ; CHECK-NEXT: vmov r0, s1
1659
- ; CHECK-NEXT: vstrw.32 q2, [sp, #8] @ 16-byte Spill
1660
- ; CHECK-NEXT: vldrw.u32 q2, [r9, #112]
1661
- ; CHECK-NEXT: vldr s1, [r1, #8]
1662
- ; CHECK-NEXT: vldrw.u32 q3, [r9]
1663
- ; CHECK-NEXT: vldr s4, [r1, #4]
1664
- ; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
1665
- ; CHECK-NEXT: vmov r6, s6
1666
- ; CHECK-NEXT: vldrw.u32 q2, [r9, #16]
1667
- ; CHECK-NEXT: vldr s0, [r1]
1668
- ; CHECK-NEXT: vmul.f32 q3, q3, r6
1669
- ; CHECK-NEXT: vmov r6, s1
1670
- ; CHECK-NEXT: vldrw.u32 q4, [r9, #32]
1671
- ; CHECK-NEXT: vfma.f32 q3, q2, r6
1672
- ; CHECK-NEXT: vmov r4, s4
1673
- ; CHECK-NEXT: vldrw.u32 q5, [r9, #48]
1674
- ; CHECK-NEXT: vldrw.u32 q7, [r9, #64]
1675
- ; CHECK-NEXT: vmov r3, s0
1676
- ; CHECK-NEXT: vfma.f32 q3, q4, r4
1677
- ; CHECK-NEXT: vfma.f32 q3, q5, r3
1678
- ; CHECK-NEXT: vldrw.u32 q6, [r9, #80]
1679
- ; CHECK-NEXT: vmov r1, s3
1680
- ; CHECK-NEXT: vfma.f32 q3, q7, r0
1681
- ; CHECK-NEXT: vldrw.u32 q2, [sp, #8] @ 16-byte Reload
1682
- ; CHECK-NEXT: vmov r2, s7
1683
- ; CHECK-NEXT: vfma.f32 q3, q6, r1
1684
- ; CHECK-NEXT: cmp r7, #1
1685
- ; CHECK-NEXT: vfma.f32 q3, q2, r2
1686
- ; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
1687
- ; CHECK-NEXT: vfma.f32 q3, q2, lr
1648
+ ; CHECK-NEXT: ldrd lr, r4, [r1]
1649
+ ; CHECK-NEXT: vldrw.u32 q0, [r0]
1650
+ ; CHECK-NEXT: ldrd r7, r1, [r1, #8]
1651
+ ; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
1652
+ ; CHECK-NEXT: vldrw.u32 q7, [r0, #32]
1653
+ ; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
1654
+ ; CHECK-NEXT: vmul.f32 q0, q0, r1
1655
+ ; CHECK-NEXT: vldrw.u32 q5, [r0, #64]
1656
+ ; CHECK-NEXT: vfma.f32 q0, q6, r7
1657
+ ; CHECK-NEXT: vldrw.u32 q3, [r0, #80]
1658
+ ; CHECK-NEXT: vfma.f32 q0, q7, r4
1659
+ ; CHECK-NEXT: vldrw.u32 q2, [r0, #96]
1660
+ ; CHECK-NEXT: vfma.f32 q0, q4, lr
1661
+ ; CHECK-NEXT: vldrw.u32 q1, [r0, #112]
1662
+ ; CHECK-NEXT: vfma.f32 q0, q5, r5
1663
+ ; CHECK-NEXT: cmp r2, #1
1664
+ ; CHECK-NEXT: vfma.f32 q0, q3, r11
1665
+ ; CHECK-NEXT: vfma.f32 q0, q2, r8
1666
+ ; CHECK-NEXT: vfma.f32 q0, q1, r10
1667
+ ; CHECK-NEXT: vmov r5, s0
1688
1668
; CHECK-NEXT: bne .LBB19_9
1689
1669
; CHECK-NEXT: @ %bb.8: @ %if.then58
1690
1670
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1691
- ; CHECK-NEXT: vstr s12 , [r5 ]
1692
- ; CHECK-NEXT: vmov.f32 s1, s0
1693
- ; CHECK-NEXT: vmov.f32 s4, s2
1694
- ; CHECK-NEXT: vmov.f32 s14, s12
1671
+ ; CHECK-NEXT: str r5 , [r6 ]
1672
+ ; CHECK-NEXT: mov r7, lr
1673
+ ; CHECK-NEXT: mov r4, r12
1674
+ ; CHECK-NEXT: mov r3, r5
1695
1675
; CHECK-NEXT: b .LBB19_12
1696
1676
; CHECK-NEXT: .LBB19_9: @ %if.else
1697
1677
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1698
- ; CHECK-NEXT: cmp r7, #2
1699
- ; CHECK-NEXT: vstmia r5, {s12, s13}
1678
+ ; CHECK-NEXT: vmov r8, s1
1679
+ ; CHECK-NEXT: cmp r2, #2
1680
+ ; CHECK-NEXT: vstr s1, [r6, #4]
1681
+ ; CHECK-NEXT: str r5, [r6]
1700
1682
; CHECK-NEXT: bne .LBB19_11
1701
1683
; CHECK-NEXT: @ %bb.10: @ in Loop: Header=BB19_3 Depth=1
1702
- ; CHECK-NEXT: vmov.f32 s1, s4
1703
- ; CHECK-NEXT: vmov.f32 s4, s0
1704
- ; CHECK-NEXT: vmov.f32 s14, s13
1705
- ; CHECK-NEXT: vmov.f32 s7, s12
1684
+ ; CHECK-NEXT: mov r7, r4
1685
+ ; CHECK-NEXT: mov r3, r8
1686
+ ; CHECK-NEXT: mov r4, lr
1687
+ ; CHECK-NEXT: mov r8, r5
1706
1688
; CHECK-NEXT: b .LBB19_12
1707
1689
; CHECK-NEXT: .LBB19_11: @ %if.else64
1708
1690
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1709
- ; CHECK-NEXT: vmov.f32 s7, s13
1710
- ; CHECK-NEXT: vstr s14 , [r5 , #8]
1691
+ ; CHECK-NEXT: vmov r3, s2
1692
+ ; CHECK-NEXT: vstr s2 , [r6 , #8]
1711
1693
; CHECK-NEXT: .LBB19_12: @ %if.end69
1712
1694
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
1713
- ; CHECK-NEXT: vmov.f32 s2, s6
1695
+ ; CHECK-NEXT: mov r12, r1
1714
1696
; CHECK-NEXT: b .LBB19_2
1715
1697
; CHECK-NEXT: .LBB19_13: @ %do.end
1716
- ; CHECK-NEXT: add sp, #48
1698
+ ; CHECK-NEXT: add sp, #16
1717
1699
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1718
1700
; CHECK-NEXT: add sp, #4
1719
1701
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
0 commit comments