@@ -1574,8 +1574,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
1574
1574
; GFX6-LABEL: v_lshr_i65:
1575
1575
; GFX6: ; %bb.0:
1576
1576
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577
- ; GFX6-NEXT: v_mov_b32_e32 v5, 0
1578
1577
; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
1578
+ ; GFX6-NEXT: v_mov_b32_e32 v5, 0
1579
1579
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
1580
1580
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
1581
1581
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
@@ -1596,8 +1596,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
1596
1596
; GFX8-LABEL: v_lshr_i65:
1597
1597
; GFX8: ; %bb.0:
1598
1598
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599
- ; GFX8-NEXT: v_mov_b32_e32 v5, 0
1600
1599
; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
1600
+ ; GFX8-NEXT: v_mov_b32_e32 v5, 0
1601
1601
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
1602
1602
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
1603
1603
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1618,8 +1618,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
1618
1618
; GFX9-LABEL: v_lshr_i65:
1619
1619
; GFX9: ; %bb.0:
1620
1620
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621
- ; GFX9-NEXT: v_mov_b32_e32 v5, 0
1622
1621
; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
1622
+ ; GFX9-NEXT: v_mov_b32_e32 v5, 0
1623
1623
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
1624
1624
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
1625
1625
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1688,8 +1688,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
1688
1688
; GFX6: ; %bb.0:
1689
1689
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690
1690
; GFX6-NEXT: v_mov_b32_e32 v3, v1
1691
- ; GFX6-NEXT: v_mov_b32_e32 v1, 0
1692
1691
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
1692
+ ; GFX6-NEXT: v_mov_b32_e32 v1, 0
1693
1693
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1694
1694
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1695
1695
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1700,8 +1700,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
1700
1700
; GFX8: ; %bb.0:
1701
1701
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1702
1702
; GFX8-NEXT: v_mov_b32_e32 v3, v1
1703
- ; GFX8-NEXT: v_mov_b32_e32 v1, 0
1704
1703
; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
1704
+ ; GFX8-NEXT: v_mov_b32_e32 v1, 0
1705
1705
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1706
1706
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1707
1707
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1712,8 +1712,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
1712
1712
; GFX9: ; %bb.0:
1713
1713
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714
1714
; GFX9-NEXT: v_mov_b32_e32 v3, v1
1715
- ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1716
1715
; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
1716
+ ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1717
1717
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1718
1718
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
1719
1719
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1749,22 +1749,20 @@ define i65 @v_lshr_i65_33(i65 %value) {
1749
1749
define amdgpu_ps i65 @s_lshr_i65 (i65 inreg %value , i65 inreg %amount ) {
1750
1750
; GCN-LABEL: s_lshr_i65:
1751
1751
; GCN: ; %bb.0:
1752
- ; GCN-NEXT: s_mov_b32 s4, s3
1753
- ; GCN-NEXT: s_mov_b32 s3, 0
1754
- ; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1755
- ; GCN-NEXT: s_sub_i32 s10, s4, 64
1756
- ; GCN-NEXT: s_sub_i32 s8, 64, s4
1757
- ; GCN-NEXT: s_cmp_lt_u32 s4, 64
1752
+ ; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1753
+ ; GCN-NEXT: s_sub_i32 s10, s3, 64
1754
+ ; GCN-NEXT: s_sub_i32 s8, 64, s3
1755
+ ; GCN-NEXT: s_cmp_lt_u32 s3, 64
1758
1756
; GCN-NEXT: s_cselect_b32 s11, 1, 0
1759
- ; GCN-NEXT: s_cmp_eq_u32 s4 , 0
1757
+ ; GCN-NEXT: s_cmp_eq_u32 s3 , 0
1760
1758
; GCN-NEXT: s_cselect_b32 s12, 1, 0
1761
- ; GCN-NEXT: s_lshr_b64 s[6:7], s[2:3 ], s4
1762
- ; GCN-NEXT: s_lshr_b64 s[4:5 ], s[0:1], s4
1763
- ; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3 ], s8
1764
- ; GCN-NEXT: s_or_b64 s[4:5 ], s[4:5 ], s[8:9]
1765
- ; GCN-NEXT: s_lshr_b64 s[2:3 ], s[2:3 ], s10
1759
+ ; GCN-NEXT: s_lshr_b64 s[6:7], s[4:5 ], s3
1760
+ ; GCN-NEXT: s_lshr_b64 s[2:3 ], s[0:1], s3
1761
+ ; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5 ], s8
1762
+ ; GCN-NEXT: s_or_b64 s[2:3 ], s[2:3 ], s[8:9]
1763
+ ; GCN-NEXT: s_lshr_b64 s[4:5 ], s[4:5 ], s10
1766
1764
; GCN-NEXT: s_cmp_lg_u32 s11, 0
1767
- ; GCN-NEXT: s_cselect_b64 s[2:3], s[4:5 ], s[2:3 ]
1765
+ ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3 ], s[4:5 ]
1768
1766
; GCN-NEXT: s_cmp_lg_u32 s12, 0
1769
1767
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1770
1768
; GCN-NEXT: s_cmp_lg_u32 s11, 0
@@ -1773,26 +1771,24 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
1773
1771
;
1774
1772
; GFX10PLUS-LABEL: s_lshr_i65:
1775
1773
; GFX10PLUS: ; %bb.0:
1776
- ; GFX10PLUS-NEXT: s_mov_b32 s4, s3
1777
- ; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1778
- ; GFX10PLUS-NEXT: s_sub_i32 s10, s4, 64
1779
- ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1780
- ; GFX10PLUS-NEXT: s_sub_i32 s5, 64, s4
1781
- ; GFX10PLUS-NEXT: s_cmp_lt_u32 s4, 64
1774
+ ; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1775
+ ; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1776
+ ; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
1777
+ ; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
1782
1778
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
1783
- ; GFX10PLUS-NEXT: s_cmp_eq_u32 s4 , 0
1779
+ ; GFX10PLUS-NEXT: s_cmp_eq_u32 s3 , 0
1784
1780
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1785
- ; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
1786
- ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[2:3 ], s5
1787
- ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5 ], s[2:3 ], s4
1781
+ ; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
1782
+ ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5 ], s2
1783
+ ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3 ], s[4:5 ], s3
1788
1784
; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1789
- ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3 ], s[2:3 ], s10
1785
+ ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5 ], s[4:5 ], s10
1790
1786
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1791
- ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3 ], s[6:7], s[2:3 ]
1787
+ ; GFX10PLUS-NEXT: s_cselect_b64 s[4:5 ], s[6:7], s[4:5 ]
1792
1788
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1793
- ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3 ]
1789
+ ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5 ]
1794
1790
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1795
- ; GFX10PLUS-NEXT: s_cselect_b32 s2, s4 , 0
1791
+ ; GFX10PLUS-NEXT: s_cselect_b32 s2, s2 , 0
1796
1792
; GFX10PLUS-NEXT: ; return to shader part epilog
1797
1793
%result = lshr i65 %value , %amount
1798
1794
ret i65 %result
@@ -1801,22 +1797,22 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
1801
1797
define amdgpu_ps i65 @s_lshr_i65_33 (i65 inreg %value ) {
1802
1798
; GCN-LABEL: s_lshr_i65_33:
1803
1799
; GCN: ; %bb.0:
1804
- ; GCN-NEXT: s_mov_b32 s3, 0
1805
- ; GCN-NEXT: s_and_b64 s[4:5], s[2:3] , 1
1806
- ; GCN-NEXT: s_lshr_b32 s2, s1, 1
1807
- ; GCN-NEXT: s_lshl_b64 s[0:1 ], s[4:5 ], 31
1808
- ; GCN-NEXT: s_or_b64 s[0:1], s[2:3 ], s[0:1 ]
1809
- ; GCN-NEXT: s_lshr_b32 s2, s5 , 1
1800
+ ; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1801
+ ; GCN-NEXT: s_lshr_b32 s0, s1 , 1
1802
+ ; GCN-NEXT: s_mov_b32 s1, 0
1803
+ ; GCN-NEXT: s_lshl_b64 s[4:5 ], s[2:3 ], 31
1804
+ ; GCN-NEXT: s_or_b64 s[0:1], s[0:1 ], s[4:5 ]
1805
+ ; GCN-NEXT: s_lshr_b32 s2, s3 , 1
1810
1806
; GCN-NEXT: ; return to shader part epilog
1811
1807
;
1812
1808
; GFX10PLUS-LABEL: s_lshr_i65_33:
1813
1809
; GFX10PLUS: ; %bb.0:
1814
- ; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1815
- ; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3] , 1
1816
- ; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1817
- ; GFX10PLUS-NEXT: s_lshl_b64 s[0:1 ], s[4:5 ], 31
1818
- ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1819
- ; GFX10PLUS-NEXT: s_lshr_b32 s2, s5, 1
1810
+ ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1811
+ ; GFX10PLUS-NEXT: s_lshr_b32 s0, s1 , 1
1812
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1813
+ ; GFX10PLUS-NEXT: s_lshl_b64 s[4:5 ], s[2:3 ], 31
1814
+ ; GFX10PLUS-NEXT: s_lshr_b32 s2, s3, 1
1815
+ ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1820
1816
; GFX10PLUS-NEXT: ; return to shader part epilog
1821
1817
%result = lshr i65 %value , 33
1822
1818
ret i65 %result
0 commit comments