@@ -1711,6 +1711,98 @@ define void @too_many_args_use_workitem_id_x_inreg(
1711
1711
ret void
1712
1712
}
1713
1713
1714
+ ; Callee with an inreg i32 and an inreg <2 x float> argument; each argument is
+ ; stored to an undef addrspace(1) pointer so that it stays live. The checks below
+ ; show the arguments being read from s4-s6 on GFX9 and from s0-s2 on GFX11.
+ define void @void_func_i32_v2float_inreg (i32 inreg %arg0 , <2 x float > inreg %arg1 ) #0 {
1715
+ ; GFX9-LABEL: void_func_i32_v2float_inreg:
1716
+ ; GFX9: ; %bb.0:
1717
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1718
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
1719
+ ; GFX9-NEXT: global_store_dword v[0:1], v0, off
1720
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s5
1721
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s6
1722
+ ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1723
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
1724
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
1725
+ ;
1726
+ ; GFX11-LABEL: void_func_i32_v2float_inreg:
1727
+ ; GFX11: ; %bb.0:
1728
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729
+ ; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
1730
+ ; GFX11-NEXT: v_mov_b32_e32 v0, s1
1731
+ ; GFX11-NEXT: s_clause 0x1
1732
+ ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1733
+ ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
1734
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
1735
+ store i32 %arg0 , ptr addrspace (1 ) undef
1736
+ store <2 x float > %arg1 , ptr addrspace (1 ) undef
1737
+ ret void
1738
+ }
1739
+
1740
+ ; Caller that forwards its inreg i32 and inreg <2 x float> arguments to a call
+ ; with the same inreg signature; the checks cover the full prologue, the
+ ; GOT-relative call sequence and the epilogue for GFX9 and GFX11.
+ ; NOTE(review): the callee is @caller_void_func_i32_v2float_inreg itself, and the
+ ; checks reference that same symbol; presumably @void_func_i32_v2float_inreg was
+ ; intended -- confirm, and regenerate the checks if the callee is changed.
+ define void @caller_void_func_i32_v2float_inreg (i32 inreg %arg0 , <2 x float > inreg %arg1 ) #0 {
1741
+ ; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
1742
+ ; GFX9: ; %bb.0:
1743
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1744
+ ; GFX9-NEXT: s_mov_b32 s7, s33
1745
+ ; GFX9-NEXT: s_mov_b32 s33, s32
1746
+ ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
1747
+ ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1748
+ ; GFX9-NEXT: s_mov_b64 exec, s[8:9]
1749
+ ; GFX9-NEXT: s_addk_i32 s32, 0x400
1750
+ ; GFX9-NEXT: s_getpc_b64 s[8:9]
1751
+ ; GFX9-NEXT: s_add_u32 s8, s8, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1752
+ ; GFX9-NEXT: s_addc_u32 s9, s9, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1753
+ ; GFX9-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0
1754
+ ; GFX9-NEXT: v_writelane_b32 v40, s7, 2
1755
+ ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1756
+ ; GFX9-NEXT: s_mov_b32 s2, s6
1757
+ ; GFX9-NEXT: s_mov_b32 s1, s5
1758
+ ; GFX9-NEXT: s_mov_b32 s0, s4
1759
+ ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1760
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1761
+ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
1762
+ ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1763
+ ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1764
+ ; GFX9-NEXT: v_readlane_b32 s4, v40, 2
1765
+ ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1766
+ ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1767
+ ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1768
+ ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1769
+ ; GFX9-NEXT: s_mov_b32 s33, s4
1770
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
1771
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
1772
+ ;
1773
+ ; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
1774
+ ; GFX11: ; %bb.0:
1775
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776
+ ; GFX11-NEXT: s_mov_b32 s3, s33
1777
+ ; GFX11-NEXT: s_mov_b32 s33, s32
1778
+ ; GFX11-NEXT: s_or_saveexec_b32 s4, -1
1779
+ ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1780
+ ; GFX11-NEXT: s_mov_b32 exec_lo, s4
1781
+ ; GFX11-NEXT: s_add_i32 s32, s32, 16
1782
+ ; GFX11-NEXT: s_getpc_b64 s[4:5]
1783
+ ; GFX11-NEXT: s_add_u32 s4, s4, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1784
+ ; GFX11-NEXT: s_addc_u32 s5, s5, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1785
+ ; GFX11-NEXT: v_writelane_b32 v40, s3, 2
1786
+ ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
1787
+ ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1788
+ ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1789
+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1790
+ ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
1791
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1792
+ ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1793
+ ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1794
+ ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1795
+ ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1796
+ ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1797
+ ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1798
+ ; GFX11-NEXT: s_add_i32 s32, s32, -16
1799
+ ; GFX11-NEXT: s_mov_b32 s33, s0
1800
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1801
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
1802
+ call void @caller_void_func_i32_v2float_inreg (i32 inreg %arg0 , <2 x float > inreg %arg1 )
1803
+ ret void
1804
+ }
1805
+
1714
1806
; Attribute groups. #0 is the group attached to the test functions above.
attributes #0 = { nounwind }
1715
1807
attributes #1 = { nounwind noinline }
1716
1808
0 commit comments