Skip to content

Commit 663e636

Browse files
author
Jun Wang
committed
Minor code change; updated a todo comment; added testcases.
1 parent d84b627 commit 663e636

File tree

3 files changed

+94
-1
lines changed

3 files changed

+94
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2665,7 +2665,7 @@ SDValue SITargetLowering::LowerFormalArguments(
26652665

26662666
if (!IsKernel) {
26672667
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
2668-
if (!IsGraphics && !IsKernel && !Subtarget->enableFlatScratch()) {
2668+
if (!IsGraphics && !Subtarget->enableFlatScratch()) {
26692669
CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{AMDGPU::SGPR0, AMDGPU::SGPR1,
26702670
AMDGPU::SGPR2, AMDGPU::SGPR3},
26712671
4);

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2550,6 +2550,7 @@ bool isArgPassedInSGPR(const Argument *A) {
25502550
return A->hasAttribute(Attribute::InReg) ||
25512551
A->hasAttribute(Attribute::ByVal);
25522552
default:
2553+
// TODO: treat i1 as divergent?
25532554
return A->hasAttribute(Attribute::InReg);
25542555
}
25552556
}

llvm/test/CodeGen/AMDGPU/function-args-inreg.ll

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1711,6 +1711,98 @@ define void @too_many_args_use_workitem_id_x_inreg(
17111711
ret void
17121712
}
17131713

1714+
define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
1715+
; GFX9-LABEL: void_func_i32_v2float_inreg:
1716+
; GFX9: ; %bb.0:
1717+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1718+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1719+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
1720+
; GFX9-NEXT: v_mov_b32_e32 v0, s5
1721+
; GFX9-NEXT: v_mov_b32_e32 v1, s6
1722+
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1723+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1724+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1725+
;
1726+
; GFX11-LABEL: void_func_i32_v2float_inreg:
1727+
; GFX11: ; %bb.0:
1728+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729+
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
1730+
; GFX11-NEXT: v_mov_b32_e32 v0, s1
1731+
; GFX11-NEXT: s_clause 0x1
1732+
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1733+
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
1734+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1735+
store i32 %arg0, ptr addrspace(1) undef
1736+
store <2 x float> %arg1, ptr addrspace(1) undef
1737+
ret void
1738+
}
1739+
1740+
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
1741+
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
1742+
; GFX9: ; %bb.0:
1743+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1744+
; GFX9-NEXT: s_mov_b32 s7, s33
1745+
; GFX9-NEXT: s_mov_b32 s33, s32
1746+
; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1
1747+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1748+
; GFX9-NEXT: s_mov_b64 exec, s[8:9]
1749+
; GFX9-NEXT: s_addk_i32 s32, 0x400
1750+
; GFX9-NEXT: s_getpc_b64 s[8:9]
1751+
; GFX9-NEXT: s_add_u32 s8, s8, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1752+
; GFX9-NEXT: s_addc_u32 s9, s9, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1753+
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0
1754+
; GFX9-NEXT: v_writelane_b32 v40, s7, 2
1755+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1756+
; GFX9-NEXT: s_mov_b32 s2, s6
1757+
; GFX9-NEXT: s_mov_b32 s1, s5
1758+
; GFX9-NEXT: s_mov_b32 s0, s4
1759+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1760+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1761+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
1762+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1763+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1764+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
1765+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1766+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1767+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1768+
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1769+
; GFX9-NEXT: s_mov_b32 s33, s4
1770+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1771+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1772+
;
1773+
; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
1774+
; GFX11: ; %bb.0:
1775+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776+
; GFX11-NEXT: s_mov_b32 s3, s33
1777+
; GFX11-NEXT: s_mov_b32 s33, s32
1778+
; GFX11-NEXT: s_or_saveexec_b32 s4, -1
1779+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1780+
; GFX11-NEXT: s_mov_b32 exec_lo, s4
1781+
; GFX11-NEXT: s_add_i32 s32, s32, 16
1782+
; GFX11-NEXT: s_getpc_b64 s[4:5]
1783+
; GFX11-NEXT: s_add_u32 s4, s4, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1784+
; GFX11-NEXT: s_addc_u32 s5, s5, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1785+
; GFX11-NEXT: v_writelane_b32 v40, s3, 2
1786+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
1787+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1788+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1789+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1790+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
1791+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1792+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1793+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1794+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1795+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1796+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1797+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
1798+
; GFX11-NEXT: s_add_i32 s32, s32, -16
1799+
; GFX11-NEXT: s_mov_b32 s33, s0
1800+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1801+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1802+
call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
1803+
ret void
1804+
}
1805+
17141806
attributes #0 = { nounwind }
17151807
attributes #1 = { nounwind noinline }
17161808

0 commit comments

Comments
 (0)