Skip to content

Commit 41189ad

Browse files
AMDGPU: Add test for 16 bit unsigned scratch offsets
A large scratch offset with its highest bit (bit 15) set is selected as a negative offset, because a negative offset has the same 16-bit binary representation as a large unsigned offset (for example, 65512 and -24 are both 0xffe8 in 16 bits).
1 parent f6e771c commit 41189ad

File tree

2 files changed

+321
-0
lines changed

2 files changed

+321
-0
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,4 +1510,115 @@ bb:
15101510
ret void
15111511
}
15121512

1513+
1514+
; Test: load an i32 from scratch (addrspace(5)) at the inreg base pointer
; %scevgep plus a byte offset of 65512 (0xffe8, i.e. bit 15 set), then store it
; to the global pointer %out.
; 0xffe8 is also the 16-bit two's-complement pattern of -24, the offset used by
; @sgpr_base_negative_offset, so this offset must not be treated as signed 16-bit.
; In the checks below, GFX9/GFX10/GFX940/GFX11 add 0xffe8 to the base with
; s_add_u32, and GFX12 keeps it as a positive immediate (offset:65512).
define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
1515+
; GFX9-LABEL: sgpr_base_large_offset:
1516+
; GFX9: ; %bb.0: ; %entry
1517+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
1518+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
1519+
; GFX9-NEXT: s_add_u32 s0, s2, 0xffe8
1520+
; GFX9-NEXT: scratch_load_dword v2, off, s0
1521+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1522+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
1523+
; GFX9-NEXT: s_endpgm
1524+
;
1525+
; GFX10-LABEL: sgpr_base_large_offset:
1526+
; GFX10: ; %bb.0: ; %entry
1527+
; GFX10-NEXT: s_add_u32 s0, s0, s5
1528+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
1529+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
1530+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
1531+
; GFX10-NEXT: s_add_u32 s0, s2, 0xffe8
1532+
; GFX10-NEXT: scratch_load_dword v2, off, s0
1533+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1534+
; GFX10-NEXT: global_store_dword v[0:1], v2, off
1535+
; GFX10-NEXT: s_endpgm
1536+
;
1537+
; GFX940-LABEL: sgpr_base_large_offset:
1538+
; GFX940: ; %bb.0: ; %entry
1539+
; GFX940-NEXT: s_add_u32 s0, s0, 0xffe8
1540+
; GFX940-NEXT: scratch_load_dword v2, off, s0
1541+
; GFX940-NEXT: s_waitcnt vmcnt(0)
1542+
; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
1543+
; GFX940-NEXT: s_endpgm
1544+
;
1545+
; GFX11-LABEL: sgpr_base_large_offset:
1546+
; GFX11: ; %bb.0: ; %entry
1547+
; GFX11-NEXT: s_add_u32 s0, s0, 0xffe8
1548+
; GFX11-NEXT: scratch_load_b32 v2, off, s0
1549+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1550+
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1551+
; GFX11-NEXT: s_nop 0
1552+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1553+
; GFX11-NEXT: s_endpgm
1554+
;
1555+
; GFX12-LABEL: sgpr_base_large_offset:
1556+
; GFX12: ; %bb.0: ; %entry
1557+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
1558+
; GFX12-NEXT: s_wait_loadcnt 0x0
1559+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
1560+
; GFX12-NEXT: s_nop 0
1561+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1562+
; GFX12-NEXT: s_endpgm
1563+
entry:
1564+
%scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 65512
1565+
%0 = load i32, ptr addrspace(5) %scevgep28, align 4
1566+
store i32 %0, ptr addrspace(1) %out
1567+
ret void
1568+
}
1569+
1570+
; Test: load an i32 from scratch (addrspace(5)) at the inreg base pointer
; %scevgep plus a byte offset of -24, then store it to the global pointer %out.
; Truncated to 16 bits, -24 is 0xffe8 -- the same bit pattern as the +65512
; offset used by @sgpr_base_large_offset.
; In the checks below, GFX9 and GFX940 add 0xffffffe8 to the base with
; s_add_u32, while GFX10, GFX11 and GFX12 use the immediate form offset:-24.
define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
1571+
; GFX9-LABEL: sgpr_base_negative_offset:
1572+
; GFX9: ; %bb.0: ; %entry
1573+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
1574+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
1575+
; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8
1576+
; GFX9-NEXT: scratch_load_dword v2, off, s0
1577+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1578+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
1579+
; GFX9-NEXT: s_endpgm
1580+
;
1581+
; GFX10-LABEL: sgpr_base_negative_offset:
1582+
; GFX10: ; %bb.0: ; %entry
1583+
; GFX10-NEXT: s_add_u32 s0, s0, s5
1584+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
1585+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
1586+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
1587+
; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24
1588+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1589+
; GFX10-NEXT: global_store_dword v[0:1], v2, off
1590+
; GFX10-NEXT: s_endpgm
1591+
;
1592+
; GFX940-LABEL: sgpr_base_negative_offset:
1593+
; GFX940: ; %bb.0: ; %entry
1594+
; GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8
1595+
; GFX940-NEXT: scratch_load_dword v2, off, s0
1596+
; GFX940-NEXT: s_waitcnt vmcnt(0)
1597+
; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
1598+
; GFX940-NEXT: s_endpgm
1599+
;
1600+
; GFX11-LABEL: sgpr_base_negative_offset:
1601+
; GFX11: ; %bb.0: ; %entry
1602+
; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24
1603+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1604+
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
1605+
; GFX11-NEXT: s_nop 0
1606+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1607+
; GFX11-NEXT: s_endpgm
1608+
;
1609+
; GFX12-LABEL: sgpr_base_negative_offset:
1610+
; GFX12: ; %bb.0: ; %entry
1611+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
1612+
; GFX12-NEXT: s_wait_loadcnt 0x0
1613+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
1614+
; GFX12-NEXT: s_nop 0
1615+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1616+
; GFX12-NEXT: s_endpgm
1617+
entry:
1618+
%scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24
1619+
%0 = load i32, ptr addrspace(5) %scevgep28, align 4
1620+
store i32 %0, ptr addrspace(1) %out
1621+
ret void
1622+
}
1623+
15131624
declare i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4891,5 +4891,215 @@ bb:
48914891
ret void
48924892
}
48934893

4894+
; Test: load an i32 from scratch (addrspace(5)) at the inreg base pointer
; %scevgep plus a byte offset of 65512 (0xffe8, i.e. bit 15 set), then store it
; to the global pointer %out.
; In the checks below, the GFX9/GFX10/GFX11/GFX940 prefixes (and their -PAL
; variants) add 0xffe8 to the base with s_add_i32 before the load.
; NOTE(review): the GFX12 and GFX12-PAL checks below expect offset:-24 for this
; +65512 offset, identical to what @sgpr_base_negative_offset expects for -24.
; 0xffe8 is the 16-bit two's-complement pattern of -24, so these checks appear
; to record the "large offset selected as negative" behaviour described in the
; commit message; presumably they need regenerating once selection is fixed.
define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
4895+
; GFX9-LABEL: sgpr_base_large_offset:
4896+
; GFX9: ; %bb.0: ; %entry
4897+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4898+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4899+
; GFX9-NEXT: s_add_i32 s2, s2, 0xffe8
4900+
; GFX9-NEXT: scratch_load_dword v2, off, s2
4901+
; GFX9-NEXT: s_waitcnt vmcnt(0)
4902+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
4903+
; GFX9-NEXT: s_endpgm
4904+
;
4905+
; GFX10-LABEL: sgpr_base_large_offset:
4906+
; GFX10: ; %bb.0: ; %entry
4907+
; GFX10-NEXT: s_add_u32 s0, s0, s5
4908+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
4909+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4910+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4911+
; GFX10-NEXT: s_add_i32 s2, s2, 0xffe8
4912+
; GFX10-NEXT: scratch_load_dword v2, off, s2
4913+
; GFX10-NEXT: s_waitcnt vmcnt(0)
4914+
; GFX10-NEXT: global_store_dword v[0:1], v2, off
4915+
; GFX10-NEXT: s_endpgm
4916+
;
4917+
; GFX11-LABEL: sgpr_base_large_offset:
4918+
; GFX11: ; %bb.0: ; %entry
4919+
; GFX11-NEXT: s_add_i32 s0, s0, 0xffe8
4920+
; GFX11-NEXT: scratch_load_b32 v2, off, s0
4921+
; GFX11-NEXT: s_waitcnt vmcnt(0)
4922+
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
4923+
; GFX11-NEXT: s_nop 0
4924+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4925+
; GFX11-NEXT: s_endpgm
4926+
;
4927+
; GFX12-LABEL: sgpr_base_large_offset:
4928+
; GFX12: ; %bb.0: ; %entry
4929+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4930+
; GFX12-NEXT: s_wait_loadcnt 0x0
4931+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
4932+
; GFX12-NEXT: s_nop 0
4933+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4934+
; GFX12-NEXT: s_endpgm
4935+
;
4936+
; GFX9-PAL-LABEL: sgpr_base_large_offset:
4937+
; GFX9-PAL: ; %bb.0: ; %entry
4938+
; GFX9-PAL-NEXT: s_getpc_b64 s[2:3]
4939+
; GFX9-PAL-NEXT: s_mov_b32 s2, s8
4940+
; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
4941+
; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0)
4942+
; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
4943+
; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
4944+
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
4945+
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4946+
; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0
4947+
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
4948+
; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off
4949+
; GFX9-PAL-NEXT: s_endpgm
4950+
;
4951+
; GFX940-LABEL: sgpr_base_large_offset:
4952+
; GFX940: ; %bb.0: ; %entry
4953+
; GFX940-NEXT: s_add_i32 s0, s0, 0xffe8
4954+
; GFX940-NEXT: scratch_load_dword v2, off, s0
4955+
; GFX940-NEXT: s_waitcnt vmcnt(0)
4956+
; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
4957+
; GFX940-NEXT: s_endpgm
4958+
;
4959+
; GFX10-PAL-LABEL: sgpr_base_large_offset:
4960+
; GFX10-PAL: ; %bb.0: ; %entry
4961+
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
4962+
; GFX10-PAL-NEXT: s_mov_b32 s2, s8
4963+
; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
4964+
; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0)
4965+
; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff
4966+
; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5
4967+
; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0
4968+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
4969+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
4970+
; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4971+
; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0
4972+
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
4973+
; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off
4974+
; GFX10-PAL-NEXT: s_endpgm
4975+
;
4976+
; GFX11-PAL-LABEL: sgpr_base_large_offset:
4977+
; GFX11-PAL: ; %bb.0: ; %entry
4978+
; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4979+
; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0
4980+
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
4981+
; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off
4982+
; GFX11-PAL-NEXT: s_nop 0
4983+
; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4984+
; GFX11-PAL-NEXT: s_endpgm
4985+
;
4986+
; GFX12-PAL-LABEL: sgpr_base_large_offset:
4987+
; GFX12-PAL: ; %bb.0: ; %entry
4988+
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4989+
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
4990+
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
4991+
; GFX12-PAL-NEXT: s_nop 0
4992+
; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4993+
; GFX12-PAL-NEXT: s_endpgm
4994+
entry:
4995+
%scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 65512
4996+
%0 = load i32, ptr addrspace(5) %scevgep28, align 4
4997+
store i32 %0, ptr addrspace(1) %out
4998+
ret void
4999+
}
5000+
5001+
; Test: load an i32 from scratch (addrspace(5)) at the inreg base pointer
; %scevgep plus a byte offset of -24, then store it to the global pointer %out.
; Truncated to 16 bits, -24 is 0xffe8 -- the same bit pattern as the +65512
; offset used by @sgpr_base_large_offset.
; In the checks below, GFX9, GFX9-PAL and GFX940 apply the offset with
; s_addk_i32 <base>, 0xffe8 (presumably a sign-extended 16-bit immediate, i.e.
; -24 -- confirm against the ISA docs), while the GFX10, GFX11 and GFX12
; prefixes (and their -PAL variants) use the immediate form offset:-24.
define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
5002+
; GFX9-LABEL: sgpr_base_negative_offset:
5003+
; GFX9: ; %bb.0: ; %entry
5004+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
5005+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
5006+
; GFX9-NEXT: s_addk_i32 s2, 0xffe8
5007+
; GFX9-NEXT: scratch_load_dword v2, off, s2
5008+
; GFX9-NEXT: s_waitcnt vmcnt(0)
5009+
; GFX9-NEXT: global_store_dword v[0:1], v2, off
5010+
; GFX9-NEXT: s_endpgm
5011+
;
5012+
; GFX10-LABEL: sgpr_base_negative_offset:
5013+
; GFX10: ; %bb.0: ; %entry
5014+
; GFX10-NEXT: s_add_u32 s0, s0, s5
5015+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
5016+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
5017+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
5018+
; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24
5019+
; GFX10-NEXT: s_waitcnt vmcnt(0)
5020+
; GFX10-NEXT: global_store_dword v[0:1], v2, off
5021+
; GFX10-NEXT: s_endpgm
5022+
;
5023+
; GFX11-LABEL: sgpr_base_negative_offset:
5024+
; GFX11: ; %bb.0: ; %entry
5025+
; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5026+
; GFX11-NEXT: s_waitcnt vmcnt(0)
5027+
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
5028+
; GFX11-NEXT: s_nop 0
5029+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5030+
; GFX11-NEXT: s_endpgm
5031+
;
5032+
; GFX12-LABEL: sgpr_base_negative_offset:
5033+
; GFX12: ; %bb.0: ; %entry
5034+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5035+
; GFX12-NEXT: s_wait_loadcnt 0x0
5036+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
5037+
; GFX12-NEXT: s_nop 0
5038+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5039+
; GFX12-NEXT: s_endpgm
5040+
;
5041+
; GFX9-PAL-LABEL: sgpr_base_negative_offset:
5042+
; GFX9-PAL: ; %bb.0: ; %entry
5043+
; GFX9-PAL-NEXT: s_getpc_b64 s[2:3]
5044+
; GFX9-PAL-NEXT: s_mov_b32 s2, s8
5045+
; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5046+
; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0)
5047+
; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5048+
; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
5049+
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
5050+
; GFX9-PAL-NEXT: s_addk_i32 s0, 0xffe8
5051+
; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0
5052+
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
5053+
; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off
5054+
; GFX9-PAL-NEXT: s_endpgm
5055+
;
5056+
; GFX940-LABEL: sgpr_base_negative_offset:
5057+
; GFX940: ; %bb.0: ; %entry
5058+
; GFX940-NEXT: s_addk_i32 s0, 0xffe8
5059+
; GFX940-NEXT: scratch_load_dword v2, off, s0
5060+
; GFX940-NEXT: s_waitcnt vmcnt(0)
5061+
; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
5062+
; GFX940-NEXT: s_endpgm
5063+
;
5064+
; GFX10-PAL-LABEL: sgpr_base_negative_offset:
5065+
; GFX10-PAL: ; %bb.0: ; %entry
5066+
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
5067+
; GFX10-PAL-NEXT: s_mov_b32 s2, s8
5068+
; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5069+
; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0)
5070+
; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5071+
; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5
5072+
; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0
5073+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
5074+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
5075+
; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0 offset:-24
5076+
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
5077+
; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off
5078+
; GFX10-PAL-NEXT: s_endpgm
5079+
;
5080+
; GFX11-PAL-LABEL: sgpr_base_negative_offset:
5081+
; GFX11-PAL: ; %bb.0: ; %entry
5082+
; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5083+
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
5084+
; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off
5085+
; GFX11-PAL-NEXT: s_nop 0
5086+
; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5087+
; GFX11-PAL-NEXT: s_endpgm
5088+
;
5089+
; GFX12-PAL-LABEL: sgpr_base_negative_offset:
5090+
; GFX12-PAL: ; %bb.0: ; %entry
5091+
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5092+
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
5093+
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
5094+
; GFX12-PAL-NEXT: s_nop 0
5095+
; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5096+
; GFX12-PAL-NEXT: s_endpgm
5097+
entry:
5098+
%scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24
5099+
%0 = load i32, ptr addrspace(5) %scevgep28, align 4
5100+
store i32 %0, ptr addrspace(1) %out
5101+
ret void
5102+
}
5103+
48945104
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
48955105
declare i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)