@@ -4891,5 +4891,215 @@ bb:
4891
4891
ret void
4892
4892
}
4893
4893
4894
+ define amdgpu_gs void @sgpr_base_large_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) { ; Loads an i32 from addrspace(5) at the inreg base %scevgep plus 65512 bytes and stores it to %out.
4895
+ ; GFX9-LABEL: sgpr_base_large_offset:
4896
+ ; GFX9: ; %bb.0: ; %entry
4897
+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4898
+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4899
+ ; GFX9-NEXT: s_add_i32 s2, s2, 0xffe8
4900
+ ; GFX9-NEXT: scratch_load_dword v2, off, s2
4901
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
4902
+ ; GFX9-NEXT: global_store_dword v[0:1], v2, off
4903
+ ; GFX9-NEXT: s_endpgm
4904
+ ;
4905
+ ; GFX10-LABEL: sgpr_base_large_offset:
4906
+ ; GFX10: ; %bb.0: ; %entry
4907
+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
4908
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4909
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4910
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4911
+ ; GFX10-NEXT: s_add_i32 s2, s2, 0xffe8
4912
+ ; GFX10-NEXT: scratch_load_dword v2, off, s2
4913
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
4914
+ ; GFX10-NEXT: global_store_dword v[0:1], v2, off
4915
+ ; GFX10-NEXT: s_endpgm
4916
+ ;
4917
+ ; GFX11-LABEL: sgpr_base_large_offset:
4918
+ ; GFX11: ; %bb.0: ; %entry
4919
+ ; GFX11-NEXT: s_add_i32 s0, s0, 0xffe8
4920
+ ; GFX11-NEXT: scratch_load_b32 v2, off, s0
4921
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
4922
+ ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
4923
+ ; GFX11-NEXT: s_nop 0
4924
+ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4925
+ ; GFX11-NEXT: s_endpgm
4926
+ ;
4927
+ ; GFX12-LABEL: sgpr_base_large_offset:
4928
+ ; GFX12: ; %bb.0: ; %entry
4929
+ ; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4930
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
4931
+ ; GFX12-NEXT: global_store_b32 v[0:1], v2, off
4932
+ ; GFX12-NEXT: s_nop 0
4933
+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4934
+ ; GFX12-NEXT: s_endpgm
4935
+ ;
4936
+ ; GFX9-PAL-LABEL: sgpr_base_large_offset:
4937
+ ; GFX9-PAL: ; %bb.0: ; %entry
4938
+ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3]
4939
+ ; GFX9-PAL-NEXT: s_mov_b32 s2, s8
4940
+ ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
4941
+ ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0)
4942
+ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
4943
+ ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
4944
+ ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
4945
+ ; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4946
+ ; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0
4947
+ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
4948
+ ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off
4949
+ ; GFX9-PAL-NEXT: s_endpgm
4950
+ ;
4951
+ ; GFX940-LABEL: sgpr_base_large_offset:
4952
+ ; GFX940: ; %bb.0: ; %entry
4953
+ ; GFX940-NEXT: s_add_i32 s0, s0, 0xffe8
4954
+ ; GFX940-NEXT: scratch_load_dword v2, off, s0
4955
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
4956
+ ; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
4957
+ ; GFX940-NEXT: s_endpgm
4958
+ ;
4959
+ ; GFX10-PAL-LABEL: sgpr_base_large_offset:
4960
+ ; GFX10-PAL: ; %bb.0: ; %entry
4961
+ ; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
4962
+ ; GFX10-PAL-NEXT: s_mov_b32 s2, s8
4963
+ ; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
4964
+ ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0)
4965
+ ; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff
4966
+ ; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5
4967
+ ; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0
4968
+ ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
4969
+ ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
4970
+ ; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4971
+ ; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0
4972
+ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
4973
+ ; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off
4974
+ ; GFX10-PAL-NEXT: s_endpgm
4975
+ ;
4976
+ ; GFX11-PAL-LABEL: sgpr_base_large_offset:
4977
+ ; GFX11-PAL: ; %bb.0: ; %entry
4978
+ ; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0xffe8
4979
+ ; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0
4980
+ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
4981
+ ; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off
4982
+ ; GFX11-PAL-NEXT: s_nop 0
4983
+ ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4984
+ ; GFX11-PAL-NEXT: s_endpgm
4985
+ ;
4986
+ ; GFX12-PAL-LABEL: sgpr_base_large_offset:
4987
+ ; GFX12-PAL: ; %bb.0: ; %entry
4988
+ ; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4989
+ ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
4990
+ ; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
4991
+ ; GFX12-PAL-NEXT: s_nop 0
4992
+ ; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4993
+ ; GFX12-PAL-NEXT: s_endpgm
4994
+ entry:
4995
+ %scevgep28 = getelementptr i8 , ptr addrspace (5 ) %scevgep , i32 65512 ; 65512 == 0xffe8, the literal added to the base register in the s_add_i32 check lines above.
4996
+ %0 = load i32 , ptr addrspace (5 ) %scevgep28 , align 4 ; NOTE(review): the gfx12 check lines expect offset:-24 (0xffe8 read as a signed 16-bit value) for this positive 65512 offset - confirm this is the intended output.
4997
+ store i32 %0 , ptr addrspace (1 ) %out
4998
+ ret void
4999
+ }
5000
+
5001
+ define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) { ; Loads an i32 from addrspace(5) at the inreg base %scevgep minus 24 bytes and stores it to %out.
5002
+ ; GFX9-LABEL: sgpr_base_negative_offset:
5003
+ ; GFX9: ; %bb.0: ; %entry
5004
+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
5005
+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
5006
+ ; GFX9-NEXT: s_addk_i32 s2, 0xffe8
5007
+ ; GFX9-NEXT: scratch_load_dword v2, off, s2
5008
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
5009
+ ; GFX9-NEXT: global_store_dword v[0:1], v2, off
5010
+ ; GFX9-NEXT: s_endpgm
5011
+ ;
5012
+ ; GFX10-LABEL: sgpr_base_negative_offset:
5013
+ ; GFX10: ; %bb.0: ; %entry
5014
+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
5015
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
5016
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
5017
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
5018
+ ; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24
5019
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
5020
+ ; GFX10-NEXT: global_store_dword v[0:1], v2, off
5021
+ ; GFX10-NEXT: s_endpgm
5022
+ ;
5023
+ ; GFX11-LABEL: sgpr_base_negative_offset:
5024
+ ; GFX11: ; %bb.0: ; %entry
5025
+ ; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5026
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
5027
+ ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
5028
+ ; GFX11-NEXT: s_nop 0
5029
+ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5030
+ ; GFX11-NEXT: s_endpgm
5031
+ ;
5032
+ ; GFX12-LABEL: sgpr_base_negative_offset:
5033
+ ; GFX12: ; %bb.0: ; %entry
5034
+ ; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5035
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
5036
+ ; GFX12-NEXT: global_store_b32 v[0:1], v2, off
5037
+ ; GFX12-NEXT: s_nop 0
5038
+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5039
+ ; GFX12-NEXT: s_endpgm
5040
+ ;
5041
+ ; GFX9-PAL-LABEL: sgpr_base_negative_offset:
5042
+ ; GFX9-PAL: ; %bb.0: ; %entry
5043
+ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3]
5044
+ ; GFX9-PAL-NEXT: s_mov_b32 s2, s8
5045
+ ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5046
+ ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0)
5047
+ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5048
+ ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
5049
+ ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
5050
+ ; GFX9-PAL-NEXT: s_addk_i32 s0, 0xffe8
5051
+ ; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0
5052
+ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
5053
+ ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off
5054
+ ; GFX9-PAL-NEXT: s_endpgm
5055
+ ;
5056
+ ; GFX940-LABEL: sgpr_base_negative_offset:
5057
+ ; GFX940: ; %bb.0: ; %entry
5058
+ ; GFX940-NEXT: s_addk_i32 s0, 0xffe8
5059
+ ; GFX940-NEXT: scratch_load_dword v2, off, s0
5060
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
5061
+ ; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
5062
+ ; GFX940-NEXT: s_endpgm
5063
+ ;
5064
+ ; GFX10-PAL-LABEL: sgpr_base_negative_offset:
5065
+ ; GFX10-PAL: ; %bb.0: ; %entry
5066
+ ; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
5067
+ ; GFX10-PAL-NEXT: s_mov_b32 s2, s8
5068
+ ; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5069
+ ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0)
5070
+ ; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5071
+ ; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5
5072
+ ; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0
5073
+ ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
5074
+ ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
5075
+ ; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0 offset:-24
5076
+ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
5077
+ ; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off
5078
+ ; GFX10-PAL-NEXT: s_endpgm
5079
+ ;
5080
+ ; GFX11-PAL-LABEL: sgpr_base_negative_offset:
5081
+ ; GFX11-PAL: ; %bb.0: ; %entry
5082
+ ; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5083
+ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
5084
+ ; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off
5085
+ ; GFX11-PAL-NEXT: s_nop 0
5086
+ ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5087
+ ; GFX11-PAL-NEXT: s_endpgm
5088
+ ;
5089
+ ; GFX12-PAL-LABEL: sgpr_base_negative_offset:
5090
+ ; GFX12-PAL: ; %bb.0: ; %entry
5091
+ ; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5092
+ ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
5093
+ ; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
5094
+ ; GFX12-PAL-NEXT: s_nop 0
5095
+ ; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5096
+ ; GFX12-PAL-NEXT: s_endpgm
5097
+ entry:
5098
+ %scevgep28 = getelementptr i8 , ptr addrspace (5 ) %scevgep , i32 -24 ; -24 is 0xffe8 as a 16-bit two's-complement value, the immediate in the s_addk_i32 check lines (gfx9, gfx9-pal, gfx940); the remaining targets' check lines carry it as offset:-24 on the scratch load.
5099
+ %0 = load i32 , ptr addrspace (5 ) %scevgep28 , align 4 ; the value loaded here is what gets written to %out below
5100
+ store i32 %0 , ptr addrspace (1 ) %out
5101
+ ret void
5102
+ }
5103
+
4894
5104
declare void @llvm.memset.p5.i64 (ptr addrspace (5 ) nocapture writeonly , i8 , i64 , i1 immarg) ; memset intrinsic overload taking an addrspace(5) destination pointer and an i64 length
4895
5105
declare i32 @llvm.amdgcn.workitem.id.x () ; takes no arguments and returns an i32
0 commit comments