Skip to content

Commit 671cbcf

Browse files
authored
AMDGPU: Add baseline tests for gep flag handling (#110814)
On older subtargets, we need to know that the address computation won't overflow in order to match the addressing mode of stack instructions.
1 parent 1be64e5 commit 671cbcf

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; gfx8 requires knowing that the address computation does not overflow in order to fold the offset into the addressing mode
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5+
6+
; Baseline case: a volatile store of %val into a [32 x i32] stack alloca at
; element (%idx + 4), addressed through two GEPs that carry no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
7+
; GFX8-LABEL: gep_noflags_alloca:
8+
; GFX8: ; %bb.0:
9+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
11+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
12+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
13+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
14+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
15+
; GFX8-NEXT: s_waitcnt vmcnt(0)
16+
; GFX8-NEXT: s_setpc_b64 s[30:31]
17+
;
18+
; GFX9-LABEL: gep_noflags_alloca:
19+
; GFX9: ; %bb.0:
20+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
22+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
23+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
24+
; GFX9-NEXT: s_waitcnt vmcnt(0)
25+
; GFX9-NEXT: s_setpc_b64 s[30:31]
26+
%alloca = alloca [32 x i32], addrspace(5)
27+
%gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
28+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
29+
store volatile i32 %val, ptr addrspace(5) %gep1
30+
ret void
31+
}
32+
33+
; Volatile store of %val into a [32 x i32] stack alloca at element (%idx + 4).
; Only the first GEP (the variable index) carries `inbounds`; the second GEP
; (the constant +4 element step) carries no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
34+
; GFX8-LABEL: gep_inbounds_alloca:
35+
; GFX8: ; %bb.0:
36+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
38+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
39+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
40+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
41+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
42+
; GFX8-NEXT: s_waitcnt vmcnt(0)
43+
; GFX8-NEXT: s_setpc_b64 s[30:31]
44+
;
45+
; GFX9-LABEL: gep_inbounds_alloca:
46+
; GFX9: ; %bb.0:
47+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
49+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
50+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
51+
; GFX9-NEXT: s_waitcnt vmcnt(0)
52+
; GFX9-NEXT: s_setpc_b64 s[30:31]
53+
%alloca = alloca [32 x i32], addrspace(5)
54+
%gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
55+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
56+
store volatile i32 %val, ptr addrspace(5) %gep1
57+
ret void
58+
}
59+
60+
; Volatile store of %val into a [32 x i32] stack alloca at element (%idx + 4).
; Only the first GEP (the variable index) carries `nuw`; the second GEP
; (the constant +4 element step) carries no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
61+
; GFX8-LABEL: gep_nuw_alloca:
62+
; GFX8: ; %bb.0:
63+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
65+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
66+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
67+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
68+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
69+
; GFX8-NEXT: s_waitcnt vmcnt(0)
70+
; GFX8-NEXT: s_setpc_b64 s[30:31]
71+
;
72+
; GFX9-LABEL: gep_nuw_alloca:
73+
; GFX9: ; %bb.0:
74+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
76+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
77+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
78+
; GFX9-NEXT: s_waitcnt vmcnt(0)
79+
; GFX9-NEXT: s_setpc_b64 s[30:31]
80+
%alloca = alloca [32 x i32], addrspace(5)
81+
%gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
82+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
83+
store volatile i32 %val, ptr addrspace(5) %gep1
84+
ret void
85+
}
86+
87+
; Volatile store of %val into a [32 x i32] stack alloca at element (%idx + 4).
; Only the first GEP (the variable index) carries `nusw`; the second GEP
; (the constant +4 element step) carries no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
88+
; GFX8-LABEL: gep_nusw_alloca:
89+
; GFX8: ; %bb.0:
90+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
92+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
93+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
94+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
95+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
96+
; GFX8-NEXT: s_waitcnt vmcnt(0)
97+
; GFX8-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX9-LABEL: gep_nusw_alloca:
100+
; GFX9: ; %bb.0:
101+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
103+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
104+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
105+
; GFX9-NEXT: s_waitcnt vmcnt(0)
106+
; GFX9-NEXT: s_setpc_b64 s[30:31]
107+
%alloca = alloca [32 x i32], addrspace(5)
108+
%gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
109+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
110+
store volatile i32 %val, ptr addrspace(5) %gep1
111+
ret void
112+
}
113+
114+
; Volatile store of %val into a [32 x i32] stack alloca at element (%idx + 4).
; Only the first GEP (the variable index) carries `inbounds nuw`; the second
; GEP (the constant +4 element step) carries no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
115+
; GFX8-LABEL: gep_inbounds_nuw_alloca:
116+
; GFX8: ; %bb.0:
117+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
119+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
120+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
121+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
122+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
123+
; GFX8-NEXT: s_waitcnt vmcnt(0)
124+
; GFX8-NEXT: s_setpc_b64 s[30:31]
125+
;
126+
; GFX9-LABEL: gep_inbounds_nuw_alloca:
127+
; GFX9: ; %bb.0:
128+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
130+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
131+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
132+
; GFX9-NEXT: s_waitcnt vmcnt(0)
133+
; GFX9-NEXT: s_setpc_b64 s[30:31]
134+
%alloca = alloca [32 x i32], addrspace(5)
135+
%gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
136+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
137+
store volatile i32 %val, ptr addrspace(5) %gep1
138+
ret void
139+
}
140+
141+
; Volatile store of %val into a [32 x i32] stack alloca at element (%idx + 4).
; Only the first GEP (the variable index) carries `nusw nuw`; the second GEP
; (the constant +4 element step) carries no flags.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
142+
; GFX8-LABEL: gep_nusw_nuw_alloca:
143+
; GFX8: ; %bb.0:
144+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
146+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
147+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
148+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
149+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
150+
; GFX8-NEXT: s_waitcnt vmcnt(0)
151+
; GFX8-NEXT: s_setpc_b64 s[30:31]
152+
;
153+
; GFX9-LABEL: gep_nusw_nuw_alloca:
154+
; GFX9: ; %bb.0:
155+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
157+
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
158+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
159+
; GFX9-NEXT: s_waitcnt vmcnt(0)
160+
; GFX9-NEXT: s_setpc_b64 s[30:31]
161+
%alloca = alloca [32 x i32], addrspace(5)
162+
%gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
163+
%gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
164+
store volatile i32 %val, ptr addrspace(5) %gep1
165+
ret void
166+
}
167+
168+
; Non-power-of-two stride variant: a single `inbounds nuw` GEP indexes a
; [5 x [33 x i32]] stack alloca at row %idx, column 4, then stores %val
; (volatile). The row stride is 33 * 4 = 132 bytes (0x84), so the expected
; output scales %idx with v_mul_lo_u32 rather than a shift.
; Expected output: gfx803 adds the constant 16 with a separate v_add_u32,
; while gfx900 folds it into the store as offset:16.
define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
169+
; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
170+
; GFX8: ; %bb.0:
171+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172+
; GFX8-NEXT: s_movk_i32 s4, 0x84
173+
; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
174+
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
175+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
176+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
177+
; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
178+
; GFX8-NEXT: s_waitcnt vmcnt(0)
179+
; GFX8-NEXT: s_setpc_b64 s[30:31]
180+
;
181+
; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
182+
; GFX9: ; %bb.0:
183+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184+
; GFX9-NEXT: s_movk_i32 s4, 0x84
185+
; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
186+
; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
187+
; GFX9-NEXT: v_add_u32_e32 v0, v2, v0
188+
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
189+
; GFX9-NEXT: s_waitcnt vmcnt(0)
190+
; GFX9-NEXT: s_setpc_b64 s[30:31]
191+
%alloca = alloca [5 x [33 x i32]], align 4, addrspace(5)
192+
%gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4
193+
store volatile i32 %val, ptr addrspace(5) %gep1, align 4
194+
ret void
195+
}
196+
197+
; Attribute group #0 (nounwind), referenced by the `#0` on the test function
; definitions in this file.
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)