Skip to content

Commit 2d3119c

Browse files
committed
AMDGPU: Add more tests for frame index code quality
There are also some bugs with sgpr constraints.
1 parent 335e137 commit 2d3119c

File tree

1 file changed

+148
-3
lines changed

1 file changed

+148
-3
lines changed

llvm/test/CodeGen/AMDGPU/captured-frame-index.ll

Lines changed: 148 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1+
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2+
3+
; TODO: Test with flat scratch
24

35
; GCN-LABEL: {{^}}store_fi_lifetime:
46
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
@@ -143,7 +145,7 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
143145
ret void
144146
}
145147

146-
; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
148+
; GCN-LABEL: {{^}}kernel_stored_fi_to_global_huge_frame_offset:
147149
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
148150
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
149151

@@ -158,7 +160,32 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
158160
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
159161

160162
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
161-
define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
163+
; Kernel variant: allocates two 4096 x i32 arrays in addrspace(5), does
; volatile stores to the first and last elements of %tmp0, and then writes a
; pointer into %tmp0 out through the addrspace(1) pointer %ptr.
define amdgpu_kernel void @kernel_stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
164+
%tmp0 = alloca [4096 x i32], addrspace(5)
165+
%tmp1 = alloca [4096 x i32], addrspace(5)
166+
store volatile i32 0, ptr addrspace(5) %tmp0
167+
%gep1.tmp0 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 4095
168+
store volatile i32 999, ptr addrspace(5) %gep1.tmp0
169+
; NOTE(review): despite its name, %gep0.tmp1 is element 14 of %tmp0, and
; %tmp1 is never referenced in this function - confirm this is intended
; before renaming or re-pointing it, since the expected output depends on it.
%gep0.tmp1 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 14
170+
store ptr addrspace(5) %gep0.tmp1, ptr addrspace(1) %ptr
171+
ret void
172+
}
173+
174+
; GCN-LABEL: {{^}}func_stored_fi_to_global_huge_frame_offset:
175+
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
176+
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4{{$}}
177+
178+
; GCN: v_lshr_b32_e64 [[FI_TMP:v[0-9]+]], s32, 6
179+
; GCN: v_add_i32_e32 [[BASE_0_1:v[0-9]+]], vcc, 4, [[FI_TMP]]{{$}}
180+
181+
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
182+
; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
183+
184+
; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
185+
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
186+
187+
; GCN: buffer_store_dword [[BASE_1_OFF_2]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
188+
define void @func_stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
162189
%tmp0 = alloca [4096 x i32], addrspace(5)
163190
%tmp1 = alloca [4096 x i32], addrspace(5)
164191
store volatile i32 0, ptr addrspace(5) %tmp0
@@ -190,6 +217,124 @@ entry:
190217
ret void
191218
}
192219

220+
; FIXME: This is broken, and the sgpr input just gets replaced with a VGPR
221+
; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_sgpr:
222+
; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
223+
; GCN: ; use [[FI]]
224+
; Non-kernel function: passes the address of its only stack object (a single
; i32 in addrspace(5)) to a side-effecting inline asm through an operand with
; the "s" register constraint.
define void @func_alloca_offset0__use_asm_sgpr() {
225+
%alloca = alloca i32, addrspace(5)
226+
; The asm body is only a comment ("; use $0"), so the register chosen for the
; operand shows up verbatim in the emitted assembly.
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca)
227+
ret void
228+
}
229+
230+
; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_vgpr:
231+
; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
232+
; GCN-NEXT: ;;#ASMSTART
233+
; GCN-NEXT: ; use [[FI]]
234+
; Non-kernel function: passes the address of its only stack object (a single
; i32 in addrspace(5)) to a side-effecting inline asm through an operand with
; the "v" register constraint.
define void @func_alloca_offset0__use_asm_vgpr() {
235+
%alloca = alloca i32, addrspace(5)
236+
; The asm body is only a comment ("; use $0"), so the register chosen for the
; operand shows up verbatim in the emitted assembly.
call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca)
237+
ret void
238+
}
239+
240+
; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_sgpr:
241+
; GCN: s_lshr_b32 s8, s32, 6
242+
; GCN-NEXT: ;;#ASMSTART
243+
; GCN-NEXT: ; use s8
244+
; Non-kernel function: passes the address of its only stack object (a single
; i32 in addrspace(5)) to a side-effecting inline asm through an operand
; pinned to the physical register s8.
define void @func_alloca_offset0__use_asm_phys_sgpr() {
245+
%alloca = alloca i32, addrspace(5)
246+
; "{s8}" names one specific register rather than a register class, so the
; address has to be produced directly in s8.
call void asm sideeffect "; use $0", "{s8}"(ptr addrspace(5) %alloca)
247+
ret void
248+
}
249+
250+
; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_vgpr:
251+
; GCN: v_lshr_b32_e64 v8, s32, 6
252+
; GCN-NEXT: ;;#ASMSTART
253+
; GCN-NEXT: ; use v8
254+
; Non-kernel function: passes the address of its only stack object (a single
; i32 in addrspace(5)) to a side-effecting inline asm through an operand
; pinned to the physical register v8.
define void @func_alloca_offset0__use_asm_phys_vgpr() {
255+
%alloca = alloca i32, addrspace(5)
256+
; "{v8}" names one specific register rather than a register class, so the
; address has to be produced directly in v8.
call void asm sideeffect "; use $0", "{v8}"(ptr addrspace(5) %alloca)
257+
ret void
258+
}
259+
260+
; GCN-LABEL: {{^}}func_alloca_offset_use_asm_sgpr:
261+
; GCN: v_lshr_b32_e64 [[FI0_TMP0:v[0-9]+]], s32, 6
262+
; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP0]]
263+
264+
; GCN: v_lshr_b32_e64 [[TMP:v[0-9]+]], s32, 6
265+
; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
266+
; GCN-NEXT: v_add_i32_e32 [[TMP]], vcc, vcc_lo, [[TMP]]
267+
; GCN-NEXT: ;;#ASMSTART
268+
; GCN: ; use [[TMP]]
269+
; Non-kernel function with two stack objects whose addresses are each passed
; to a separate side-effecting inline asm through an "s"-constrained operand.
define void @func_alloca_offset_use_asm_sgpr() {
270+
; 16-byte-aligned 4096 x i32 array (16384 bytes); the expectations preceding
; this function use the constants 16 for this object and 0x4010
; (16 + 4096*4) for %alloca1.
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
271+
%alloca1 = alloca i32, addrspace(5)
272+
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca0)
273+
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca1)
274+
ret void
275+
}
276+
277+
; FIXME: Shouldn't need to materialize constant
278+
; GCN-LABEL: {{^}}func_alloca_offset_use_asm_vgpr:
279+
; GCN: v_lshr_b32_e64 [[FI0_TMP:v[0-9]+]], s32, 6
280+
; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP]]
281+
; GCN-NEXT: ;;#ASMSTART
282+
; GCN-NEXT: ; use [[FI0]]
283+
; GCN-NEXT: ;;#ASMEND
284+
285+
; GCN: v_lshr_b32_e64 [[FI1_TMP:v[0-9]+]], s32, 6
286+
; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
287+
; GCN-NEXT: v_add_i32_e32 [[FI1:v[0-9]+]], vcc, vcc_lo, [[FI1_TMP]]
288+
; GCN-NEXT: ;;#ASMSTART
289+
; GCN-NEXT: ; use [[FI1]]
290+
; GCN-NEXT: ;;#ASMEND
291+
; Non-kernel function with two stack objects whose addresses are each passed
; to a separate side-effecting inline asm through a "v"-constrained operand.
; Both operands use "v" so that this test exercises only the VGPR path and does
; not rely on how "s"-constrained frame-index operands happen to be lowered.
define void @func_alloca_offset_use_asm_vgpr() {
292+
; 16-byte-aligned 4096 x i32 array (16384 bytes); the expectations preceding
; this function use the constants 16 for this object and 0x4010
; (16 + 4096*4) for %alloca1.
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
293+
%alloca1 = alloca i32, addrspace(5)
294+
call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca0)
295+
call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca1)
296+
ret void
297+
}
298+
299+
; FIXME: Using VGPR for SGPR input
300+
; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_sgpr:
301+
; GCN: v_mov_b32_e32 v0, 16
302+
; GCN-NOT: v0
303+
; GCN: ;;#ASMSTART
304+
; GCN-NEXT: ; use v0
305+
; GCN-NEXT: ;;#ASMEND
306+
307+
; GCN: v_mov_b32_e32 v0, 0x4010
308+
; GCN-NEXT: ;;#ASMSTART
309+
; GCN-NEXT: ; use v0
310+
; GCN-NEXT: ;;#ASMEND
311+
; Kernel variant: two stack objects whose addresses are each passed to a
; separate side-effecting inline asm through an "s"-constrained operand.
define amdgpu_kernel void @kernel_alloca_offset_use_asm_sgpr() {
312+
; 16-byte-aligned 4096 x i32 array (16384 bytes); the expectations preceding
; this function materialize 16 for this object and 0x4010 (16 + 4096*4) for
; %alloca1.
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
313+
%alloca1 = alloca i32, addrspace(5)
314+
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca0)
315+
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca1)
316+
ret void
317+
}
318+
319+
; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_vgpr:
320+
; GCN: v_mov_b32_e32 v0, 16
321+
; GCN-NOT: v0
322+
; GCN: ;;#ASMSTART
323+
; GCN-NEXT: ; use v0
324+
; GCN-NEXT: ;;#ASMEND
325+
326+
; GCN: v_mov_b32_e32 v0, 0x4010
327+
; GCN-NEXT: ;;#ASMSTART
328+
; GCN-NEXT: ; use v0
329+
; GCN-NEXT: ;;#ASMEND
330+
; Kernel variant: two stack objects whose addresses are each passed to a
; separate side-effecting inline asm through a "v"-constrained operand.
define amdgpu_kernel void @kernel_alloca_offset_use_asm_vgpr() {
331+
; 16-byte-aligned 4096 x i32 array (16384 bytes); the expectations preceding
; this function materialize 16 for this object and 0x4010 (16 + 4096*4) for
; %alloca1.
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
332+
%alloca1 = alloca i32, addrspace(5)
333+
call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca0)
334+
call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca1)
335+
ret void
336+
}
337+
193338
declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #1
194339
declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #1
195340

0 commit comments

Comments
 (0)