1
- ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1
+ ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
+
3
+ ; TODO: Test with flat scratch
2
4
3
5
; GCN-LABEL: {{^}}store_fi_lifetime:
4
6
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
@@ -143,7 +145,7 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
143
145
ret void
144
146
}
145
147
146
- ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
148
+ ; GCN-LABEL: {{^}}kernel_stored_fi_to_global_huge_frame_offset:
147
149
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
148
150
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
149
151
@@ -158,7 +160,32 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
158
160
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
159
161
160
162
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
161
- define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
163
; Kernel (amdgpu_kernel) variant of the huge-frame-offset case.
; Allocates two [4096 x i32] objects in addrspace(5), does volatile stores to
; element 0 and element 4095 of %tmp0, then writes the address of element 14
; of %tmp0 to the addrspace(1) pointer argument %ptr.
+ define amdgpu_kernel void @kernel_stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
164
+ %tmp0 = alloca [4096 x i32 ], addrspace (5 )
165
+ %tmp1 = alloca [4096 x i32 ], addrspace (5 )
166
; Volatile so the stores to the alloca are not optimized away.
+ store volatile i32 0 , ptr addrspace (5 ) %tmp0
167
; Last element of the first array: index 4095.
+ %gep1.tmp0 = getelementptr [4096 x i32 ], ptr addrspace (5 ) %tmp0 , i32 0 , i32 4095
168
+ store volatile i32 999 , ptr addrspace (5 ) %gep1.tmp0
169
; NOTE(review): despite the name, %gep0.tmp1 is computed from %tmp0 (index 14),
; not from %tmp1; %tmp1 has no use in this body besides its alloca. Confirm this
; is intentional before changing it, since the check lines depend on the offsets.
+ %gep0.tmp1 = getelementptr [4096 x i32 ], ptr addrspace (5 ) %tmp0 , i32 0 , i32 14
170
; The addrspace(5) pointer value itself is what gets stored through %ptr.
+ store ptr addrspace (5 ) %gep0.tmp1 , ptr addrspace (1 ) %ptr
171
+ ret void
172
+ }
173
+
174
+ ; GCN-LABEL: {{^}}func_stored_fi_to_global_huge_frame_offset:
175
+ ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
176
+ ; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4{{$}}
177
+
178
+ ; GCN: v_lshr_b32_e64 [[FI_TMP:v[0-9]+]], s32, 6
179
+ ; GCN: v_add_i32_e32 [[BASE_0_1:v[0-9]+]], vcc, 4, [[FI_TMP]]{{$}}
180
+
181
+ ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
182
+ ; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
183
+
184
+ ; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
185
+ ; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
186
+
187
+ ; GCN: buffer_store_dword [[BASE_1_OFF_2]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
188
+ define void @func_stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
162
189
%tmp0 = alloca [4096 x i32 ], addrspace (5 )
163
190
%tmp1 = alloca [4096 x i32 ], addrspace (5 )
164
191
store volatile i32 0 , ptr addrspace (5 ) %tmp0
@@ -190,6 +217,124 @@ entry:
190
217
ret void
191
218
}
192
219
220
+ ; FIXME: This is broken, and the sgpr input just gets replaced with a VGPR
221
+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_sgpr:
222
+ ; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
223
+ ; GCN: ; use [[FI]]
224
; Non-kernel function with a single i32 alloca in addrspace(5) whose address is
; passed as the only input operand of an inline asm statement using the "s"
; constraint. The asm template is just the text "; use $0".
+ define void @func_alloca_offset0__use_asm_sgpr () {
225
+ %alloca = alloca i32 , addrspace (5 )
226
; sideeffect keeps the asm call alive even though it produces no result.
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca )
227
+ ret void
228
+ }
229
+
230
+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_vgpr:
231
+ ; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
232
+ ; GCN-NEXT: ;;#ASMSTART
233
+ ; GCN-NEXT: ; use [[FI]]
234
; Same shape as the "s"-constraint test: one i32 alloca in addrspace(5), but
; its address is passed to the inline asm through the "v" constraint.
+ define void @func_alloca_offset0__use_asm_vgpr () {
235
+ %alloca = alloca i32 , addrspace (5 )
236
; The only operand ($0) is the alloca's address.
+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca )
237
+ ret void
238
+ }
239
+
240
+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_sgpr:
241
+ ; GCN: s_lshr_b32 s8, s32, 6
242
+ ; GCN-NEXT: ;;#ASMSTART
243
+ ; GCN-NEXT: ; use s8
244
; One i32 alloca in addrspace(5); its address is passed to inline asm through
; the explicit physical-register constraint "{s8}".
+ define void @func_alloca_offset0__use_asm_phys_sgpr () {
245
+ %alloca = alloca i32 , addrspace (5 )
246
; "{s8}" names a specific register rather than a register class.
+ call void asm sideeffect "; use $0" , "{s8}" (ptr addrspace (5 ) %alloca )
247
+ ret void
248
+ }
249
+
250
+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_vgpr:
251
+ ; GCN: v_lshr_b32_e64 v8, s32, 6
252
+ ; GCN-NEXT: ;;#ASMSTART
253
+ ; GCN-NEXT: ; use v8
254
; One i32 alloca in addrspace(5); its address is passed to inline asm through
; the explicit physical-register constraint "{v8}".
+ define void @func_alloca_offset0__use_asm_phys_vgpr () {
255
+ %alloca = alloca i32 , addrspace (5 )
256
; "{v8}" names a specific register rather than a register class.
+ call void asm sideeffect "; use $0" , "{v8}" (ptr addrspace (5 ) %alloca )
257
+ ret void
258
+ }
259
+
260
+ ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_sgpr:
261
+ ; GCN: v_lshr_b32_e64 [[FI0_TMP0:v[0-9]+]], s32, 6
262
+ ; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP0]]
263
+
264
+ ; GCN: v_lshr_b32_e64 [[TMP:v[0-9]+]], s32, 6
265
+ ; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
266
+ ; GCN-NEXT: v_add_i32_e32 [[TMP]], vcc, vcc_lo, [[TMP]]
267
+ ; GCN-NEXT: ;;#ASMSTART
268
+ ; GCN: ; use [[TMP]]
269
; Non-kernel function with two addrspace(5) allocas: a 16-byte-aligned
; [4096 x i32] array followed by a single i32. Each address is passed to its
; own inline asm statement through the "s" constraint.
+ define void @func_alloca_offset_use_asm_sgpr () {
270
+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
271
+ %alloca1 = alloca i32 , addrspace (5 )
272
; First asm consumes the large array's address, second the scalar's address.
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca0 )
273
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca1 )
274
+ ret void
275
+ }
276
+
277
+ ; FIXME: Shouldn't need to materialize constant
278
+ ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_vgpr:
279
+ ; GCN: v_lshr_b32_e64 [[FI0_TMP:v[0-9]+]], s32, 6
280
+ ; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP]]
281
+ ; GCN-NEXT: ;;#ASMSTART
282
+ ; GCN-NEXT: ; use [[FI0]]
283
+ ; GCN-NEXT: ;;#ASMEND
284
+
285
+ ; GCN: v_lshr_b32_e64 [[FI1_TMP:v[0-9]+]], s32, 6
286
+ ; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
287
+ ; GCN-NEXT: v_add_i32_e32 [[FI1:v[0-9]+]], vcc, vcc_lo, [[FI1_TMP]]
288
+ ; GCN-NEXT: ;;#ASMSTART
289
+ ; GCN-NEXT: ; use [[FI1]]
290
+ ; GCN-NEXT: ;;#ASMEND
291
; Non-kernel function with two addrspace(5) allocas: a 16-byte-aligned
; [4096 x i32] array followed by a single i32. Each address is passed to its
; own inline asm statement.
+ define void @func_alloca_offset_use_asm_vgpr () {
292
+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
293
+ %alloca1 = alloca i32 , addrspace (5 )
294
; NOTE(review): the first operand uses the "s" constraint while the second uses
; "v", although the function name says vgpr and the kernel_..._vgpr test uses
; "v" for both operands. Confirm whether this mix is intentional.
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca0 )
295
+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca1 )
296
+ ret void
297
+ }
298
+
299
+ ; FIXME: Using VGPR for SGPR input
300
+ ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_sgpr:
301
+ ; GCN: v_mov_b32_e32 v0, 16
302
+ ; GCN-NOT: v0
303
+ ; GCN: ;;#ASMSTART
304
+ ; GCN-NEXT: ; use v0
305
+ ; GCN-NEXT: ;;#ASMEND
306
+
307
+ ; GCN: v_mov_b32_e32 v0, 0x4010
308
+ ; GCN-NEXT: ;;#ASMSTART
309
+ ; GCN-NEXT: ; use v0
310
+ ; GCN-NEXT: ;;#ASMEND
311
; Kernel (amdgpu_kernel) counterpart of the two-alloca "s"-constraint test:
; a 16-byte-aligned [4096 x i32] array and a single i32, both in addrspace(5),
; each passed to its own inline asm statement through the "s" constraint.
+ define amdgpu_kernel void @kernel_alloca_offset_use_asm_sgpr () {
312
+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
313
+ %alloca1 = alloca i32 , addrspace (5 )
314
; One asm statement per alloca address.
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca0 )
315
+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca1 )
316
+ ret void
317
+ }
318
+
319
+ ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_vgpr:
320
+ ; GCN: v_mov_b32_e32 v0, 16
321
+ ; GCN-NOT: v0
322
+ ; GCN: ;;#ASMSTART
323
+ ; GCN-NEXT: ; use v0
324
+ ; GCN-NEXT: ;;#ASMEND
325
+
326
+ ; GCN: v_mov_b32_e32 v0, 0x4010
327
+ ; GCN-NEXT: ;;#ASMSTART
328
+ ; GCN-NEXT: ; use v0
329
+ ; GCN-NEXT: ;;#ASMEND
330
; Kernel (amdgpu_kernel) counterpart of the two-alloca "v"-constraint test:
; a 16-byte-aligned [4096 x i32] array and a single i32, both in addrspace(5),
; each passed to its own inline asm statement through the "v" constraint.
+ define amdgpu_kernel void @kernel_alloca_offset_use_asm_vgpr () {
331
+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
332
+ %alloca1 = alloca i32 , addrspace (5 )
333
; One asm statement per alloca address.
+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca0 )
334
+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca1 )
335
+ ret void
336
+ }
337
+
193
338
declare void @llvm.lifetime.start.p5 (i64 , ptr addrspace (5 ) nocapture ) #1
194
339
declare void @llvm.lifetime.end.p5 (i64 , ptr addrspace (5 ) nocapture ) #1
195
340
0 commit comments