@@ -264,6 +264,142 @@ ret:
264
264
ret void
265
265
}
266
266
267
; A volatile load from %arg0 is followed by a call to llvm.trap and then a
; volatile store of the loaded value to %arg1, so the loaded value is still
; needed after the trap. The expectations below record, per configuration,
; whether s_trap 2 is emitted inline ahead of the store, placed in a separate
; trailing block that the store path branches around, or not emitted at all.
define amdgpu_kernel void @trap_with_use_after(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
; NOHSA-TRAP-GFX900-LABEL: trap_with_use_after:
; NOHSA-TRAP-GFX900:       ; %bb.0:
; NOHSA-TRAP-GFX900-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
; NOHSA-TRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; NOHSA-TRAP-GFX900-NEXT:    s_cbranch_execnz .LBB2_2
; NOHSA-TRAP-GFX900-NEXT:  ; %bb.1:
; NOHSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[2:3]
; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; NOHSA-TRAP-GFX900-NEXT:  .LBB2_2:
; NOHSA-TRAP-GFX900-NEXT:    s_endpgm
;
; HSA-TRAP-GFX803-LABEL: trap_with_use_after:
; HSA-TRAP-GFX803:       ; %bb.0:
; HSA-TRAP-GFX803-NEXT:    s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP-GFX803-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
; HSA-TRAP-GFX803-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s4
; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s5
; HSA-TRAP-GFX803-NEXT:    flat_load_dword v2, v[0:1] glc
; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s6
; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s7
; HSA-TRAP-GFX803-NEXT:    s_trap 2
; HSA-TRAP-GFX803-NEXT:    flat_store_dword v[0:1], v2
; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX803-NEXT:    s_endpgm
;
; HSA-TRAP-GFX900-LABEL: trap_with_use_after:
; HSA-TRAP-GFX900:       ; %bb.0:
; HSA-TRAP-GFX900-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
; HSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX900-NEXT:    s_trap 2
; HSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[2:3]
; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX900-NEXT:    s_endpgm
;
; HSA-NOTRAP-GFX900-LABEL: trap_with_use_after:
; HSA-NOTRAP-GFX900:       ; %bb.0:
; HSA-NOTRAP-GFX900-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-NOTRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; HSA-NOTRAP-GFX900-NEXT:    s_cbranch_execnz .LBB2_2
; HSA-NOTRAP-GFX900-NEXT:  ; %bb.1:
; HSA-NOTRAP-GFX900-NEXT:    global_store_dword v0, v1, s[2:3]
; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
; HSA-NOTRAP-GFX900-NEXT:  .LBB2_2:
; HSA-NOTRAP-GFX900-NEXT:    s_endpgm
;
; HSA-TRAP-GFX1100-LABEL: trap_with_use_after:
; HSA-TRAP-GFX1100:       ; %bb.0:
; HSA-TRAP-GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0
; HSA-TRAP-GFX1100-NEXT:    v_mov_b32_e32 v0, 0
; HSA-TRAP-GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX1100-NEXT:    global_load_b32 v1, v0, s[0:1] glc dlc
; HSA-TRAP-GFX1100-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX1100-NEXT:    s_cbranch_execnz .LBB2_2
; HSA-TRAP-GFX1100-NEXT:  ; %bb.1:
; HSA-TRAP-GFX1100-NEXT:    global_store_b32 v0, v1, s[2:3] dlc
; HSA-TRAP-GFX1100-NEXT:    s_waitcnt_vscnt null, 0x0
; HSA-TRAP-GFX1100-NEXT:    s_nop 0
; HSA-TRAP-GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; HSA-TRAP-GFX1100-NEXT:    s_endpgm
; HSA-TRAP-GFX1100-NEXT:  .LBB2_2:
; HSA-TRAP-GFX1100-NEXT:    s_trap 2
; HSA-TRAP-GFX1100-NEXT:    s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
; HSA-TRAP-GFX1100-NEXT:    s_mov_b32 ttmp2, m0
; HSA-TRAP-GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX1100-NEXT:    s_and_b32 s0, s0, 0x3ff
; HSA-TRAP-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; HSA-TRAP-GFX1100-NEXT:    s_bitset1_b32 s0, 10
; HSA-TRAP-GFX1100-NEXT:    s_mov_b32 m0, s0
; HSA-TRAP-GFX1100-NEXT:    s_sendmsg sendmsg(MSG_INTERRUPT)
; HSA-TRAP-GFX1100-NEXT:    s_mov_b32 m0, ttmp2
; HSA-TRAP-GFX1100-NEXT:  .LBB2_3: ; =>This Inner Loop Header: Depth=1
; HSA-TRAP-GFX1100-NEXT:    s_sethalt 5
; HSA-TRAP-GFX1100-NEXT:    s_branch .LBB2_3
;
; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after:
; HSA-TRAP-GFX1100-O0:       ; %bb.0:
; HSA-TRAP-GFX1100-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v0, 0
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; HSA-TRAP-GFX1100-O0-NEXT:    s_load_b64 s[2:3], s[4:5], 0x8
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v1, s2, 0
; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v1, s3, 1
; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT:    global_load_b32 v0, v0, s[0:1] glc dlc
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT:    s_cbranch_execnz .LBB2_2
; HSA-TRAP-GFX1100-O0-NEXT:  ; %bb.1:
; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s0, v0, 0
; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s1, v0, 1
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT:    global_store_b32 v1, v2, s[0:1] dlc
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt_vscnt null, 0x0
; HSA-TRAP-GFX1100-O0-NEXT:    ; kill: killed $vgpr0
; HSA-TRAP-GFX1100-O0-NEXT:    s_endpgm
; HSA-TRAP-GFX1100-O0-NEXT:  .LBB2_2:
; HSA-TRAP-GFX1100-O0-NEXT:    s_trap 2
; HSA-TRAP-GFX1100-O0-NEXT:    s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 ttmp2, m0
; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT:    s_and_b32 s0, s0, 0x3ff
; HSA-TRAP-GFX1100-O0-NEXT:    s_or_b32 s0, s0, 0x400
; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 m0, s0
; HSA-TRAP-GFX1100-O0-NEXT:    s_sendmsg sendmsg(MSG_INTERRUPT)
; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 m0, ttmp2
; HSA-TRAP-GFX1100-O0-NEXT:  .LBB2_3: ; =>This Inner Loop Header: Depth=1
; HSA-TRAP-GFX1100-O0-NEXT:    s_sethalt 5
; HSA-TRAP-GFX1100-O0-NEXT:    s_branch .LBB2_3
  %loaded = load volatile i32, ptr addrspace(1) %arg0
  call void @llvm.trap()
  store volatile i32 %loaded, ptr addrspace(1) %arg1
  ret void
}
402
+
267
403
define amdgpu_kernel void @debugtrap (ptr addrspace (1 ) nocapture readonly %arg0 ) {
268
404
; NOHSA-TRAP-GFX900-LABEL: debugtrap:
269
405
; NOHSA-TRAP-GFX900: ; %bb.0:
@@ -334,6 +470,20 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
334
470
; HSA-TRAP-GFX1100-NEXT: s_nop 0
335
471
; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
336
472
; HSA-TRAP-GFX1100-NEXT: s_endpgm
473
+ ;
474
+ ; HSA-TRAP-GFX1100-O0-LABEL: debugtrap:
475
+ ; HSA-TRAP-GFX1100-O0: ; %bb.0:
476
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
477
+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
478
+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 1
479
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
480
+ ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
481
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
482
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_trap 3
483
+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 2
484
+ ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
485
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
486
+ ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
337
487
store volatile i32 1 , ptr addrspace (1 ) %arg0
338
488
call void @llvm.debugtrap ()
339
489
store volatile i32 2 , ptr addrspace (1 ) %arg0
0 commit comments