1
- ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
3
- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
1
+ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2
+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -stop-after=si-memory-legalizer -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
3
+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -stop-after=si-memory-legalizer -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
4
4
5
5
define amdgpu_kernel void @flat_last_use_load_0 (ptr %in , ptr %out ) {
6
- ; GFX12-LABEL: flat_last_use_load_0:
7
- ; GFX12: ; %bb.0: ; %entry
8
- ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
9
- ; GFX12-NEXT: s_wait_kmcnt 0x0
10
- ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11
- ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
12
- ; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
13
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
14
- ; GFX12-NEXT: flat_store_b32 v[0:1], v2
15
- ; GFX12-NEXT: s_endpgm
6
+ ; GFX12-LABEL: name: flat_last_use_load_0
7
+ ; GFX12: bb.0.entry:
8
+ ; GFX12-NEXT: liveins: $sgpr0_sgpr1
9
+ ; GFX12-NEXT: {{ $}}
10
+ ; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
11
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
12
+ ; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: ("amdgpu-last-use" load (s32) from %ir.1)
13
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
14
+ ; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
15
+ ; GFX12-NEXT: S_ENDPGM 0
16
16
entry:
17
17
%val = load i32 , ptr %in , align 4 , !amdgpu.last.use !{}
18
18
store i32 %val , ptr %out
19
19
ret void
20
20
}
21
21
22
22
define amdgpu_kernel void @flat_last_use_load_1 (ptr %in , ptr %out ) {
23
- ; GFX12-LABEL: flat_last_use_load_1:
24
- ; GFX12: ; %bb.0: ; %entry
25
- ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
26
- ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
27
- ; GFX12-NEXT: s_wait_kmcnt 0x0
28
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
29
- ; GFX12-NEXT: v_add_co_u32 v0, s0, s0, v0
30
- ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
31
- ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
32
- ; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2
33
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
34
- ; GFX12-NEXT: flat_store_b32 v[0:1], v2
35
- ; GFX12-NEXT: s_endpgm
23
+ ; GFX12-LABEL: name: flat_last_use_load_1
24
+ ; GFX12: bb.0.entry:
25
+ ; GFX12-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
26
+ ; GFX12-NEXT: {{ $}}
27
+ ; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
28
+ ; GFX12-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
29
+ ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr0 = V_ADD_CO_U32_e64 killed $sgpr0, killed $vgpr0, 0, implicit $exec
30
+ ; GFX12-NEXT: renamable $vgpr1, dead $sgpr_null = V_ADDC_U32_e64 killed $sgpr1, 0, killed $sgpr0, 0, implicit $exec
31
+ ; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: ("amdgpu-last-use" load (s32) from %ir.val.gep)
32
+ ; GFX12-NEXT: $vgpr1, $vgpr0 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr3, killed $sgpr2, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
33
+ ; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
34
+ ; GFX12-NEXT: S_ENDPGM 0
36
35
entry:
37
36
%tid = call i32 @llvm.amdgcn.workitem.id.x ()
38
37
%val.gep = getelementptr inbounds i32 , ptr %in , i32 %tid
@@ -42,34 +41,36 @@ entry:
42
41
}
43
42
44
43
define amdgpu_kernel void @flat_last_use_and_volatile_load (ptr %in , ptr %out ) {
45
- ; GFX12-LABEL: flat_last_use_and_volatile_load:
46
- ; GFX12: ; %bb.0: ; %entry
47
- ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
48
- ; GFX12-NEXT: s_wait_kmcnt 0x0
49
- ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
50
- ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
51
- ; GFX12-NEXT: s_wait_loadcnt 0x0
52
- ; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
53
- ; GFX12-NEXT: s_wait_dscnt 0x0
54
- ; GFX12-NEXT: flat_store_b32 v[0:1], v2
55
- ; GFX12-NEXT: s_endpgm
44
+ ; GFX12-LABEL: name: flat_last_use_and_volatile_load
45
+ ; GFX12: bb.0.entry:
46
+ ; GFX12-NEXT: liveins: $sgpr0_sgpr1
47
+ ; GFX12-NEXT: {{ $}}
48
+ ; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
49
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
50
+ ; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 27, implicit $exec, implicit $flat_scr :: (volatile "amdgpu-last-use" load (s32) from %ir.1)
51
+ ; GFX12-NEXT: S_WAIT_BVHCNT_soft 0
52
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT_soft 0
53
+ ; GFX12-NEXT: S_WAIT_LOADCNT_soft 0
54
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
55
+ ; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
56
+ ; GFX12-NEXT: S_ENDPGM 0
56
57
entry:
57
58
%val = load volatile i32 , ptr %in , align 4 , !amdgpu.last.use !{}
58
59
store i32 %val , ptr %out
59
60
ret void
60
61
}
61
62
62
63
define amdgpu_kernel void @flat_last_use_and_nontemporal_load (ptr %in , ptr %out ) {
63
- ; GFX12-LABEL: flat_last_use_and_nontemporal_load:
64
- ; GFX12: ; %bb.0: ; %entry
65
- ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
66
- ; GFX12-NEXT: s_wait_kmcnt 0x0
67
- ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
68
- ; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
69
- ; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
70
- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
71
- ; GFX12-NEXT: flat_store_b32 v[0:1], v2
72
- ; GFX12-NEXT: s_endpgm
64
+ ; GFX12-LABEL: name: flat_last_use_and_nontemporal_load
65
+ ; GFX12: bb.0.entry:
66
+ ; GFX12-NEXT: liveins: $sgpr0_sgpr1
67
+ ; GFX12-NEXT: {{ $}}
68
+ ; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
69
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
70
+ ; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: (non-temporal "amdgpu-last-use" load (s32) from %ir.1)
71
+ ; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
72
+ ; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
73
+ ; GFX12-NEXT: S_ENDPGM 0
73
74
entry:
74
75
%val = load i32 , ptr %in , align 4 , !amdgpu.last.use !{}, !nontemporal !0
75
76
store i32 %val , ptr %out
0 commit comments