Skip to content

Commit 364872f

Browse files
committed
[AMDGPU] Check MIR after SIMemoryLegalizer instead of final ISA
Since llvm#72830, the memory legalizer tests have not shown the s_waitcnt instructions inserted by SIMemoryLegalizer, because most of them are subsequently removed by SIInsertWaitcnts. Checking the MIR immediately after SIMemoryLegalizer runs fixes this, so the tests show exactly what the pass has inserted.
1 parent 82219e5 commit 364872f

27 files changed: 228,750 additions and 211,259 deletions.

llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll

Lines changed: 2824 additions & 2324 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll

Lines changed: 16777 additions & 14466 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll

Lines changed: 48 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,38 +1,37 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -stop-after=si-memory-legalizer -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -stop-after=si-memory-legalizer -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
44

55
define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
6-
; GFX12-LABEL: flat_last_use_load_0:
7-
; GFX12: ; %bb.0: ; %entry
8-
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
9-
; GFX12-NEXT: s_wait_kmcnt 0x0
10-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11-
; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
12-
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
13-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
14-
; GFX12-NEXT: flat_store_b32 v[0:1], v2
15-
; GFX12-NEXT: s_endpgm
6+
; GFX12-LABEL: name: flat_last_use_load_0
7+
; GFX12: bb.0.entry:
8+
; GFX12-NEXT: liveins: $sgpr0_sgpr1
9+
; GFX12-NEXT: {{ $}}
10+
; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
11+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
12+
; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: ("amdgpu-last-use" load (s32) from %ir.1)
13+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
14+
; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
15+
; GFX12-NEXT: S_ENDPGM 0
1616
entry:
1717
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
1818
store i32 %val, ptr %out
1919
ret void
2020
}
2121

2222
define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
23-
; GFX12-LABEL: flat_last_use_load_1:
24-
; GFX12: ; %bb.0: ; %entry
25-
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
26-
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
27-
; GFX12-NEXT: s_wait_kmcnt 0x0
28-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
29-
; GFX12-NEXT: v_add_co_u32 v0, s0, s0, v0
30-
; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
31-
; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
32-
; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2
33-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
34-
; GFX12-NEXT: flat_store_b32 v[0:1], v2
35-
; GFX12-NEXT: s_endpgm
23+
; GFX12-LABEL: name: flat_last_use_load_1
24+
; GFX12: bb.0.entry:
25+
; GFX12-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
26+
; GFX12-NEXT: {{ $}}
27+
; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
28+
; GFX12-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
29+
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr0 = V_ADD_CO_U32_e64 killed $sgpr0, killed $vgpr0, 0, implicit $exec
30+
; GFX12-NEXT: renamable $vgpr1, dead $sgpr_null = V_ADDC_U32_e64 killed $sgpr1, 0, killed $sgpr0, 0, implicit $exec
31+
; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: ("amdgpu-last-use" load (s32) from %ir.val.gep)
32+
; GFX12-NEXT: $vgpr1, $vgpr0 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr3, killed $sgpr2, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
33+
; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
34+
; GFX12-NEXT: S_ENDPGM 0
3635
entry:
3736
%tid = call i32 @llvm.amdgcn.workitem.id.x()
3837
%val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
@@ -42,34 +41,36 @@ entry:
4241
}
4342

4443
define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
45-
; GFX12-LABEL: flat_last_use_and_volatile_load:
46-
; GFX12: ; %bb.0: ; %entry
47-
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
48-
; GFX12-NEXT: s_wait_kmcnt 0x0
49-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
50-
; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
51-
; GFX12-NEXT: s_wait_loadcnt 0x0
52-
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
53-
; GFX12-NEXT: s_wait_dscnt 0x0
54-
; GFX12-NEXT: flat_store_b32 v[0:1], v2
55-
; GFX12-NEXT: s_endpgm
44+
; GFX12-LABEL: name: flat_last_use_and_volatile_load
45+
; GFX12: bb.0.entry:
46+
; GFX12-NEXT: liveins: $sgpr0_sgpr1
47+
; GFX12-NEXT: {{ $}}
48+
; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
49+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
50+
; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 27, implicit $exec, implicit $flat_scr :: (volatile "amdgpu-last-use" load (s32) from %ir.1)
51+
; GFX12-NEXT: S_WAIT_BVHCNT_soft 0
52+
; GFX12-NEXT: S_WAIT_SAMPLECNT_soft 0
53+
; GFX12-NEXT: S_WAIT_LOADCNT_soft 0
54+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
55+
; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
56+
; GFX12-NEXT: S_ENDPGM 0
5657
entry:
5758
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
5859
store i32 %val, ptr %out
5960
ret void
6061
}
6162

6263
define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out) {
63-
; GFX12-LABEL: flat_last_use_and_nontemporal_load:
64-
; GFX12: ; %bb.0: ; %entry
65-
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
66-
; GFX12-NEXT: s_wait_kmcnt 0x0
67-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
68-
; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
69-
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
70-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
71-
; GFX12-NEXT: flat_store_b32 v[0:1], v2
72-
; GFX12-NEXT: s_endpgm
64+
; GFX12-LABEL: name: flat_last_use_and_nontemporal_load
65+
; GFX12: bb.0.entry:
66+
; GFX12-NEXT: liveins: $sgpr0_sgpr1
67+
; GFX12-NEXT: {{ $}}
68+
; GFX12-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128) from %ir.in.kernarg.offset1, addrspace 4)
69+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
70+
; GFX12-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 3, implicit $exec, implicit $flat_scr :: (non-temporal "amdgpu-last-use" load (s32) from %ir.1)
71+
; GFX12-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr2, killed $sgpr3, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
72+
; GFX12-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.2)
73+
; GFX12-NEXT: S_ENDPGM 0
7374
entry:
7475
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
7576
store i32 %val, ptr %out

0 commit comments

Comments
 (0)