Skip to content

Commit eaac4a2

Browse files
authored
[AMDGPU] Document & Finalize GFX12 Memory Model (#98599)
Documents the memory model implemented as of #98591, with some fixes/optimizations to the implementation.
1 parent 1a5a1e9 commit eaac4a2

12 files changed

+2339
-331
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 2241 additions & 0 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 98 additions & 87 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
7070
;
7171
; GFX12-WGP-LABEL: workgroup_acquire_fence:
7272
; GFX12-WGP: ; %bb.0: ; %entry
73-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
74-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
7573
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
7674
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
7775
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
@@ -356,8 +354,6 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
356354
;
357355
; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence:
358356
; GFX12-WGP: ; %bb.0: ; %entry
359-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
360-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
361357
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
362358
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
363359
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
@@ -661,17 +657,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
661657
;
662658
; GFX12-WGP-LABEL: agent_acquire_fence:
663659
; GFX12-WGP: ; %bb.0: ; %entry
664-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
665-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
666660
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
667661
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
668662
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
669663
; GFX12-WGP-NEXT: s_endpgm
670664
;
671665
; GFX12-CU-LABEL: agent_acquire_fence:
672666
; GFX12-CU: ; %bb.0: ; %entry
673-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
674-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
675667
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
676668
; GFX12-CU-NEXT: s_wait_storecnt 0x0
677669
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
@@ -1041,17 +1033,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
10411033
;
10421034
; GFX12-WGP-LABEL: agent_one_as_acquire_fence:
10431035
; GFX12-WGP: ; %bb.0: ; %entry
1044-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1045-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
10461036
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
10471037
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
10481038
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
10491039
; GFX12-WGP-NEXT: s_endpgm
10501040
;
10511041
; GFX12-CU-LABEL: agent_one_as_acquire_fence:
10521042
; GFX12-CU: ; %bb.0: ; %entry
1053-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
1054-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
10551043
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
10561044
; GFX12-CU-NEXT: s_wait_storecnt 0x0
10571045
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
@@ -1423,17 +1411,13 @@ define amdgpu_kernel void @system_acquire_fence() {
14231411
;
14241412
; GFX12-WGP-LABEL: system_acquire_fence:
14251413
; GFX12-WGP: ; %bb.0: ; %entry
1426-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1427-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
14281414
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
14291415
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
14301416
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
14311417
; GFX12-WGP-NEXT: s_endpgm
14321418
;
14331419
; GFX12-CU-LABEL: system_acquire_fence:
14341420
; GFX12-CU: ; %bb.0: ; %entry
1435-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
1436-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
14371421
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
14381422
; GFX12-CU-NEXT: s_wait_storecnt 0x0
14391423
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
@@ -1815,17 +1799,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
18151799
;
18161800
; GFX12-WGP-LABEL: system_one_as_acquire_fence:
18171801
; GFX12-WGP: ; %bb.0: ; %entry
1818-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1819-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
18201802
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
18211803
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
18221804
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
18231805
; GFX12-WGP-NEXT: s_endpgm
18241806
;
18251807
; GFX12-CU-LABEL: system_one_as_acquire_fence:
18261808
; GFX12-CU: ; %bb.0: ; %entry
1827-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
1828-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
18291809
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
18301810
; GFX12-CU-NEXT: s_wait_storecnt 0x0
18311811
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS

llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -989,8 +989,6 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
989989
;
990990
; GFX12-WGP-LABEL: workgroup_acquire_fence:
991991
; GFX12-WGP: ; %bb.0: ; %entry
992-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
993-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
994992
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
995993
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
996994
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
@@ -1300,8 +1298,6 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
13001298
;
13011299
; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence:
13021300
; GFX12-WGP: ; %bb.0: ; %entry
1303-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1304-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
13051301
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
13061302
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
13071303
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
@@ -1605,17 +1601,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
16051601
;
16061602
; GFX12-WGP-LABEL: agent_acquire_fence:
16071603
; GFX12-WGP: ; %bb.0: ; %entry
1608-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1609-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
16101604
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
16111605
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
16121606
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
16131607
; GFX12-WGP-NEXT: s_endpgm
16141608
;
16151609
; GFX12-CU-LABEL: agent_acquire_fence:
16161610
; GFX12-CU: ; %bb.0: ; %entry
1617-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
1618-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
16191611
; GFX12-CU-NEXT: s_wait_storecnt 0x0
16201612
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
16211613
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
@@ -1985,17 +1977,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
19851977
;
19861978
; GFX12-WGP-LABEL: agent_one_as_acquire_fence:
19871979
; GFX12-WGP: ; %bb.0: ; %entry
1988-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
1989-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
19901980
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
19911981
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
19921982
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
19931983
; GFX12-WGP-NEXT: s_endpgm
19941984
;
19951985
; GFX12-CU-LABEL: agent_one_as_acquire_fence:
19961986
; GFX12-CU: ; %bb.0: ; %entry
1997-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
1998-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
19991987
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
20001988
; GFX12-CU-NEXT: s_wait_storecnt 0x0
20011989
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
@@ -2367,17 +2355,13 @@ define amdgpu_kernel void @system_acquire_fence() {
23672355
;
23682356
; GFX12-WGP-LABEL: system_acquire_fence:
23692357
; GFX12-WGP: ; %bb.0: ; %entry
2370-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
2371-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
23722358
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
23732359
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
23742360
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
23752361
; GFX12-WGP-NEXT: s_endpgm
23762362
;
23772363
; GFX12-CU-LABEL: system_acquire_fence:
23782364
; GFX12-CU: ; %bb.0: ; %entry
2379-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
2380-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
23812365
; GFX12-CU-NEXT: s_wait_storecnt 0x0
23822366
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
23832367
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
@@ -2759,17 +2743,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
27592743
;
27602744
; GFX12-WGP-LABEL: system_one_as_acquire_fence:
27612745
; GFX12-WGP: ; %bb.0: ; %entry
2762-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
2763-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
27642746
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
27652747
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
27662748
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
27672749
; GFX12-WGP-NEXT: s_endpgm
27682750
;
27692751
; GFX12-CU-LABEL: system_one_as_acquire_fence:
27702752
; GFX12-CU: ; %bb.0: ; %entry
2771-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
2772-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
27732753
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
27742754
; GFX12-CU-NEXT: s_wait_storecnt 0x0
27752755
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS

llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -558,8 +558,6 @@ define amdgpu_kernel void @flat_agent_acquire_load(
558558
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
559559
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
560560
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
561-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
562-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
563561
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
564562
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
565563
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -577,8 +575,6 @@ define amdgpu_kernel void @flat_agent_acquire_load(
577575
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
578576
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
579577
; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
580-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
581-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
582578
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
583579
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
584580
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -2544,8 +2540,6 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
25442540
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
25452541
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2
25462542
; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2547-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
2548-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
25492543
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
25502544
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
25512545
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -2562,8 +2556,6 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
25622556
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
25632557
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2
25642558
; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2565-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
2566-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
25672559
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
25682560
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
25692561
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -7749,8 +7741,6 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
77497741
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
77507742
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
77517743
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7752-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
7753-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
77547744
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
77557745
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
77567746
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -7773,8 +7763,6 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
77737763
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
77747764
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
77757765
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7776-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
7777-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
77787766
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
77797767
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
77807768
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -9278,8 +9266,6 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
92789266
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
92799267
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
92809268
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9281-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
9282-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
92839269
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
92849270
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
92859271
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -9302,8 +9288,6 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
93029288
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
93039289
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
93049290
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9305-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
9306-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
93079291
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
93089292
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
93099293
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -10873,8 +10857,6 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
1087310857
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
1087410858
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
1087510859
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10876-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
10877-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
1087810860
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
1087910861
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
1088010862
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -10902,8 +10884,6 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
1090210884
; GFX12-CU-NEXT: s_wait_storecnt 0x0
1090310885
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
1090410886
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10905-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
10906-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
1090710887
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
1090810888
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
1090910889
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -12433,8 +12413,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
1243312413
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
1243412414
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
1243512415
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
12436-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
12437-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
1243812416
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
1243912417
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
1244012418
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -12453,8 +12431,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
1245312431
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
1245412432
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
1245512433
; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
12456-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
12457-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
1245812434
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
1245912435
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
1246012436
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -14427,8 +14403,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
1442714403
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
1442814404
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2
1442914405
; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14430-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
14431-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
1443214406
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
1443314407
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
1443414408
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -14446,8 +14420,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
1444614420
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
1444714421
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2
1444814422
; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14449-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
14450-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
1445114423
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
1445214424
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
1445314425
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -19610,8 +19582,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
1961019582
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
1961119583
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
1961219584
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19613-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
19614-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
1961519585
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
1961619586
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
1961719587
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -19635,8 +19605,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
1963519605
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
1963619606
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
1963719607
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19638-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
19639-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
1964019608
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
1964119609
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
1964219610
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -21179,8 +21147,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
2117921147
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
2118021148
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
2118121149
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21182-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
21183-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
2118421150
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2118521151
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
2118621152
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -21204,8 +21170,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
2120421170
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
2120521171
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
2120621172
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21207-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
21208-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
2120921173
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2121021174
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
2121121175
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -22824,8 +22788,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
2282422788
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2282522789
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
2282622790
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22827-
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
22828-
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
2282922791
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
2283022792
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
2283122793
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -22854,8 +22816,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
2285422816
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2285522817
; GFX12-CU-NEXT: s_wait_storecnt 0x0
2285622818
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22857-
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
22858-
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
2285922819
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
2286022820
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
2286122821
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0

0 commit comments

Comments
 (0)