@@ -22,6 +22,7 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
22
22
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
23
23
; GFX12-NEXT: s_wait_dscnt 0x0
24
24
; GFX12-NEXT: global_inv scope:SCOPE_SE
25
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
25
26
; GFX12-NEXT: s_setpc_b64 s[30:31]
26
27
;
27
28
; GFX942-LABEL: local_atomic_fmax_ret_f32:
@@ -94,6 +95,7 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
94
95
; GFX12-NEXT: ds_max_num_f32 v0, v1
95
96
; GFX12-NEXT: s_wait_dscnt 0x0
96
97
; GFX12-NEXT: global_inv scope:SCOPE_SE
98
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
97
99
; GFX12-NEXT: s_setpc_b64 s[30:31]
98
100
;
99
101
; GFX942-LABEL: local_atomic_fmax_noret_f32:
@@ -166,6 +168,7 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
166
168
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
167
169
; GFX12-NEXT: s_wait_dscnt 0x0
168
170
; GFX12-NEXT: global_inv scope:SCOPE_SE
171
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
169
172
; GFX12-NEXT: s_setpc_b64 s[30:31]
170
173
;
171
174
; GFX942-LABEL: local_atomic_fmax_ret_f64:
@@ -242,6 +245,7 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
242
245
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
243
246
; GFX12-NEXT: s_wait_dscnt 0x0
244
247
; GFX12-NEXT: global_inv scope:SCOPE_SE
248
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
245
249
; GFX12-NEXT: s_setpc_b64 s[30:31]
246
250
;
247
251
; GFX942-LABEL: local_atomic_fmax_noret_f64:
@@ -318,6 +322,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
318
322
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
319
323
; GFX12-NEXT: s_wait_loadcnt 0x0
320
324
; GFX12-NEXT: global_inv scope:SCOPE_DEV
325
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
321
326
; GFX12-NEXT: s_setpc_b64 s[30:31]
322
327
;
323
328
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -464,6 +469,7 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
464
469
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
465
470
; GFX12-NEXT: s_wait_storecnt 0x0
466
471
; GFX12-NEXT: global_inv scope:SCOPE_DEV
472
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
467
473
; GFX12-NEXT: s_setpc_b64 s[30:31]
468
474
;
469
475
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -624,6 +630,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
624
630
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
625
631
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
626
632
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
633
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
627
634
; GFX12-NEXT: s_setpc_b64 s[30:31]
628
635
;
629
636
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -779,6 +786,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
779
786
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
780
787
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
781
788
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
789
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
782
790
; GFX12-NEXT: s_setpc_b64 s[30:31]
783
791
;
784
792
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -909,6 +917,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
909
917
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
910
918
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
911
919
; GFX12-NEXT: global_inv scope:SCOPE_DEV
920
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
912
921
; GFX12-NEXT: s_setpc_b64 s[30:31]
913
922
;
914
923
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1051,6 +1060,7 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
1051
1060
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
1052
1061
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
1053
1062
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1063
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1054
1064
; GFX12-NEXT: s_setpc_b64 s[30:31]
1055
1065
;
1056
1066
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1210,6 +1220,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
1210
1220
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
1211
1221
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1212
1222
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1223
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1213
1224
; GFX12-NEXT: s_setpc_b64 s[30:31]
1214
1225
;
1215
1226
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1363,6 +1374,7 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
1363
1374
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
1364
1375
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
1365
1376
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1377
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1366
1378
; GFX12-NEXT: s_setpc_b64 s[30:31]
1367
1379
;
1368
1380
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -1495,6 +1507,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
1495
1507
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
1496
1508
; GFX12-NEXT: s_wait_loadcnt 0x0
1497
1509
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1510
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1498
1511
; GFX12-NEXT: s_setpc_b64 s[30:31]
1499
1512
;
1500
1513
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1651,6 +1664,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
1651
1664
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
1652
1665
; GFX12-NEXT: s_wait_storecnt 0x0
1653
1666
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1667
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1654
1668
; GFX12-NEXT: s_setpc_b64 s[30:31]
1655
1669
;
1656
1670
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1824,6 +1838,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
1824
1838
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
1825
1839
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
1826
1840
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
1841
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1827
1842
; GFX12-NEXT: s_setpc_b64 s[30:31]
1828
1843
;
1829
1844
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1994,6 +2009,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
1994
2009
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
1995
2010
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
1996
2011
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
2012
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1997
2013
; GFX12-NEXT: s_setpc_b64 s[30:31]
1998
2014
;
1999
2015
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
0 commit comments