Skip to content

Commit 9ebc6d2

Browse files
committed
[AMDGPU][InsertWaitCnts] Track global_wb/inv/wbinv
global_wb and global_wbinv are tracked using storecnt, and global_inv is tracked using loadcnt. Track them as VMEM_WRITE_ACCESS and VMEM_READ_ACCESS respectively, so that SIInsertWaitcnts does not incorrectly eliminate the waitcnts after these instructions. Solves SWDEV-526604.
1 parent 2b3bca6 commit 9ebc6d2

32 files changed

+952
-8
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,16 @@ class SIInsertWaitcnts {
698698
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM or
699699
// FLAT instruction.
700700
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
701+
switch (Inst.getOpcode()) {
702+
case AMDGPU::GLOBAL_INV:
703+
return VMEM_READ_ACCESS; // tracked using loadcnt
704+
case AMDGPU::GLOBAL_WB:
705+
case AMDGPU::GLOBAL_WBINV:
706+
return VMEM_WRITE_ACCESS; // tracked using storecnt
707+
default:
708+
break;
709+
}
710+
701711
// Maps VMEM access types to their corresponding WaitEventType.
702712
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
703713
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
@@ -2130,15 +2140,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
21302140
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
21312141
}
21322142
} else if (TII->isFLAT(Inst)) {
2133-
// TODO: Track this properly.
2134-
if (isCacheInvOrWBInst(Inst))
2135-
return;
2136-
2137-
assert(Inst.mayLoadOrStore());
2138-
21392143
int FlatASCount = 0;
21402144

2141-
if (mayAccessVMEMThroughFlat(Inst)) {
2145+
assert(isCacheInvOrWBInst(Inst) || Inst.mayLoadOrStore());
2146+
2147+
if (isCacheInvOrWBInst(Inst) || mayAccessVMEMThroughFlat(Inst)) {
21422148
++FlatASCount;
21432149
ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
21442150
Inst);

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
2222
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
2323
; GFX12-NEXT: s_wait_dscnt 0x0
2424
; GFX12-NEXT: global_inv scope:SCOPE_SE
25+
; GFX12-NEXT: s_wait_loadcnt 0x0
2526
; GFX12-NEXT: s_setpc_b64 s[30:31]
2627
;
2728
; GFX942-LABEL: local_atomic_fmax_ret_f32:
@@ -94,6 +95,7 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
9495
; GFX12-NEXT: ds_max_num_f32 v0, v1
9596
; GFX12-NEXT: s_wait_dscnt 0x0
9697
; GFX12-NEXT: global_inv scope:SCOPE_SE
98+
; GFX12-NEXT: s_wait_loadcnt 0x0
9799
; GFX12-NEXT: s_setpc_b64 s[30:31]
98100
;
99101
; GFX942-LABEL: local_atomic_fmax_noret_f32:
@@ -166,6 +168,7 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
166168
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
167169
; GFX12-NEXT: s_wait_dscnt 0x0
168170
; GFX12-NEXT: global_inv scope:SCOPE_SE
171+
; GFX12-NEXT: s_wait_loadcnt 0x0
169172
; GFX12-NEXT: s_setpc_b64 s[30:31]
170173
;
171174
; GFX942-LABEL: local_atomic_fmax_ret_f64:
@@ -242,6 +245,7 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
242245
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
243246
; GFX12-NEXT: s_wait_dscnt 0x0
244247
; GFX12-NEXT: global_inv scope:SCOPE_SE
248+
; GFX12-NEXT: s_wait_loadcnt 0x0
245249
; GFX12-NEXT: s_setpc_b64 s[30:31]
246250
;
247251
; GFX942-LABEL: local_atomic_fmax_noret_f64:
@@ -318,6 +322,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
318322
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
319323
; GFX12-NEXT: s_wait_loadcnt 0x0
320324
; GFX12-NEXT: global_inv scope:SCOPE_DEV
325+
; GFX12-NEXT: s_wait_loadcnt 0x0
321326
; GFX12-NEXT: s_setpc_b64 s[30:31]
322327
;
323328
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -464,6 +469,7 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
464469
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
465470
; GFX12-NEXT: s_wait_storecnt 0x0
466471
; GFX12-NEXT: global_inv scope:SCOPE_DEV
472+
; GFX12-NEXT: s_wait_loadcnt 0x0
467473
; GFX12-NEXT: s_setpc_b64 s[30:31]
468474
;
469475
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -624,6 +630,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
624630
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
625631
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
626632
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
633+
; GFX12-NEXT: s_wait_loadcnt 0x0
627634
; GFX12-NEXT: s_setpc_b64 s[30:31]
628635
;
629636
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -779,6 +786,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
779786
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
780787
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
781788
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
789+
; GFX12-NEXT: s_wait_loadcnt 0x0
782790
; GFX12-NEXT: s_setpc_b64 s[30:31]
783791
;
784792
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -909,6 +917,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
909917
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
910918
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
911919
; GFX12-NEXT: global_inv scope:SCOPE_DEV
920+
; GFX12-NEXT: s_wait_loadcnt 0x0
912921
; GFX12-NEXT: s_setpc_b64 s[30:31]
913922
;
914923
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1051,6 +1060,7 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
10511060
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
10521061
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
10531062
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1063+
; GFX12-NEXT: s_wait_loadcnt 0x0
10541064
; GFX12-NEXT: s_setpc_b64 s[30:31]
10551065
;
10561066
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1210,6 +1220,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
12101220
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
12111221
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
12121222
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1223+
; GFX12-NEXT: s_wait_loadcnt 0x0
12131224
; GFX12-NEXT: s_setpc_b64 s[30:31]
12141225
;
12151226
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1363,6 +1374,7 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
13631374
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
13641375
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
13651376
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1377+
; GFX12-NEXT: s_wait_loadcnt 0x0
13661378
; GFX12-NEXT: s_setpc_b64 s[30:31]
13671379
;
13681380
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -1495,6 +1507,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
14951507
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
14961508
; GFX12-NEXT: s_wait_loadcnt 0x0
14971509
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1510+
; GFX12-NEXT: s_wait_loadcnt 0x0
14981511
; GFX12-NEXT: s_setpc_b64 s[30:31]
14991512
;
15001513
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1651,6 +1664,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16511664
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
16521665
; GFX12-NEXT: s_wait_storecnt 0x0
16531666
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1667+
; GFX12-NEXT: s_wait_loadcnt 0x0
16541668
; GFX12-NEXT: s_setpc_b64 s[30:31]
16551669
;
16561670
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1824,6 +1838,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18241838
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
18251839
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
18261840
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
1841+
; GFX12-NEXT: s_wait_loadcnt 0x0
18271842
; GFX12-NEXT: s_setpc_b64 s[30:31]
18281843
;
18291844
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1994,6 +2009,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19942009
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
19952010
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
19962011
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
2012+
; GFX12-NEXT: s_wait_loadcnt 0x0
19972013
; GFX12-NEXT: s_setpc_b64 s[30:31]
19982014
;
19992015
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) {
2222
; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1
2323
; GFX12-NEXT: s_wait_dscnt 0x0
2424
; GFX12-NEXT: global_inv scope:SCOPE_SE
25+
; GFX12-NEXT: s_wait_loadcnt 0x0
2526
; GFX12-NEXT: s_setpc_b64 s[30:31]
2627
;
2728
; GFX942-LABEL: local_atomic_fmin_ret_f32:
@@ -94,6 +95,7 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) {
9495
; GFX12-NEXT: ds_min_num_f32 v0, v1
9596
; GFX12-NEXT: s_wait_dscnt 0x0
9697
; GFX12-NEXT: global_inv scope:SCOPE_SE
98+
; GFX12-NEXT: s_wait_loadcnt 0x0
9799
; GFX12-NEXT: s_setpc_b64 s[30:31]
98100
;
99101
; GFX942-LABEL: local_atomic_fmin_noret_f32:
@@ -166,6 +168,7 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) {
166168
; GFX12-NEXT: ds_min_num_rtn_f64 v[0:1], v0, v[1:2]
167169
; GFX12-NEXT: s_wait_dscnt 0x0
168170
; GFX12-NEXT: global_inv scope:SCOPE_SE
171+
; GFX12-NEXT: s_wait_loadcnt 0x0
169172
; GFX12-NEXT: s_setpc_b64 s[30:31]
170173
;
171174
; GFX942-LABEL: local_atomic_fmin_ret_f64:
@@ -242,6 +245,7 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) {
242245
; GFX12-NEXT: ds_min_num_f64 v0, v[1:2]
243246
; GFX12-NEXT: s_wait_dscnt 0x0
244247
; GFX12-NEXT: global_inv scope:SCOPE_SE
248+
; GFX12-NEXT: s_wait_loadcnt 0x0
245249
; GFX12-NEXT: s_setpc_b64 s[30:31]
246250
;
247251
; GFX942-LABEL: local_atomic_fmin_noret_f64:
@@ -318,6 +322,7 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
318322
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
319323
; GFX12-NEXT: s_wait_loadcnt 0x0
320324
; GFX12-NEXT: global_inv scope:SCOPE_DEV
325+
; GFX12-NEXT: s_wait_loadcnt 0x0
321326
; GFX12-NEXT: s_setpc_b64 s[30:31]
322327
;
323328
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@@ -464,6 +469,7 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
464469
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV
465470
; GFX12-NEXT: s_wait_storecnt 0x0
466471
; GFX12-NEXT: global_inv scope:SCOPE_DEV
472+
; GFX12-NEXT: s_wait_loadcnt 0x0
467473
; GFX12-NEXT: s_setpc_b64 s[30:31]
468474
;
469475
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@@ -624,6 +630,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
624630
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
625631
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
626632
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
633+
; GFX12-NEXT: s_wait_loadcnt 0x0
627634
; GFX12-NEXT: s_setpc_b64 s[30:31]
628635
;
629636
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@@ -779,6 +786,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
779786
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
780787
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
781788
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
789+
; GFX12-NEXT: s_wait_loadcnt 0x0
782790
; GFX12-NEXT: s_setpc_b64 s[30:31]
783791
;
784792
; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
@@ -909,6 +917,7 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
909917
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
910918
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
911919
; GFX12-NEXT: global_inv scope:SCOPE_DEV
920+
; GFX12-NEXT: s_wait_loadcnt 0x0
912921
; GFX12-NEXT: s_setpc_b64 s[30:31]
913922
;
914923
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1051,6 +1060,7 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
10511060
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV
10521061
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
10531062
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1063+
; GFX12-NEXT: s_wait_loadcnt 0x0
10541064
; GFX12-NEXT: s_setpc_b64 s[30:31]
10551065
;
10561066
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1210,6 +1220,7 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
12101220
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
12111221
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
12121222
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1223+
; GFX12-NEXT: s_wait_loadcnt 0x0
12131224
; GFX12-NEXT: s_setpc_b64 s[30:31]
12141225
;
12151226
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1363,6 +1374,7 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
13631374
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
13641375
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
13651376
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1377+
; GFX12-NEXT: s_wait_loadcnt 0x0
13661378
; GFX12-NEXT: s_setpc_b64 s[30:31]
13671379
;
13681380
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
@@ -1495,6 +1507,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_m
14951507
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
14961508
; GFX12-NEXT: s_wait_loadcnt 0x0
14971509
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1510+
; GFX12-NEXT: s_wait_loadcnt 0x0
14981511
; GFX12-NEXT: s_setpc_b64 s[30:31]
14991512
;
15001513
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1651,6 +1664,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_
16511664
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
16521665
; GFX12-NEXT: s_wait_storecnt 0x0
16531666
; GFX12-NEXT: global_inv scope:SCOPE_DEV
1667+
; GFX12-NEXT: s_wait_loadcnt 0x0
16541668
; GFX12-NEXT: s_setpc_b64 s[30:31]
16551669
;
16561670
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1824,6 +1838,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
18241838
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
18251839
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
18261840
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
1841+
; GFX12-NEXT: s_wait_loadcnt 0x0
18271842
; GFX12-NEXT: s_setpc_b64 s[30:31]
18281843
;
18291844
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1994,6 +2009,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
19942009
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
19952010
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
19962011
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
2012+
; GFX12-NEXT: s_wait_loadcnt 0x0
19972013
; GFX12-NEXT: s_setpc_b64 s[30:31]
19982014
;
19992015
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace
576576
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
577577
; GFX12-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
578578
; GFX12-NEXT: global_wb scope:SCOPE_SYS
579+
; GFX12-NEXT: s_wait_storecnt 0x0
579580
; GFX12-NEXT: s_wait_kmcnt 0x0
580581
; GFX12-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
581582
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -805,6 +806,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspa
805806
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
806807
; GFX12-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
807808
; GFX12-NEXT: global_wb scope:SCOPE_SYS
809+
; GFX12-NEXT: s_wait_storecnt 0x0
808810
; GFX12-NEXT: s_wait_kmcnt 0x0
809811
; GFX12-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 scope:SCOPE_SYS
810812
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -1721,6 +1723,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace
17211723
; GFX12-NEXT: v_mov_b32_e32 v0, 42
17221724
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
17231725
; GFX12-NEXT: global_wb scope:SCOPE_SYS
1726+
; GFX12-NEXT: s_wait_storecnt 0x0
17241727
; GFX12-NEXT: s_wait_kmcnt 0x0
17251728
; GFX12-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17261729
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -1968,6 +1971,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspa
19681971
; GFX12-NEXT: v_mov_b32_e32 v0, 42
19691972
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
19701973
; GFX12-NEXT: global_wb scope:SCOPE_SYS
1974+
; GFX12-NEXT: s_wait_storecnt 0x0
19711975
; GFX12-NEXT: s_wait_kmcnt 0x0
19721976
; GFX12-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 scope:SCOPE_SYS
19731977
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -2466,6 +2470,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
24662470
; GFX12-NEXT: s_wait_kmcnt 0x0
24672471
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
24682472
; GFX12-NEXT: global_wb scope:SCOPE_SYS
2473+
; GFX12-NEXT: s_wait_storecnt 0x0
24692474
; GFX12-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
24702475
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
24712476
; GFX12-NEXT: global_inv scope:SCOPE_SYS
@@ -2718,6 +2723,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
27182723
; GFX12-NEXT: s_wait_kmcnt 0x0
27192724
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
27202725
; GFX12-NEXT: global_wb scope:SCOPE_SYS
2726+
; GFX12-NEXT: s_wait_storecnt 0x0
27212727
; GFX12-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 scope:SCOPE_SYS
27222728
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
27232729
; GFX12-NEXT: global_inv scope:SCOPE_SYS
@@ -3423,6 +3429,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
34233429
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2
34243430
; GFX12-NEXT: v_mov_b32_e32 v3, s3
34253431
; GFX12-NEXT: global_wb scope:SCOPE_SYS
3432+
; GFX12-NEXT: s_wait_storecnt 0x0
34263433
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
34273434
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
34283435
; GFX12-NEXT: global_inv scope:SCOPE_SYS
@@ -3693,6 +3700,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
36933700
; GFX12-NEXT: s_wait_kmcnt 0x0
36943701
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
36953702
; GFX12-NEXT: global_wb scope:SCOPE_SYS
3703+
; GFX12-NEXT: s_wait_storecnt 0x0
36963704
; GFX12-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
36973705
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
36983706
; GFX12-NEXT: global_inv scope:SCOPE_SYS
@@ -4072,6 +4080,7 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
40724080
; GFX12-NEXT: ds_inc_rtn_u32 v2, v0, v1
40734081
; GFX12-NEXT: s_wait_dscnt 0x0
40744082
; GFX12-NEXT: global_inv scope:SCOPE_SE
4083+
; GFX12-NEXT: s_wait_loadcnt 0x0
40754084
; GFX12-NEXT: ds_inc_rtn_u32 v0, v0, v1
40764085
; GFX12-NEXT: s_wait_dscnt 0x0
40774086
; GFX12-NEXT: global_inv scope:SCOPE_SE

0 commit comments

Comments
 (0)