Skip to content

Commit f1ea77f

Browse files
authored
[AMDGPU][SIInsertWaitcnts] Set initial state for VS_CNT in non-kernel functions (#75436)
Split from #72830
1 parent e7432ba commit f1ea77f

File tree

4 files changed

+110
-254
lines changed

4 files changed

+110
-254
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,11 @@ class WaitcntBrackets {
292292
VgprVmemTypes[GprNo] = 0;
293293
}
294294

295+
/// Put this bracket set into the state used on entry to a non-kernel function:
/// the VS_CNT score upper bound is raised to the maximum wait count for VS_CNT,
/// and every event in WaitEventMaskForInst[VS_CNT] is flagged as pending.
/// NOTE(review): presumably this models VS_CNT events from the caller that may
/// still be outstanding at function entry -- confirm against the pass.
void setNonKernelFunctionInitialState() {
296+
setScoreUB(VS_CNT, getWaitCountMax(VS_CNT)); // upper bound := max count for VS_CNT
297+
PendingEvents |= WaitEventMaskForInst[VS_CNT]; // OR in the VS_CNT event mask; other bits untouched
298+
}
299+
295300
void print(raw_ostream &);
296301
void dump() { print(dbgs()); }
297302

@@ -1865,6 +1870,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
18651870
;
18661871
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
18671872

1873+
auto NonKernelInitialState =
1874+
std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
1875+
NonKernelInitialState->setNonKernelFunctionInitialState();
1876+
BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
1877+
18681878
Modified = true;
18691879
}
18701880

llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
5555
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5656
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
5757
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
58+
; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
5859
; GFX11-BACKOFF-NEXT: s_barrier
5960
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
6061
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll

Lines changed: 0 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
1717
; GFX11-NEXT: v_mov_b32_e32 v0, v10
1818
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
1919
; GFX11-NEXT: global_store_b32 v[8:9], v0, off
20-
; GFX11-NEXT: s_nop 0
21-
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2220
; GFX11-NEXT: s_endpgm
2321
;
2422
; GFX10-LABEL: set_inactive_chain_arg:
@@ -39,8 +37,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
3937
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
4038
; GFX11_W64-NEXT: s_not_b64 exec, exec
4139
; GFX11_W64-NEXT: global_store_b32 v[8:9], v0, off
42-
; GFX11_W64-NEXT: s_nop 0
43-
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4440
; GFX11_W64-NEXT: s_endpgm
4541
;
4642
; GFX10_W64-LABEL: set_inactive_chain_arg:
@@ -68,8 +64,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
6864
; GFX11-NEXT: v_mov_b32_e32 v1, v11
6965
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
7066
; GFX11-NEXT: global_store_b64 v[8:9], v[0:1], off
71-
; GFX11-NEXT: s_nop 0
72-
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7367
; GFX11-NEXT: s_endpgm
7468
;
7569
; GFX10-LABEL: set_inactive_chain_arg_64:
@@ -94,8 +88,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
9488
; GFX11_W64-NEXT: v_mov_b32_e32 v1, v11
9589
; GFX11_W64-NEXT: s_not_b64 exec, exec
9690
; GFX11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
97-
; GFX11_W64-NEXT: s_nop 0
98-
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
9991
; GFX11_W64-NEXT: s_endpgm
10092
;
10193
; GFX10_W64-LABEL: set_inactive_chain_arg_64:
@@ -133,8 +125,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
133125
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
134126
; GFX11-NEXT: v_mov_b32_e32 v2, v1
135127
; GFX11-NEXT: global_store_b32 v[8:9], v2, off
136-
; GFX11-NEXT: s_nop 0
137-
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
138128
; GFX11-NEXT: s_endpgm
139129
;
140130
; GFX10-LABEL: set_inactive_chain_arg_dpp:
@@ -174,8 +164,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
174164
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
175165
; GFX11_W64-NEXT: v_mov_b32_e32 v2, v1
176166
; GFX11_W64-NEXT: global_store_b32 v[8:9], v2, off
177-
; GFX11_W64-NEXT: s_nop 0
178-
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
179167
; GFX11_W64-NEXT: s_endpgm
180168
;
181169
; GFX10_W64-LABEL: set_inactive_chain_arg_dpp:
@@ -233,8 +221,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
233221
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
234222
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
235223
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
236-
; GISEL11-NEXT: s_nop 0
237-
; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
238224
; GISEL11-NEXT: s_endpgm
239225
;
240226
; DAGISEL11-LABEL: set_inactive_chain_arg_call:
@@ -265,8 +251,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
265251
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
266252
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
267253
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
268-
; DAGISEL11-NEXT: s_nop 0
269-
; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
270254
; DAGISEL11-NEXT: s_endpgm
271255
;
272256
; GISEL10-LABEL: set_inactive_chain_arg_call:
@@ -380,8 +364,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
380364
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
381365
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
382366
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
383-
; GISEL11_W64-NEXT: s_nop 0
384-
; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
385367
; GISEL11_W64-NEXT: s_endpgm
386368
;
387369
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_call:
@@ -419,8 +401,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
419401
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
420402
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
421403
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
422-
; DAGISEL11_W64-NEXT: s_nop 0
423-
; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
424404
; DAGISEL11_W64-NEXT: s_endpgm
425405
;
426406
; GISEL10_W64-LABEL: set_inactive_chain_arg_call:
@@ -538,8 +518,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
538518
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
539519
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
540520
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
541-
; GISEL11-NEXT: s_nop 0
542-
; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
543521
; GISEL11-NEXT: s_endpgm
544522
;
545523
; DAGISEL11-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -570,8 +548,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
570548
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
571549
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
572550
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
573-
; DAGISEL11-NEXT: s_nop 0
574-
; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
575551
; DAGISEL11-NEXT: s_endpgm
576552
;
577553
; GISEL10-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -685,8 +661,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
685661
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
686662
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
687663
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
688-
; GISEL11_W64-NEXT: s_nop 0
689-
; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
690664
; GISEL11_W64-NEXT: s_endpgm
691665
;
692666
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -724,8 +698,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
724698
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
725699
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
726700
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
727-
; DAGISEL11_W64-NEXT: s_nop 0
728-
; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
729701
; DAGISEL11_W64-NEXT: s_endpgm
730702
;
731703
; GISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr:

0 commit comments

Comments
 (0)