Skip to content

Commit 36ef291

Browse files
authored
[AMDGPU] Fix hang caused by VS_CNT handling at calls (#78318)
Fix a potential hang introduced by #77439 and #77935. This line: setScoreUB(VS_CNT, getScoreLB(VS_CNT) + getWaitCountMax(VS_CNT)); could potentially set UB lower than it was before, which confused SIInsertWaitcnts's fixed point algorithm. This was only triggered by a STORE instruction with an implicit-def, which seems odd but apparently happens for some spills.
1 parent 5fcf907 commit 36ef291

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ class WaitcntBrackets {
293293
}
294294

295295
void setStateOnFunctionEntryOrReturn() {
296-
setScoreUB(VS_CNT, getScoreLB(VS_CNT) + getWaitCountMax(VS_CNT));
296+
setScoreUB(VS_CNT, getScoreUB(VS_CNT) + getWaitCountMax(VS_CNT));
297297
PendingEvents |= WaitEventMaskForInst[VS_CNT];
298298
}
299299

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s
3+
4+
---
5+
name: test
6+
tracksRegLiveness: true
7+
stack:
8+
- { id: 0, name: '', type: spill-slot, offset: 4, size: 40, alignment: 4,
9+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
10+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
11+
machineFunctionInfo:
12+
frameOffsetReg: '$sgpr33'
13+
body: |
14+
; CHECK-LABEL: name: test
15+
; CHECK: bb.0:
16+
; CHECK-NEXT: successors: %bb.1(0x80000000)
17+
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $vgpr0, $vgpr1, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: S_WAITCNT 0
20+
; CHECK-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed undef $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 :: (store (s128) into %stack.0, align 4, addrspace 5)
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: bb.1:
23+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
24+
; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $vgpr31, $vgpr40, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $vgpr0_vgpr1:0x000000000000000F
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed undef renamable $sgpr0_sgpr1, 0, csr_amdgpu
27+
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: bb.2:
30+
; CHECK-NEXT: liveins: $sgpr46, $vgpr40
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
33+
bb.0:
34+
successors: %bb.1(0x80000000)
35+
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $vgpr0, $vgpr1, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
36+
37+
SCRATCH_STORE_DWORDX4_SADDR killed undef $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 :: (store (s128) into %stack.0, align 4, addrspace 5)
38+
39+
bb.1:
40+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
41+
liveins: $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $vgpr31, $vgpr40, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $vgpr0_vgpr1:0x000000000000000F
42+
43+
dead $sgpr30_sgpr31 = SI_CALL killed undef renamable $sgpr0_sgpr1, 0, csr_amdgpu
44+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
45+
46+
bb.2:
47+
liveins: $sgpr46, $vgpr40
48+
49+
S_SETPC_B64_return undef $sgpr30_sgpr31
50+
...

0 commit comments

Comments
 (0)