Skip to content

Commit da4ec0f

Browse files
mbrkusanin authored and jmmartinez committed
[AMDGPU][SIPreEmitPeephole] Fix mustRetainExeczBranch (llvm#120121)
Do not remove S_CBRANCH_EXECZ if one of the following blocks contains an unconditional branch to a block other than the one immediately following it. This can cause unwanted behavior like infinite loops.
1 parent 6fa759e commit da4ec0f

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
365365
if (MI.isConditionalBranch())
366366
return true;
367367

368+
if (MI.isUnconditionalBranch() &&
369+
TII->getBranchDestBlock(MI) != MBB.getNextNode())
370+
return true;
371+
368372
if (MI.isMetaInstruction())
369373
continue;
370374

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-pre-emit-peephole %s -o - | FileCheck %s
3+
# Do not remove S_CBRANCH_EXECZ if the following block contains an unconditional
4+
# branch to a block other than the one immediately following it.
5+
6+
---
7+
name: test
8+
body: |
9+
; CHECK-LABEL: name: test
10+
; CHECK: bb.0:
11+
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
12+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
15+
; CHECK-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
16+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
17+
; CHECK-NEXT: {{ $}}
18+
; CHECK-NEXT: bb.1:
19+
; CHECK-NEXT: successors: %bb.2(0x80000000)
20+
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
23+
; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
24+
; CHECK-NEXT: S_BRANCH %bb.2
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: bb.2:
27+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
28+
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
31+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: bb.3:
34+
; CHECK-NEXT: successors: %bb.1(0x80000000)
35+
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
38+
; CHECK-NEXT: S_BRANCH %bb.1
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: bb.4:
41+
; CHECK-NEXT: successors: %bb.5(0x80000000)
42+
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
43+
; CHECK-NEXT: {{ $}}
44+
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: bb.5:
47+
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
50+
; CHECK-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
51+
; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0
52+
bb.0:
53+
liveins: $vgpr0, $vgpr1
54+
55+
$sgpr0_sgpr1 = S_MOV_B64 $exec
56+
V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
57+
S_CBRANCH_EXECZ %bb.5, implicit $exec
58+
59+
bb.1:
60+
liveins: $vgpr1, $sgpr0_sgpr1
61+
62+
renamable $sgpr2_sgpr3 = IMPLICIT_DEF
63+
renamable $sgpr4_sgpr5 = IMPLICIT_DEF
64+
S_BRANCH %bb.2
65+
66+
bb.2:
67+
liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
68+
69+
$exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
70+
S_CBRANCH_EXECZ %bb.4, implicit $exec
71+
72+
bb.3:
73+
liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
74+
75+
renamable $sgpr4_sgpr5 = IMPLICIT_DEF
76+
S_BRANCH %bb.1
77+
78+
bb.4:
79+
liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
80+
81+
$exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
82+
83+
bb.5:
84+
liveins: $vgpr1, $sgpr0_sgpr1
85+
86+
$exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
87+
renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
88+
SI_RETURN_TO_EPILOG killed $vgpr0
89+
90+
...

0 commit comments

Comments
 (0)