Skip to content

Commit d5ab379

Browse files
committed
AMDGPU: Add baseline test for broken machine sinking
1 parent 7be7f23 commit d5ab379

File tree

3 files changed

+615
-0
lines changed

3 files changed

+615
-0
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s
3+
4+
; A VGPR loop variable was incorrectly sunk into a flow block, past
5+
; the si_end_cf reconvergence point.
6+
7+
; Reproducer with two nested loops followed by an endless loop:
;   * outer loop:  %for.body33 -> (%for.body51 ...) -> %for.end121 -> %for.body33
;   * inner loop:  %for.body51 -> [%if.then112] -> %if.end118 -> %for.body51
;   * tail:        %for.body159 branches to itself unconditionally
; Both loop-controlling i1 values (%cmp49280.not, %cmp108) are plain function
; arguments. The value that leaves the inner loop through %j.0.lcssa is
; %add48284, i.e. the inner induction variable *before* its "+ 4" update.
; NOTE(review): in the autogenerated assertions below, the
; "v_add_nc_u32_e32 v4, -4, v4" in the %Flow block comes after
; "s_or_b32 exec_lo, exec_lo, s8"; this looks like the sunk recomputation of
; that loop variable which this baseline test records - confirm.
define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49280.not, i32 %arg1, i1 %cmp108) {
8+
; CHECK-LABEL: machinesink_loop_variable_out_of_divergent_loop:
9+
; CHECK: ; %bb.0: ; %entry
10+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
12+
; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
13+
; CHECK-NEXT: s_mov_b32 s5, 0
14+
; CHECK-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
15+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
16+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
17+
; CHECK-NEXT: s_xor_b32 s6, s4, -1
18+
; CHECK-NEXT: s_inst_prefetch 0x1
19+
; CHECK-NEXT: s_branch .LBB0_3
20+
; CHECK-NEXT: .p2align 6
21+
; CHECK-NEXT: .LBB0_1: ; %Flow
22+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
23+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
24+
; CHECK-NEXT: v_add_nc_u32_e32 v4, -4, v4
25+
; CHECK-NEXT: .LBB0_2: ; %Flow1
26+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
27+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
28+
; CHECK-NEXT: v_cmp_ne_u32_e64 s4, 0, v3
29+
; CHECK-NEXT: ;;#ASMSTART
30+
; CHECK-NEXT: ; j lastloop entry
31+
; CHECK-NEXT: ;;#ASMEND
32+
; CHECK-NEXT: s_or_b32 s5, s4, s5
33+
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
34+
; CHECK-NEXT: s_cbranch_execz .LBB0_8
35+
; CHECK-NEXT: .LBB0_3: ; %for.body33
36+
; CHECK-NEXT: ; =>This Loop Header: Depth=1
37+
; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
38+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
39+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
40+
; CHECK-NEXT: s_and_saveexec_b32 s7, s6
41+
; CHECK-NEXT: s_cbranch_execz .LBB0_2
42+
; CHECK-NEXT: ; %bb.4: ; %for.body51.preheader
43+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
44+
; CHECK-NEXT: s_mov_b32 s8, 0
45+
; CHECK-NEXT: s_mov_b32 s9, 0
46+
; CHECK-NEXT: s_branch .LBB0_6
47+
; CHECK-NEXT: .p2align 6
48+
; CHECK-NEXT: .LBB0_5: ; %if.end118
49+
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
50+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4
51+
; CHECK-NEXT: s_add_i32 s9, s9, 4
52+
; CHECK-NEXT: ;;#ASMSTART
53+
; CHECK-NEXT: ; backedge
54+
; CHECK-NEXT: ;;#ASMEND
55+
; CHECK-NEXT: v_add_nc_u32_e32 v4, s9, v2
56+
; CHECK-NEXT: v_cmp_ge_u32_e64 s4, v4, v0
57+
; CHECK-NEXT: s_or_b32 s8, s4, s8
58+
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
59+
; CHECK-NEXT: s_cbranch_execz .LBB0_1
60+
; CHECK-NEXT: .LBB0_6: ; %for.body51
61+
; CHECK-NEXT: ; Parent Loop BB0_3 Depth=1
62+
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
63+
; CHECK-NEXT: v_mov_b32_e32 v3, 1
64+
; CHECK-NEXT: s_and_saveexec_b32 s4, vcc_lo
65+
; CHECK-NEXT: s_cbranch_execz .LBB0_5
66+
; CHECK-NEXT: ; %bb.7: ; %if.then112
67+
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
68+
; CHECK-NEXT: s_add_i32 s10, s9, 4
69+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
70+
; CHECK-NEXT: v_mov_b32_e32 v4, s10
71+
; CHECK-NEXT: ds_write_b32 v1, v4
72+
; CHECK-NEXT: s_branch .LBB0_5
73+
; CHECK-NEXT: .LBB0_8: ; %for.body159.preheader
74+
; CHECK-NEXT: s_inst_prefetch 0x2
75+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
76+
; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo
77+
; CHECK-NEXT: .LBB0_9: ; %for.body159
78+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
79+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
80+
; CHECK-NEXT: ; %bb.10: ; %DummyReturnBlock
81+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
82+
; CHECK-NEXT: s_setpc_b64 s[30:31]
83+
entry:
84+
br label %for.body33
85+
86+
; Outer loop header: when %cmp49280.not is true the inner loop is skipped and
; control goes straight to the outer latch %for.end121.
for.body33: ; preds = %for.end121, %entry
87+
br i1 %cmp49280.not, label %for.end121, label %for.body51
88+
89+
; Inner loop header. %add48284 starts at %arg1 and %collision.0281 starts at 1;
; both are advanced by 4 in %if.end118.
for.body51: ; preds = %if.end118, %for.body33
90+
%add48284 = phi i32 [ %add48, %if.end118 ], [ %arg1, %for.body33 ]
91+
%collision.0281 = phi i32 [ %inc119, %if.end118 ], [ 1, %for.body33 ]
92+
br i1 %cmp108, label %if.then112, label %if.end118
93+
94+
; Reached only when %cmp108 is true: stores %collision.0281 + 3 through a null
; addrspace(3) pointer (the ds_write_b32 in the expected output).
if.then112: ; preds = %for.body51
95+
%inc101 = add i32 %collision.0281, 3
96+
store i32 %inc101, ptr addrspace(3) null, align 2147483648
97+
br label %if.end118
98+
99+
; Inner loop latch. %thCollNum.5 is 0 when this iteration went through
; %if.then112 and 1 otherwise. The operand-less inline asm only emits a
; "; backedge" marker. The loop repeats while %add48 is unsigned-less-than %arg.
if.end118: ; preds = %if.then112, %for.body51
100+
%thCollNum.5 = phi i32 [ 0, %if.then112 ], [ 1, %for.body51 ]
101+
%inc119 = add i32 %collision.0281, 4
102+
tail call void asm sideeffect "; backedge", ""()
103+
%add48 = add i32 %add48284, 4
104+
%cmp49 = icmp ult i32 %add48, %arg
105+
br i1 %cmp49, label %for.body51, label %for.end121
106+
107+
; Inner loop exit and outer loop latch. %j.0.lcssa is 0 when the inner loop was
; skipped, otherwise the pre-increment %add48284; it is passed to inline asm
; through the "=v,0" constraint string. The outer loop repeats while
; %thCollNum.1.lcssa equals 0.
for.end121: ; preds = %if.end118, %for.body33
108+
%thCollNum.1.lcssa = phi i32 [ 0, %for.body33 ], [ %thCollNum.5, %if.end118 ]
109+
%j.0.lcssa = phi i32 [ 0, %for.body33 ], [ %add48284, %if.end118 ]
110+
%i5 = tail call i32 asm sideeffect "; j lastloop entry", "=v,0"(i32 %j.0.lcssa)
111+
%cmp31 = icmp eq i32 %thCollNum.1.lcssa, 0
112+
br i1 %cmp31, label %for.body33, label %for.body159
113+
114+
; Unconditional self-loop: this block has no other successor in the IR.
for.body159: ; preds = %for.body159, %for.end121
115+
br label %for.body159
116+
}
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s
3+
4+
# A VGPR loop variable was incorrectly sunk into a flow block, past
5+
# the si_end_cf reconvergence point.
6+
7+
# VGPR variant. %5 (a V_ADD_U32_e64) is defined in bb.4, ahead of the
# SI_IF_BREAK / SI_LOOP pair, and its only reader is the INLINEASM in bb.5 that
# follows SI_END_CF. The expected output recorded below has the V_ADD_U32_e64
# in bb.5 after SI_END_CF, which is the placement this test's description calls
# incorrect.
---
8+
name: machinesink_loop_vgpr_out_of_divergent_loop
9+
tracksRegLiveness: true
10+
machineFunctionInfo:
11+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
12+
frameOffsetReg: '$sgpr33'
13+
stackPtrOffsetReg: '$sgpr32'
14+
body: |
15+
; CHECK-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop
16+
; CHECK: bb.0:
17+
; CHECK-NEXT: successors: %bb.1(0x80000000)
18+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr8
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
21+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
22+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: bb.1:
25+
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
26+
; CHECK-NEXT: {{ $}}
27+
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
28+
; CHECK-NEXT: S_BRANCH %bb.2
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: bb.2:
31+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
34+
; CHECK-NEXT: S_BRANCH %bb.3
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: bb.3:
37+
; CHECK-NEXT: successors: %bb.4(0x80000000)
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: S_NOP 0
40+
; CHECK-NEXT: {{ $}}
41+
; CHECK-NEXT: bb.4:
42+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
43+
; CHECK-NEXT: {{ $}}
44+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
45+
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
46+
; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
47+
; CHECK-NEXT: S_BRANCH %bb.5
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: bb.5:
50+
; CHECK-NEXT: successors: %bb.2(0x80000000)
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
53+
; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
54+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
55+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]]
56+
; CHECK-NEXT: S_BRANCH %bb.2
57+
; CHECK-NEXT: {{ $}}
58+
; CHECK-NEXT: bb.6:
59+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
60+
; CHECK-NEXT: {{ $}}
61+
; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
62+
; CHECK-NEXT: S_BRANCH %bb.7
63+
; CHECK-NEXT: {{ $}}
64+
; CHECK-NEXT: bb.7:
65+
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
68+
; CHECK-NEXT: S_BRANCH %bb.8
69+
; CHECK-NEXT: {{ $}}
70+
; CHECK-NEXT: bb.8:
71+
; CHECK-NEXT: SI_RETURN
72+
; Input MIR begins here. %0 and %2 are VGPR copies, %1 is an SGPR copy; %2 has
; no reader in this function.
bb.0:
73+
liveins: $vgpr0, $vgpr1, $sgpr8
74+
75+
%0:vgpr_32 = COPY $vgpr0
76+
%1:sreg_32 = COPY $sgpr8
77+
%2:vgpr_32 = COPY $vgpr1
78+
79+
; Outer region: SI_IF on %1 targets bb.6, whose SI_LOOP on %3 branches back here.
bb.1:
80+
%3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
81+
S_BRANCH %bb.2
82+
83+
; Inner loop header: the SI_LOOP on %6 in bb.4 branches back to this block.
bb.2:
84+
%4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
85+
S_BRANCH %bb.3
86+
87+
bb.3:
88+
S_NOP 0
89+
90+
; %5 is computed inside the inner loop, before the break mask %6 is formed.
bb.4:
91+
INLINEASM &"", 1 /* sideeffect attdialect */
92+
%5:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec
93+
%6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
94+
SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
95+
S_BRANCH %bb.5
96+
97+
; Loop exit block: SI_END_CF %6 comes first, then the only use of %5. %7 has no
; reader in this function.
bb.5:
98+
%7:vgpr_32 = PHI %0, %bb.4
99+
SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
100+
INLINEASM &"", 1, implicit %5
101+
S_BRANCH %bb.2
102+
103+
bb.6:
104+
SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
105+
S_BRANCH %bb.7
106+
107+
; Branches to itself on an undef $vcc condition, otherwise continues to bb.8.
bb.7:
108+
S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
109+
S_BRANCH %bb.8
110+
111+
bb.8:
112+
SI_RETURN
113+
114+
...
115+
116+
# The same testcase, except the relevant instruction is scalar and
117+
# could be legally sunk.
118+
# SGPR variant. Same block structure as the VGPR function, but all live-ins are
# SGPRs and %5 is produced by S_ADD_I32. The expected output recorded below has
# the S_ADD_I32 in bb.5 after SI_END_CF.
---
119+
name: machinesink_loop_sgpr_out_of_divergent_loop
120+
tracksRegLiveness: true
121+
machineFunctionInfo:
122+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
123+
frameOffsetReg: '$sgpr33'
124+
stackPtrOffsetReg: '$sgpr32'
125+
body: |
126+
; CHECK-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop
127+
; CHECK: bb.0:
128+
; CHECK-NEXT: successors: %bb.1(0x80000000)
129+
; CHECK-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10
130+
; CHECK-NEXT: {{ $}}
131+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
132+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9
133+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10
134+
; CHECK-NEXT: {{ $}}
135+
; CHECK-NEXT: bb.1:
136+
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
137+
; CHECK-NEXT: {{ $}}
138+
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
139+
; CHECK-NEXT: S_BRANCH %bb.2
140+
; CHECK-NEXT: {{ $}}
141+
; CHECK-NEXT: bb.2:
142+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
143+
; CHECK-NEXT: {{ $}}
144+
; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
145+
; CHECK-NEXT: S_BRANCH %bb.3
146+
; CHECK-NEXT: {{ $}}
147+
; CHECK-NEXT: bb.3:
148+
; CHECK-NEXT: successors: %bb.4(0x80000000)
149+
; CHECK-NEXT: {{ $}}
150+
; CHECK-NEXT: S_NOP 0
151+
; CHECK-NEXT: {{ $}}
152+
; CHECK-NEXT: bb.4:
153+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
154+
; CHECK-NEXT: {{ $}}
155+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
156+
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
157+
; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
158+
; CHECK-NEXT: S_BRANCH %bb.5
159+
; CHECK-NEXT: {{ $}}
160+
; CHECK-NEXT: bb.5:
161+
; CHECK-NEXT: successors: %bb.2(0x80000000)
162+
; CHECK-NEXT: {{ $}}
163+
; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
164+
; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
165+
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def dead $scc
166+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[S_ADD_I32_]]
167+
; CHECK-NEXT: S_BRANCH %bb.2
168+
; CHECK-NEXT: {{ $}}
169+
; CHECK-NEXT: bb.6:
170+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
171+
; CHECK-NEXT: {{ $}}
172+
; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
173+
; CHECK-NEXT: S_BRANCH %bb.7
174+
; CHECK-NEXT: {{ $}}
175+
; CHECK-NEXT: bb.7:
176+
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
177+
; CHECK-NEXT: {{ $}}
178+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
179+
; CHECK-NEXT: S_BRANCH %bb.8
180+
; CHECK-NEXT: {{ $}}
181+
; CHECK-NEXT: bb.8:
182+
; CHECK-NEXT: SI_RETURN
183+
; Input MIR begins here. %0, %1 and %2 are all SGPR copies; %2 has no reader in
; this function.
bb.0:
184+
liveins: $sgpr8, $sgpr9, $sgpr10
185+
186+
%0:sreg_32 = COPY $sgpr8
187+
%1:sreg_32 = COPY $sgpr9
188+
%2:sreg_32 = COPY $sgpr10
189+
190+
; Outer region: SI_IF on %1 targets bb.6, whose SI_LOOP on %3 branches back here.
bb.1:
191+
%3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
192+
S_BRANCH %bb.2
193+
194+
; Inner loop header: the SI_LOOP on %6 in bb.4 branches back to this block.
bb.2:
195+
%4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
196+
S_BRANCH %bb.3
197+
198+
bb.3:
199+
S_NOP 0
200+
201+
; %5 is a scalar add computed inside the inner loop, before the break mask %6
; is formed.
bb.4:
202+
INLINEASM &"", 1 /* sideeffect attdialect */
203+
%5:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
204+
%6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
205+
SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
206+
S_BRANCH %bb.5
207+
208+
; Loop exit block: SI_END_CF %6 comes first, then the only use of %5.
; NOTE(review): %7 is declared vgpr_32 although its only incoming value %0 is
; sreg_32 in this function, and %7 has no reader; presumably carried over from
; the VGPR variant - confirm it is intentional.
bb.5:
209+
%7:vgpr_32 = PHI %0, %bb.4
210+
SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
211+
INLINEASM &"", 1, implicit %5
212+
S_BRANCH %bb.2
213+
214+
bb.6:
215+
SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
216+
S_BRANCH %bb.7
217+
218+
; Branches to itself on an undef $vcc condition, otherwise continues to bb.8.
bb.7:
219+
S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
220+
S_BRANCH %bb.8
221+
222+
bb.8:
223+
SI_RETURN
224+
225+
...

0 commit comments

Comments
 (0)