Skip to content

Commit 5a92673

Browse files
SWDEV-433501: Fix test for machine-sink
Change-Id: I06b1d7fceafbb9fcb230b4609a7adbbfb95f79d8
1 parent e31c3cc commit 5a92673

File tree

1 file changed

+99
-99
lines changed

1 file changed

+99
-99
lines changed

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll

Lines changed: 99 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
113113
; CHECK-NEXT: v_mov_b32_e32 v42, v0
114114
; CHECK-NEXT: s_mov_b32 s48, exec_lo
115115
; CHECK-NEXT: v_cmpx_ne_u32_e32 0, v42
116-
; CHECK-NEXT: s_cbranch_execz .LBB0_25
116+
; CHECK-NEXT: s_cbranch_execz .LBB0_24
117117
; CHECK-NEXT: ; %bb.1: ; %.preheader5
118118
; CHECK-NEXT: v_mul_lo_u32 v0, v40, 14
119119
; CHECK-NEXT: s_mov_b32 s4, 0
@@ -133,58 +133,75 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
133133
; CHECK-NEXT: s_mov_b32 s49, 0
134134
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v45
135135
; CHECK-NEXT: s_and_b32 exec_lo, exec_lo, vcc_lo
136-
; CHECK-NEXT: s_cbranch_execz .LBB0_25
136+
; CHECK-NEXT: s_cbranch_execz .LBB0_24
137137
; CHECK-NEXT: ; %bb.4:
138138
; CHECK-NEXT: v_lshlrev_b32_e32 v43, 10, v43
139139
; CHECK-NEXT: v_add_nc_u32_e32 v46, 0x3c05, v0
140140
; CHECK-NEXT: v_mov_b32_e32 v47, 0
141+
; CHECK-NEXT: s_mov_b32 s55, 0
141142
; CHECK-NEXT: s_getpc_b64 s[42:43]
142143
; CHECK-NEXT: s_add_u32 s42, s42, _Z10atomic_incPU3AS3Vj@rel32@lo+4
143144
; CHECK-NEXT: s_addc_u32 s43, s43, _Z10atomic_incPU3AS3Vj@rel32@hi+12
144-
; CHECK-NEXT: s_mov_b32 s55, 0
145-
; CHECK-NEXT: .LBB0_5: ; =>This Loop Header: Depth=1
146-
; CHECK-NEXT: ; Child Loop BB0_8 Depth 2
147-
; CHECK-NEXT: ; Child Loop BB0_20 Depth 2
145+
; CHECK-NEXT: s_branch .LBB0_7
146+
; CHECK-NEXT: .LBB0_5: ; %Flow37
147+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
148+
; CHECK-NEXT: s_inst_prefetch 0x2
149+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
150+
; CHECK-NEXT: .LBB0_6: ; %Flow38
151+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
152+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
153+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s54, v45
154+
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
155+
; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46
156+
; CHECK-NEXT: s_mov_b32 s55, s54
157+
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
158+
; CHECK-NEXT: s_and_b32 s4, exec_lo, s4
159+
; CHECK-NEXT: s_or_b32 s49, s4, s49
160+
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s49
161+
; CHECK-NEXT: s_cbranch_execz .LBB0_24
162+
; CHECK-NEXT: .LBB0_7: ; =>This Loop Header: Depth=1
163+
; CHECK-NEXT: ; Child Loop BB0_10 Depth 2
164+
; CHECK-NEXT: ; Child Loop BB0_22 Depth 2
148165
; CHECK-NEXT: v_add_nc_u32_e32 v0, s55, v44
149166
; CHECK-NEXT: s_lshl_b32 s4, s55, 5
150167
; CHECK-NEXT: s_add_i32 s54, s55, 1
151168
; CHECK-NEXT: s_add_i32 s5, s55, 5
152169
; CHECK-NEXT: v_or3_b32 v57, s4, v43, s54
153170
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
154171
; CHECK-NEXT: ds_read_u8 v56, v0
155-
; CHECK-NEXT: v_mov_b32_e32 v59, s54
172+
; CHECK-NEXT: v_mov_b32_e32 v58, s54
156173
; CHECK-NEXT: s_mov_b32 s56, exec_lo
157174
; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42
158-
; CHECK-NEXT: s_cbranch_execz .LBB0_17
159-
; CHECK-NEXT: ; %bb.6: ; %.preheader2
160-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
161-
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
162-
; CHECK-NEXT: v_and_b32_e32 v58, 0xff, v56
175+
; CHECK-NEXT: s_cbranch_execz .LBB0_19
176+
; CHECK-NEXT: ; %bb.8: ; %.preheader2
177+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
163178
; CHECK-NEXT: s_mov_b32 s57, 0
164179
; CHECK-NEXT: s_mov_b32 s58, 0
165-
; CHECK-NEXT: s_branch .LBB0_8
166-
; CHECK-NEXT: .LBB0_7: ; in Loop: Header=BB0_8 Depth=2
180+
; CHECK-NEXT: s_branch .LBB0_10
181+
; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_10 Depth=2
167182
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s59
168183
; CHECK-NEXT: s_add_i32 s58, s58, 4
169184
; CHECK-NEXT: s_add_i32 s4, s55, s58
170185
; CHECK-NEXT: v_add_nc_u32_e32 v0, s58, v57
171186
; CHECK-NEXT: s_add_i32 s5, s4, 5
172187
; CHECK-NEXT: s_add_i32 s4, s4, 1
173188
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42
174-
; CHECK-NEXT: v_mov_b32_e32 v59, s4
189+
; CHECK-NEXT: v_mov_b32_e32 v58, s4
175190
; CHECK-NEXT: s_or_b32 s57, vcc_lo, s57
176191
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s57
177-
; CHECK-NEXT: s_cbranch_execz .LBB0_16
178-
; CHECK-NEXT: .LBB0_8: ; Parent Loop BB0_5 Depth=1
192+
; CHECK-NEXT: s_cbranch_execz .LBB0_18
193+
; CHECK-NEXT: .LBB0_10: ; Parent Loop BB0_7 Depth=1
179194
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
180-
; CHECK-NEXT: v_add_nc_u32_e32 v60, s58, v46
181-
; CHECK-NEXT: v_add_nc_u32_e32 v59, s58, v57
195+
; CHECK-NEXT: v_add_nc_u32_e32 v59, s58, v46
196+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
197+
; CHECK-NEXT: v_and_b32_e32 v60, 0xff, v56
198+
; CHECK-NEXT: v_add_nc_u32_e32 v58, s58, v57
182199
; CHECK-NEXT: s_mov_b32 s59, exec_lo
183-
; CHECK-NEXT: ds_read_u8 v0, v60
200+
; CHECK-NEXT: ds_read_u8 v0, v59
184201
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
185-
; CHECK-NEXT: v_cmpx_eq_u16_e64 v58, v0
186-
; CHECK-NEXT: s_cbranch_execz .LBB0_10
187-
; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2
202+
; CHECK-NEXT: v_cmpx_eq_u16_e64 v60, v0
203+
; CHECK-NEXT: s_cbranch_execz .LBB0_12
204+
; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_10 Depth=2
188205
; CHECK-NEXT: v_mov_b32_e32 v31, v41
189206
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
190207
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -197,15 +214,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
197214
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
198215
; CHECK-NEXT: s_swappc_b64 s[30:31], s[42:43]
199216
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
200-
; CHECK-NEXT: ds_write_b32 v0, v59
201-
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2
217+
; CHECK-NEXT: ds_write_b32 v0, v58
218+
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_10 Depth=2
202219
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s59
203-
; CHECK-NEXT: ds_read_u8 v0, v60 offset:1
220+
; CHECK-NEXT: ds_read_u8 v0, v59 offset:1
204221
; CHECK-NEXT: s_mov_b32 s59, exec_lo
205222
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
206-
; CHECK-NEXT: v_cmpx_eq_u16_e64 v58, v0
207-
; CHECK-NEXT: s_cbranch_execz .LBB0_12
208-
; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2
223+
; CHECK-NEXT: v_cmpx_eq_u16_e64 v60, v0
224+
; CHECK-NEXT: s_cbranch_execz .LBB0_14
225+
; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_10 Depth=2
209226
; CHECK-NEXT: v_mov_b32_e32 v31, v41
210227
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
211228
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -215,19 +232,19 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
215232
; CHECK-NEXT: s_mov_b32 s12, s41
216233
; CHECK-NEXT: s_mov_b32 s13, s40
217234
; CHECK-NEXT: s_mov_b32 s14, s33
218-
; CHECK-NEXT: v_add_nc_u32_e32 v61, 1, v59
235+
; CHECK-NEXT: v_add_nc_u32_e32 v61, 1, v58
219236
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
220237
; CHECK-NEXT: s_swappc_b64 s[30:31], s[42:43]
221238
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
222239
; CHECK-NEXT: ds_write_b32 v0, v61
223-
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2
240+
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_10 Depth=2
224241
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s59
225-
; CHECK-NEXT: ds_read_u8 v0, v60 offset:2
242+
; CHECK-NEXT: ds_read_u8 v0, v59 offset:2
226243
; CHECK-NEXT: s_mov_b32 s59, exec_lo
227244
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
228-
; CHECK-NEXT: v_cmpx_eq_u16_e64 v58, v0
229-
; CHECK-NEXT: s_cbranch_execz .LBB0_14
230-
; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2
245+
; CHECK-NEXT: v_cmpx_eq_u16_e64 v60, v0
246+
; CHECK-NEXT: s_cbranch_execz .LBB0_16
247+
; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_10 Depth=2
231248
; CHECK-NEXT: v_mov_b32_e32 v31, v41
232249
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
233250
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -237,19 +254,19 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
237254
; CHECK-NEXT: s_mov_b32 s12, s41
238255
; CHECK-NEXT: s_mov_b32 s13, s40
239256
; CHECK-NEXT: s_mov_b32 s14, s33
240-
; CHECK-NEXT: v_add_nc_u32_e32 v61, 2, v59
257+
; CHECK-NEXT: v_add_nc_u32_e32 v61, 2, v58
241258
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
242259
; CHECK-NEXT: s_swappc_b64 s[30:31], s[42:43]
243260
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
244261
; CHECK-NEXT: ds_write_b32 v0, v61
245-
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2
262+
; CHECK-NEXT: .LBB0_16: ; in Loop: Header=BB0_10 Depth=2
246263
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s59
247-
; CHECK-NEXT: ds_read_u8 v0, v60 offset:3
264+
; CHECK-NEXT: ds_read_u8 v0, v59 offset:3
248265
; CHECK-NEXT: s_mov_b32 s59, exec_lo
249266
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
250-
; CHECK-NEXT: v_cmpx_eq_u16_e64 v58, v0
251-
; CHECK-NEXT: s_cbranch_execz .LBB0_7
252-
; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2
267+
; CHECK-NEXT: v_cmpx_eq_u16_e64 v60, v0
268+
; CHECK-NEXT: s_cbranch_execz .LBB0_9
269+
; CHECK-NEXT: ; %bb.17: ; in Loop: Header=BB0_10 Depth=2
253270
; CHECK-NEXT: v_mov_b32_e32 v31, v41
254271
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
255272
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -259,45 +276,45 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
259276
; CHECK-NEXT: s_mov_b32 s12, s41
260277
; CHECK-NEXT: s_mov_b32 s13, s40
261278
; CHECK-NEXT: s_mov_b32 s14, s33
262-
; CHECK-NEXT: v_add_nc_u32_e32 v59, 3, v59
279+
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v58
263280
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
264281
; CHECK-NEXT: s_swappc_b64 s[30:31], s[42:43]
265282
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
266-
; CHECK-NEXT: ds_write_b32 v0, v59
267-
; CHECK-NEXT: s_branch .LBB0_7
268-
; CHECK-NEXT: .LBB0_16: ; %Flow43
269-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
283+
; CHECK-NEXT: ds_write_b32 v0, v58
284+
; CHECK-NEXT: s_branch .LBB0_9
285+
; CHECK-NEXT: .LBB0_18: ; %Flow39
286+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
270287
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
271288
; CHECK-NEXT: v_mov_b32_e32 v57, v0
272-
; CHECK-NEXT: .LBB0_17: ; %Flow44
273-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
289+
; CHECK-NEXT: .LBB0_19: ; %Flow40
290+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
274291
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
275292
; CHECK-NEXT: s_mov_b32 s55, exec_lo
276-
; CHECK-NEXT: v_cmpx_lt_u32_e64 v59, v42
277-
; CHECK-NEXT: s_cbranch_execz .LBB0_23
278-
; CHECK-NEXT: ; %bb.18: ; %.preheader
279-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
293+
; CHECK-NEXT: v_cmpx_lt_u32_e64 v58, v42
294+
; CHECK-NEXT: s_cbranch_execz .LBB0_6
295+
; CHECK-NEXT: ; %bb.20: ; %.preheader
296+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
280297
; CHECK-NEXT: s_mov_b32 s56, 0
281298
; CHECK-NEXT: s_inst_prefetch 0x1
282-
; CHECK-NEXT: s_branch .LBB0_20
299+
; CHECK-NEXT: s_branch .LBB0_22
283300
; CHECK-NEXT: .p2align 6
284-
; CHECK-NEXT: .LBB0_19: ; in Loop: Header=BB0_20 Depth=2
301+
; CHECK-NEXT: .LBB0_21: ; in Loop: Header=BB0_22 Depth=2
285302
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
286-
; CHECK-NEXT: v_add_nc_u32_e32 v59, 1, v59
303+
; CHECK-NEXT: v_add_nc_u32_e32 v58, 1, v58
287304
; CHECK-NEXT: v_add_nc_u32_e32 v57, 1, v57
288-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v59, v42
305+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v58, v42
289306
; CHECK-NEXT: s_or_b32 s56, vcc_lo, s56
290307
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s56
291-
; CHECK-NEXT: s_cbranch_execz .LBB0_22
292-
; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1
308+
; CHECK-NEXT: s_cbranch_execz .LBB0_5
309+
; CHECK-NEXT: .LBB0_22: ; Parent Loop BB0_7 Depth=1
293310
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
294-
; CHECK-NEXT: v_add_nc_u32_e32 v0, v44, v59
311+
; CHECK-NEXT: v_add_nc_u32_e32 v0, v44, v58
295312
; CHECK-NEXT: ds_read_u8 v0, v0
296313
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
297314
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
298315
; CHECK-NEXT: s_and_saveexec_b32 s57, s4
299-
; CHECK-NEXT: s_cbranch_execz .LBB0_19
300-
; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2
316+
; CHECK-NEXT: s_cbranch_execz .LBB0_21
317+
; CHECK-NEXT: ; %bb.23: ; in Loop: Header=BB0_22 Depth=2
301318
; CHECK-NEXT: v_mov_b32_e32 v31, v41
302319
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
303320
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -311,25 +328,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
311328
; CHECK-NEXT: s_swappc_b64 s[30:31], s[42:43]
312329
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
313330
; CHECK-NEXT: ds_write_b32 v0, v57
314-
; CHECK-NEXT: s_branch .LBB0_19
315-
; CHECK-NEXT: .LBB0_22: ; %Flow41
316-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
317-
; CHECK-NEXT: s_inst_prefetch 0x2
318-
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
319-
; CHECK-NEXT: .LBB0_23: ; %Flow42
320-
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
321-
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
322-
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
323-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s54, v45
324-
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
325-
; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46
326-
; CHECK-NEXT: s_mov_b32 s55, s54
327-
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
328-
; CHECK-NEXT: s_and_b32 s4, exec_lo, s4
329-
; CHECK-NEXT: s_or_b32 s49, s4, s49
330-
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s49
331-
; CHECK-NEXT: s_cbranch_execnz .LBB0_5
332-
; CHECK-NEXT: .LBB0_25: ; %Flow49
331+
; CHECK-NEXT: s_branch .LBB0_21
332+
; CHECK-NEXT: .LBB0_24: ; %Flow45
333333
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s48
334334
; CHECK-NEXT: v_mov_b32_e32 v31, v41
335335
; CHECK-NEXT: v_mov_b32_e32 v0, 1
@@ -346,8 +346,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
346346
; CHECK-NEXT: ds_read_b32 v47, v0 offset:15360
347347
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
348348
; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v40
349-
; CHECK-NEXT: s_cbranch_execz .LBB0_33
350-
; CHECK-NEXT: ; %bb.26:
349+
; CHECK-NEXT: s_cbranch_execz .LBB0_32
350+
; CHECK-NEXT: ; %bb.25:
351351
; CHECK-NEXT: s_add_u32 s52, s44, 8
352352
; CHECK-NEXT: s_addc_u32 s53, s45, 0
353353
; CHECK-NEXT: s_getpc_b64 s[42:43]
@@ -360,8 +360,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
360360
; CHECK-NEXT: s_getpc_b64 s[48:49]
361361
; CHECK-NEXT: s_add_u32 s48, s48, _Z14get_local_sizej@rel32@lo+4
362362
; CHECK-NEXT: s_addc_u32 s49, s49, _Z14get_local_sizej@rel32@hi+12
363-
; CHECK-NEXT: s_branch .LBB0_28
364-
; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1
363+
; CHECK-NEXT: s_branch .LBB0_27
364+
; CHECK-NEXT: .LBB0_26: ; in Loop: Header=BB0_27 Depth=1
365365
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
366366
; CHECK-NEXT: v_mov_b32_e32 v31, v41
367367
; CHECK-NEXT: v_mov_b32_e32 v0, 0
@@ -377,8 +377,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
377377
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v40
378378
; CHECK-NEXT: s_or_b32 s54, vcc_lo, s54
379379
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
380-
; CHECK-NEXT: s_cbranch_execz .LBB0_33
381-
; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1
380+
; CHECK-NEXT: s_cbranch_execz .LBB0_32
381+
; CHECK-NEXT: .LBB0_27: ; =>This Inner Loop Header: Depth=1
382382
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v40
383383
; CHECK-NEXT: s_mov_b32 s55, exec_lo
384384
; CHECK-NEXT: ds_read_b32 v0, v0
@@ -406,8 +406,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
406406
; CHECK-NEXT: v_or_b32_e32 v5, v46, v57
407407
; CHECK-NEXT: v_or_b32_e32 v4, v45, v56
408408
; CHECK-NEXT: v_cmpx_ne_u64_e32 0, v[4:5]
409-
; CHECK-NEXT: s_cbranch_execz .LBB0_27
410-
; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1
409+
; CHECK-NEXT: s_cbranch_execz .LBB0_26
410+
; CHECK-NEXT: ; %bb.28: ; in Loop: Header=BB0_27 Depth=1
411411
; CHECK-NEXT: s_clause 0x1
412412
; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
413413
; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
@@ -443,8 +443,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
443443
; CHECK-NEXT: s_mov_b32 s4, exec_lo
444444
; CHECK-NEXT: v_cmpx_gt_u32_e32 12, v0
445445
; CHECK-NEXT: s_xor_b32 s4, exec_lo, s4
446-
; CHECK-NEXT: s_cbranch_execz .LBB0_31
447-
; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_28 Depth=1
446+
; CHECK-NEXT: s_cbranch_execz .LBB0_30
447+
; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_27 Depth=1
448448
; CHECK-NEXT: v_xor_b32_e32 v5, v60, v58
449449
; CHECK-NEXT: v_lshrrev_b64 v[3:4], 16, v[56:57]
450450
; CHECK-NEXT: v_mul_u32_u24_e32 v11, 0x180, v73
@@ -469,11 +469,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
469469
; CHECK-NEXT: ; implicit-def: $vgpr42
470470
; CHECK-NEXT: ; implicit-def: $vgpr43
471471
; CHECK-NEXT: ; implicit-def: $vgpr44
472-
; CHECK-NEXT: .LBB0_31: ; %Flow
473-
; CHECK-NEXT: ; in Loop: Header=BB0_28 Depth=1
472+
; CHECK-NEXT: .LBB0_30: ; %Flow
473+
; CHECK-NEXT: ; in Loop: Header=BB0_27 Depth=1
474474
; CHECK-NEXT: s_andn2_saveexec_b32 s4, s4
475-
; CHECK-NEXT: s_cbranch_execz .LBB0_27
476-
; CHECK-NEXT: ; %bb.32: ; in Loop: Header=BB0_28 Depth=1
475+
; CHECK-NEXT: s_cbranch_execz .LBB0_26
476+
; CHECK-NEXT: ; %bb.31: ; in Loop: Header=BB0_27 Depth=1
477477
; CHECK-NEXT: v_mov_b32_e32 v31, v41
478478
; CHECK-NEXT: v_mov_b32_e32 v0, v42
479479
; CHECK-NEXT: v_mov_b32_e32 v1, v43
@@ -486,8 +486,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
486486
; CHECK-NEXT: s_mov_b32 s13, s40
487487
; CHECK-NEXT: s_mov_b32 s14, s33
488488
; CHECK-NEXT: s_swappc_b64 s[30:31], s[44:45]
489-
; CHECK-NEXT: s_branch .LBB0_27
490-
; CHECK-NEXT: .LBB0_33:
489+
; CHECK-NEXT: s_branch .LBB0_26
490+
; CHECK-NEXT: .LBB0_32:
491491
; CHECK-NEXT: s_endpgm
492492
%6 = tail call i64 @_Z13get_global_idj(i32 noundef 0) #4
493493
%7 = trunc i64 %6 to i32
@@ -878,11 +878,11 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
878878
; CHECK-NEXT: s_or_b32 s6, vcc_lo, s6
879879
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s6
880880
; CHECK-NEXT: s_cbranch_execnz .LBB1_3
881-
; CHECK-NEXT: ; %bb.4: ; %Flow3
881+
; CHECK-NEXT: ; %bb.4: ; %Flow2
882882
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
883883
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
884884
; CHECK-NEXT: v_mov_b32_e32 v47, v0
885-
; CHECK-NEXT: .LBB1_5: ; %Flow4
885+
; CHECK-NEXT: .LBB1_5: ; %Flow3
886886
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
887887
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
888888
; CHECK-NEXT: s_mov_b32 s48, exec_lo
@@ -932,7 +932,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
932932
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
933933
; CHECK-NEXT: s_inst_prefetch 0x2
934934
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49
935-
; CHECK-NEXT: .LBB1_11: ; %Flow2
935+
; CHECK-NEXT: .LBB1_11: ; %Flow1
936936
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
937937
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s48
938938
; CHECK-NEXT: ; %bb.12: ; %.32

0 commit comments

Comments
 (0)