@@ -264,11 +264,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
264
264
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
265
265
; CHECK-NEXT: ds_write_b32 v0, v58
266
266
; CHECK-NEXT: s_branch .LBB0_7
267
- ; CHECK-NEXT: .LBB0_16: ; %Flow43
267
+ ; CHECK-NEXT: .LBB0_16: ; %Flow45
268
268
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
269
269
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
270
270
; CHECK-NEXT: v_mov_b32_e32 v57, v0
271
- ; CHECK-NEXT: .LBB0_17: ; %Flow44
271
+ ; CHECK-NEXT: .LBB0_17: ; %Flow46
272
272
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
273
273
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
274
274
; CHECK-NEXT: s_mov_b32 s55, exec_lo
@@ -311,11 +311,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
311
311
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
312
312
; CHECK-NEXT: ds_write_b32 v0, v57
313
313
; CHECK-NEXT: s_branch .LBB0_19
314
- ; CHECK-NEXT: .LBB0_22: ; %Flow41
314
+ ; CHECK-NEXT: .LBB0_22: ; %Flow43
315
315
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
316
316
; CHECK-NEXT: s_inst_prefetch 0x2
317
317
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
318
- ; CHECK-NEXT: .LBB0_23: ; %Flow42
318
+ ; CHECK-NEXT: .LBB0_23: ; %Flow44
319
319
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
320
320
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
321
321
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
@@ -328,7 +328,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
328
328
; CHECK-NEXT: s_or_b32 s49, s4, s49
329
329
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s49
330
330
; CHECK-NEXT: s_cbranch_execnz .LBB0_5
331
- ; CHECK-NEXT: .LBB0_25: ; %Flow49
331
+ ; CHECK-NEXT: .LBB0_25: ; %Flow51
332
332
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s48
333
333
; CHECK-NEXT: v_mov_b32_e32 v31, v41
334
334
; CHECK-NEXT: v_mov_b32_e32 v0, 1
@@ -347,18 +347,16 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
347
347
; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v40
348
348
; CHECK-NEXT: s_cbranch_execz .LBB0_33
349
349
; CHECK-NEXT: ; %bb.26:
350
- ; CHECK-NEXT: s_add_u32 s52, s44, 8
351
- ; CHECK-NEXT: s_addc_u32 s53, s45, 0
352
350
; CHECK-NEXT: s_getpc_b64 s[42:43]
353
351
; CHECK-NEXT: s_add_u32 s42, s42, _Z10atomic_addPU3AS1Vjj@rel32@lo+4
354
352
; CHECK-NEXT: s_addc_u32 s43, s43, _Z10atomic_addPU3AS1Vjj@rel32@hi+12
355
353
; CHECK-NEXT: s_mov_b32 s54, 0
356
- ; CHECK-NEXT: s_getpc_b64 s[44:45]
357
- ; CHECK-NEXT: s_add_u32 s44, s44, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
358
- ; CHECK-NEXT: s_addc_u32 s45, s45, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
359
354
; CHECK-NEXT: s_getpc_b64 s[48:49]
360
- ; CHECK-NEXT: s_add_u32 s48, s48, _Z14get_local_sizej@rel32@lo+4
361
- ; CHECK-NEXT: s_addc_u32 s49, s49, _Z14get_local_sizej@rel32@hi+12
355
+ ; CHECK-NEXT: s_add_u32 s48, s48, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
356
+ ; CHECK-NEXT: s_addc_u32 s49, s49, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
357
+ ; CHECK-NEXT: s_getpc_b64 s[52:53]
358
+ ; CHECK-NEXT: s_add_u32 s52, s52, _Z14get_local_sizej@rel32@lo+4
359
+ ; CHECK-NEXT: s_addc_u32 s53, s53, _Z14get_local_sizej@rel32@hi+12
362
360
; CHECK-NEXT: s_branch .LBB0_28
363
361
; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1
364
362
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
@@ -371,7 +369,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
371
369
; CHECK-NEXT: s_mov_b32 s12, s41
372
370
; CHECK-NEXT: s_mov_b32 s13, s40
373
371
; CHECK-NEXT: s_mov_b32 s14, s33
374
- ; CHECK-NEXT: s_swappc_b64 s[30:31], s[48:49 ]
372
+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[52:53 ]
375
373
; CHECK-NEXT: v_add_co_u32 v40, vcc_lo, v0, v40
376
374
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v40
377
375
; CHECK-NEXT: s_or_b32 s54, vcc_lo, s54
@@ -388,15 +386,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
388
386
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
389
387
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
390
388
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
391
- ; CHECK-NEXT: v_add_co_u32 v2, s4, s52 , v1
392
- ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s53 , 0, s4
389
+ ; CHECK-NEXT: v_add_co_u32 v2, s4, s44 , v1
390
+ ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45 , 0, s4
393
391
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
394
392
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
395
393
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
396
394
; CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
397
395
; CHECK-NEXT: s_clause 0x1
398
- ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
399
- ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
396
+ ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8
397
+ ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8
400
398
; CHECK-NEXT: s_waitcnt vmcnt(0)
401
399
; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5
402
400
; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4
@@ -408,8 +406,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
408
406
; CHECK-NEXT: s_cbranch_execz .LBB0_27
409
407
; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1
410
408
; CHECK-NEXT: s_clause 0x1
411
- ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
412
- ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
409
+ ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24
410
+ ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24
413
411
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45
414
412
; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12
415
413
; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45
@@ -484,7 +482,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
484
482
; CHECK-NEXT: s_mov_b32 s12, s41
485
483
; CHECK-NEXT: s_mov_b32 s13, s40
486
484
; CHECK-NEXT: s_mov_b32 s14, s33
487
- ; CHECK-NEXT: s_swappc_b64 s[30:31], s[44:45 ]
485
+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[48:49 ]
488
486
; CHECK-NEXT: s_branch .LBB0_27
489
487
; CHECK-NEXT: .LBB0_33:
490
488
; CHECK-NEXT: s_endpgm
0 commit comments