@@ -143,11 +143,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
143
143
; CHECK-NEXT: v_mov_b32_e32 v47, 0
144
144
; CHECK-NEXT: s_mov_b32 s49, 0
145
145
; CHECK-NEXT: s_branch .LBB0_7
146
- ; CHECK-NEXT: .LBB0_5: ; %Flow41
146
+ ; CHECK-NEXT: .LBB0_5: ; %Flow43
147
147
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
148
148
; CHECK-NEXT: s_inst_prefetch 0x2
149
149
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49
150
- ; CHECK-NEXT: .LBB0_6: ; %Flow42
150
+ ; CHECK-NEXT: .LBB0_6: ; %Flow44
151
151
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
152
152
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s48, v45
153
153
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
@@ -304,7 +304,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
304
304
; CHECK-NEXT: ds_write_b32 v0, v58
305
305
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
306
306
; CHECK-NEXT: s_branch .LBB0_9
307
- ; CHECK-NEXT: .LBB0_18: ; %Flow43
307
+ ; CHECK-NEXT: .LBB0_18: ; %Flow45
308
308
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=1
309
309
; CHECK-NEXT: v_mov_b32_e32 v57, v0
310
310
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
@@ -357,7 +357,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
357
357
; CHECK-NEXT: ds_write_b32 v0, v57
358
358
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
359
359
; CHECK-NEXT: s_branch .LBB0_21
360
- ; CHECK-NEXT: .LBB0_24: ; %Flow47
360
+ ; CHECK-NEXT: .LBB0_24: ; %Flow49
361
361
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42
362
362
; CHECK-NEXT: .LBB0_25:
363
363
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -382,13 +382,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
382
382
; CHECK-NEXT: s_cmov_b32 exec_lo, vcc_lo
383
383
; CHECK-NEXT: s_cbranch_scc0 .LBB0_34
384
384
; CHECK-NEXT: ; %bb.26:
385
- ; CHECK-NEXT: s_add_u32 s42, s44, 8
386
- ; CHECK-NEXT: s_addc_u32 s43, s45, 0
387
- ; CHECK-NEXT: s_mov_b32 s44, 0
385
+ ; CHECK-NEXT: s_mov_b32 s42, 0
388
386
; CHECK-NEXT: s_branch .LBB0_29
389
- ; CHECK-NEXT: .LBB0_27: ; %Flow38
387
+ ; CHECK-NEXT: .LBB0_27: ; %Flow40
390
388
; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
391
- ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45
389
+ ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43
392
390
; CHECK-NEXT: .LBB0_28: ; in Loop: Header=BB0_29 Depth=1
393
391
; CHECK-NEXT: v_mov_b32_e32 v31, v40
394
392
; CHECK-NEXT: v_mov_b32_e32 v0, 0
@@ -405,13 +403,13 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
405
403
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
406
404
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
407
405
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
408
- ; CHECK-NEXT: s_or_b32 s44 , vcc_lo, s44
409
- ; CHECK-NEXT: s_andn2_b32 s4, exec_lo, s44
410
- ; CHECK-NEXT: s_cselect_b32 exec_lo, s4, s44
406
+ ; CHECK-NEXT: s_or_b32 s42 , vcc_lo, s42
407
+ ; CHECK-NEXT: s_andn2_b32 s4, exec_lo, s42
408
+ ; CHECK-NEXT: s_cselect_b32 exec_lo, s4, s42
411
409
; CHECK-NEXT: s_cbranch_scc0 .LBB0_34
412
410
; CHECK-NEXT: .LBB0_29: ; =>This Inner Loop Header: Depth=1
413
411
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41
414
- ; CHECK-NEXT: s_mov_b32 s45 , exec_lo
412
+ ; CHECK-NEXT: s_mov_b32 s43 , exec_lo
415
413
; CHECK-NEXT: ds_read_b32 v0, v0
416
414
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
417
415
; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0
@@ -420,15 +418,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
420
418
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
421
419
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
422
420
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
423
- ; CHECK-NEXT: v_add_co_u32 v2, s4, s42 , v1
424
- ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43 , 0, s4
421
+ ; CHECK-NEXT: v_add_co_u32 v2, s4, s44 , v1
422
+ ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45 , 0, s4
425
423
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
426
424
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
427
425
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
428
426
; CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
429
427
; CHECK-NEXT: s_clause 0x1
430
- ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
431
- ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
428
+ ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8
429
+ ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8
432
430
; CHECK-NEXT: s_waitcnt vmcnt(0)
433
431
; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5
434
432
; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4
@@ -442,8 +440,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
442
440
; CHECK-NEXT: s_cbranch_scc0 .LBB0_28
443
441
; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_29 Depth=1
444
442
; CHECK-NEXT: s_clause 0x1
445
- ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
446
- ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
443
+ ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24
444
+ ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24
447
445
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45
448
446
; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12
449
447
; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45
0 commit comments