@@ -1823,22 +1823,22 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
1823
1823
; GFX1264: ; %bb.0: ; %entry
1824
1824
; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1825
1825
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
1826
- ; GFX1264-NEXT: s_mov_b32 s9, 0
1827
- ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
1828
1826
; GFX1264-NEXT: s_mov_b64 s[4:5], exec
1827
+ ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
1829
1828
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1830
1829
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
1831
1830
; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
1832
1831
; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
1833
1832
; GFX1264-NEXT: s_cbranch_execz .LBB3_2
1834
1833
; GFX1264-NEXT: ; %bb.1:
1835
- ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
1834
+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
1835
+ ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
1836
+ ; GFX1264-NEXT: s_wait_alu 0xfffe
1837
+ ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
1836
1838
; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
1837
- ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
1838
- ; GFX1264-NEXT: s_mov_b32 s10, -1
1839
1839
; GFX1264-NEXT: s_wait_alu 0xfffe
1840
1840
; GFX1264-NEXT: v_mov_b32_e32 v0, s6
1841
- ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
1841
+ ; GFX1264-NEXT: s_mov_b32 s10, -1
1842
1842
; GFX1264-NEXT: s_wait_kmcnt 0x0
1843
1843
; GFX1264-NEXT: s_mov_b32 s8, s2
1844
1844
; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -1860,29 +1860,27 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
1860
1860
; GFX1232-LABEL: add_i64_constant:
1861
1861
; GFX1232: ; %bb.0: ; %entry
1862
1862
; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1863
- ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
1864
- ; GFX1232-NEXT: s_mov_b32 s5, 0
1865
- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
1866
1863
; GFX1232-NEXT: s_mov_b32 s6, exec_lo
1864
+ ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
1865
+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
1867
1866
; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
1868
1867
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
1869
1868
; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
1870
1869
; GFX1232-NEXT: s_cbranch_execz .LBB3_2
1871
1870
; GFX1232-NEXT: ; %bb.1:
1872
- ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
1871
+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
1873
1872
; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
1874
- ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5], 5
1873
+ ; GFX1232-NEXT: s_mul_i32 s5, s5, 5
1875
1874
; GFX1232-NEXT: s_mov_b32 s10, -1
1876
- ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
1875
+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
1877
1876
; GFX1232-NEXT: s_wait_kmcnt 0x0
1878
1877
; GFX1232-NEXT: s_mov_b32 s8, s2
1879
1878
; GFX1232-NEXT: s_mov_b32 s9, s3
1880
1879
; GFX1232-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1881
1880
; GFX1232-NEXT: s_wait_loadcnt 0x0
1882
1881
; GFX1232-NEXT: global_inv scope:SCOPE_DEV
1883
1882
; GFX1232-NEXT: .LBB3_2:
1884
- ; GFX1232-NEXT: s_wait_alu 0xfffe
1885
- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
1883
+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
1886
1884
; GFX1232-NEXT: s_wait_kmcnt 0x0
1887
1885
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
1888
1886
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
@@ -5372,22 +5370,22 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
5372
5370
; GFX1264: ; %bb.0: ; %entry
5373
5371
; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5374
5372
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
5375
- ; GFX1264-NEXT: s_mov_b32 s9, 0
5376
- ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
5377
5373
; GFX1264-NEXT: s_mov_b64 s[4:5], exec
5374
+ ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
5378
5375
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5379
5376
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
5380
5377
; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
5381
5378
; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
5382
5379
; GFX1264-NEXT: s_cbranch_execz .LBB9_2
5383
5380
; GFX1264-NEXT: ; %bb.1:
5384
- ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
5381
+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
5382
+ ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
5383
+ ; GFX1264-NEXT: s_wait_alu 0xfffe
5384
+ ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
5385
5385
; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
5386
- ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
5387
- ; GFX1264-NEXT: s_mov_b32 s10, -1
5388
5386
; GFX1264-NEXT: s_wait_alu 0xfffe
5389
5387
; GFX1264-NEXT: v_mov_b32_e32 v0, s6
5390
- ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
5388
+ ; GFX1264-NEXT: s_mov_b32 s10, -1
5391
5389
; GFX1264-NEXT: s_wait_kmcnt 0x0
5392
5390
; GFX1264-NEXT: s_mov_b32 s8, s2
5393
5391
; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -5412,29 +5410,27 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
5412
5410
; GFX1232-LABEL: sub_i64_constant:
5413
5411
; GFX1232: ; %bb.0: ; %entry
5414
5412
; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5415
- ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
5416
- ; GFX1232-NEXT: s_mov_b32 s5, 0
5417
- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
5418
5413
; GFX1232-NEXT: s_mov_b32 s6, exec_lo
5414
+ ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
5415
+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
5419
5416
; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
5420
5417
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
5421
5418
; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
5422
5419
; GFX1232-NEXT: s_cbranch_execz .LBB9_2
5423
5420
; GFX1232-NEXT: ; %bb.1:
5424
- ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
5421
+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
5425
5422
; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
5426
- ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5], 5
5423
+ ; GFX1232-NEXT: s_mul_i32 s5, s5, 5
5427
5424
; GFX1232-NEXT: s_mov_b32 s10, -1
5428
- ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
5425
+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
5429
5426
; GFX1232-NEXT: s_wait_kmcnt 0x0
5430
5427
; GFX1232-NEXT: s_mov_b32 s8, s2
5431
5428
; GFX1232-NEXT: s_mov_b32 s9, s3
5432
5429
; GFX1232-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
5433
5430
; GFX1232-NEXT: s_wait_loadcnt 0x0
5434
5431
; GFX1232-NEXT: global_inv scope:SCOPE_DEV
5435
5432
; GFX1232-NEXT: .LBB9_2:
5436
- ; GFX1232-NEXT: s_wait_alu 0xfffe
5437
- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
5433
+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
5438
5434
; GFX1232-NEXT: s_wait_kmcnt 0x0
5439
5435
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
5440
5436
; GFX1232-NEXT: v_mul_u32_u24_e32 v0, 5, v2
0 commit comments