@@ -8225,7 +8225,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8225
8225
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
8226
8226
; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
8227
8227
; GFX6-NEXT: s_mov_b32 s7, 0xf000
8228
- ; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0
8228
+ ; GFX6-NEXT: v_mul_hi_u32 v3, v0, s2
8229
8229
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s2
8230
8230
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s2
8231
8231
; GFX6-NEXT: s_mov_b32 s6, -1
@@ -8251,7 +8251,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8251
8251
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v8, v4, vcc
8252
8252
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
8253
8253
; GFX6-NEXT: v_mul_lo_u32 v4, v2, s2
8254
- ; GFX6-NEXT: v_mul_hi_u32 v5, s2, v0
8254
+ ; GFX6-NEXT: v_mul_hi_u32 v5, v0, s2
8255
8255
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4
8256
8256
; GFX6-NEXT: v_mul_lo_u32 v5, v0, s2
8257
8257
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, v0, v4
@@ -8294,7 +8294,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8294
8294
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
8295
8295
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc
8296
8296
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s3
8297
- ; GFX6-NEXT: v_mul_hi_u32 v3, s3, v0
8297
+ ; GFX6-NEXT: v_mul_hi_u32 v3, v0, s3
8298
8298
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s3
8299
8299
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2
8300
8300
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s0, v4
@@ -8344,7 +8344,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8344
8344
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
8345
8345
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
8346
8346
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
8347
- ; GFX9-NEXT: v_mul_hi_u32 v3, s8, v0
8347
+ ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8
8348
8348
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8
8349
8349
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8
8350
8350
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
@@ -8366,7 +8366,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8366
8366
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc
8367
8367
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
8368
8368
; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8
8369
- ; GFX9-NEXT: v_mul_hi_u32 v6, s8, v0
8369
+ ; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8
8370
8370
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
8371
8371
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
8372
8372
; GFX9-NEXT: v_add_u32_e32 v4, v6, v4
@@ -8411,7 +8411,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
8411
8411
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc
8412
8412
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s3
8413
8413
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s3
8414
- ; GFX9-NEXT: v_mul_hi_u32 v3, s3, v0
8414
+ ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s3
8415
8415
; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, s0, v4
8416
8416
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
8417
8417
; GFX9-NEXT: v_mov_b32_e32 v3, s1
@@ -8873,7 +8873,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
8873
8873
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8874
8874
; GFX6-NEXT: s_ashr_i32 s0, s9, 31
8875
8875
; GFX6-NEXT: s_lshr_b32 s0, s0, 20
8876
- ; GFX6-NEXT: v_mul_hi_u32 v2, s6, v0
8876
+ ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s6
8877
8877
; GFX6-NEXT: v_mul_lo_u32 v3, v1, s6
8878
8878
; GFX6-NEXT: s_add_u32 s2, s8, s0
8879
8879
; GFX6-NEXT: s_addc_u32 s3, s9, 0
@@ -8902,7 +8902,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
8902
8902
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v6, v5, vcc
8903
8903
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
8904
8904
; GFX6-NEXT: v_mul_lo_u32 v5, v2, s6
8905
- ; GFX6-NEXT: v_mul_hi_u32 v7, s6, v0
8905
+ ; GFX6-NEXT: v_mul_hi_u32 v7, v0, s6
8906
8906
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v7, v5
8907
8907
; GFX6-NEXT: v_mul_lo_u32 v7, v0, s6
8908
8908
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, v0, v5
@@ -8944,7 +8944,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
8944
8944
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
8945
8945
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v6, v2, vcc
8946
8946
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s9
8947
- ; GFX6-NEXT: v_mul_hi_u32 v3, s9, v0
8947
+ ; GFX6-NEXT: v_mul_hi_u32 v3, v0, s9
8948
8948
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s9
8949
8949
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2
8950
8950
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s0, v4
@@ -8999,7 +8999,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
8999
8999
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
9000
9000
; GFX9-NEXT: s_ashr_i32 s2, s5, 31
9001
9001
; GFX9-NEXT: s_lshr_b32 s2, s2, 20
9002
- ; GFX9-NEXT: v_mul_hi_u32 v2, s8, v0
9002
+ ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s8
9003
9003
; GFX9-NEXT: v_mul_lo_u32 v3, v1, s8
9004
9004
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s8
9005
9005
; GFX9-NEXT: s_add_u32 s4, s4, s2
@@ -9025,7 +9025,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
9025
9025
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc
9026
9026
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
9027
9027
; GFX9-NEXT: v_mul_lo_u32 v5, v2, s8
9028
- ; GFX9-NEXT: v_mul_hi_u32 v7, s8, v0
9028
+ ; GFX9-NEXT: v_mul_hi_u32 v7, v0, s8
9029
9029
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
9030
9030
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
9031
9031
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
@@ -9070,7 +9070,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
9070
9070
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v2, vcc
9071
9071
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s3
9072
9072
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s3
9073
- ; GFX9-NEXT: v_mul_hi_u32 v3, s3, v0
9073
+ ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s3
9074
9074
; GFX9-NEXT: v_sub_co_u32_e32 v5, vcc, s6, v5
9075
9075
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
9076
9076
; GFX9-NEXT: v_mov_b32_e32 v3, s7
@@ -9689,7 +9689,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9689
9689
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
9690
9690
; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
9691
9691
; GFX6-NEXT: s_mov_b32 s7, 0xf000
9692
- ; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0
9692
+ ; GFX6-NEXT: v_mul_hi_u32 v3, v0, s2
9693
9693
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s2
9694
9694
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s2
9695
9695
; GFX6-NEXT: s_mov_b32 s6, -1
@@ -9715,7 +9715,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9715
9715
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v8, v4, vcc
9716
9716
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
9717
9717
; GFX6-NEXT: v_mul_lo_u32 v4, v2, s2
9718
- ; GFX6-NEXT: v_mul_hi_u32 v5, s2, v0
9718
+ ; GFX6-NEXT: v_mul_hi_u32 v5, v0, s2
9719
9719
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4
9720
9720
; GFX6-NEXT: v_mul_lo_u32 v5, v0, s2
9721
9721
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, v0, v4
@@ -9757,7 +9757,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9757
9757
; GFX6-NEXT: v_addc_u32_e32 v2, vcc, v5, v7, vcc
9758
9758
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
9759
9759
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc
9760
- ; GFX6-NEXT: v_mul_hi_u32 v2, s3, v0
9760
+ ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s3
9761
9761
; GFX6-NEXT: v_mul_lo_u32 v1, v1, s3
9762
9762
; GFX6-NEXT: v_mul_lo_u32 v0, v0, s3
9763
9763
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
@@ -9806,7 +9806,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9806
9806
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
9807
9807
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
9808
9808
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
9809
- ; GFX9-NEXT: v_mul_hi_u32 v3, s8, v0
9809
+ ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8
9810
9810
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8
9811
9811
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8
9812
9812
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
@@ -9828,7 +9828,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9828
9828
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc
9829
9829
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
9830
9830
; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8
9831
- ; GFX9-NEXT: v_mul_hi_u32 v6, s8, v0
9831
+ ; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8
9832
9832
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
9833
9833
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
9834
9834
; GFX9-NEXT: v_add_u32_e32 v4, v6, v4
@@ -9871,7 +9871,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
9871
9871
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc
9872
9872
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1
9873
9873
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc
9874
- ; GFX9-NEXT: v_mul_hi_u32 v2, s3, v0
9874
+ ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s3
9875
9875
; GFX9-NEXT: v_mul_lo_u32 v1, v1, s3
9876
9876
; GFX9-NEXT: v_mul_lo_u32 v0, v0, s3
9877
9877
; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
0 commit comments