Skip to content

Commit c234051

Browse files
committed
[DAGCombiner][AMDGPU] Canonicalize constants to the RHS of MULHU/MULHS.
This allows special constants like 0 to be recognized. It's also expected by isel patterns if a target has mulh instructions that take an immediate operand. The commuting done by tablegen won't commute patterns with immediates since it expects DAGCombine to have done it. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D107486
1 parent 2dad797 commit c234051

File tree

5 files changed

+72
-72
lines changed

5 files changed

+72
-72
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4475,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
44754475
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
44764476
return C;
44774477

4478+
// canonicalize constant to RHS.
4479+
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4480+
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
4481+
return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4482+
44784483
// fold (mulhs x, 0) -> 0
44794484
if (isNullConstant(N1))
44804485
return N1;
@@ -4527,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
45274532
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
45284533
return C;
45294534

4535+
// canonicalize constant to RHS.
4536+
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4537+
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
4538+
return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4539+
45304540
// fold (mulhu x, 0) -> 0
45314541
if (isNullConstant(N1))
45324542
return N1;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8225,7 +8225,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
82258225
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
82268226
; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
82278227
; GFX6-NEXT: s_mov_b32 s7, 0xf000
8228-
; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0
8228+
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s2
82298229
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s2
82308230
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s2
82318231
; GFX6-NEXT: s_mov_b32 s6, -1
@@ -8251,7 +8251,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
82518251
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v8, v4, vcc
82528252
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
82538253
; GFX6-NEXT: v_mul_lo_u32 v4, v2, s2
8254-
; GFX6-NEXT: v_mul_hi_u32 v5, s2, v0
8254+
; GFX6-NEXT: v_mul_hi_u32 v5, v0, s2
82558255
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4
82568256
; GFX6-NEXT: v_mul_lo_u32 v5, v0, s2
82578257
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, v0, v4
@@ -8294,7 +8294,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
82948294
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
82958295
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc
82968296
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s3
8297-
; GFX6-NEXT: v_mul_hi_u32 v3, s3, v0
8297+
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s3
82988298
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s3
82998299
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2
83008300
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s0, v4
@@ -8344,7 +8344,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
83448344
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
83458345
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
83468346
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
8347-
; GFX9-NEXT: v_mul_hi_u32 v3, s8, v0
8347+
; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8
83488348
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8
83498349
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8
83508350
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
@@ -8366,7 +8366,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
83668366
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc
83678367
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
83688368
; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8
8369-
; GFX9-NEXT: v_mul_hi_u32 v6, s8, v0
8369+
; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8
83708370
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
83718371
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
83728372
; GFX9-NEXT: v_add_u32_e32 v4, v6, v4
@@ -8411,7 +8411,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
84118411
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc
84128412
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s3
84138413
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s3
8414-
; GFX9-NEXT: v_mul_hi_u32 v3, s3, v0
8414+
; GFX9-NEXT: v_mul_hi_u32 v3, v0, s3
84158415
; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, s0, v4
84168416
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
84178417
; GFX9-NEXT: v_mov_b32_e32 v3, s1
@@ -8873,7 +8873,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
88738873
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
88748874
; GFX6-NEXT: s_ashr_i32 s0, s9, 31
88758875
; GFX6-NEXT: s_lshr_b32 s0, s0, 20
8876-
; GFX6-NEXT: v_mul_hi_u32 v2, s6, v0
8876+
; GFX6-NEXT: v_mul_hi_u32 v2, v0, s6
88778877
; GFX6-NEXT: v_mul_lo_u32 v3, v1, s6
88788878
; GFX6-NEXT: s_add_u32 s2, s8, s0
88798879
; GFX6-NEXT: s_addc_u32 s3, s9, 0
@@ -8902,7 +8902,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
89028902
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v6, v5, vcc
89038903
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
89048904
; GFX6-NEXT: v_mul_lo_u32 v5, v2, s6
8905-
; GFX6-NEXT: v_mul_hi_u32 v7, s6, v0
8905+
; GFX6-NEXT: v_mul_hi_u32 v7, v0, s6
89068906
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v7, v5
89078907
; GFX6-NEXT: v_mul_lo_u32 v7, v0, s6
89088908
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, v0, v5
@@ -8944,7 +8944,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
89448944
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
89458945
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v6, v2, vcc
89468946
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s9
8947-
; GFX6-NEXT: v_mul_hi_u32 v3, s9, v0
8947+
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s9
89488948
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s9
89498949
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2
89508950
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s0, v4
@@ -8999,7 +8999,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
89998999
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
90009000
; GFX9-NEXT: s_ashr_i32 s2, s5, 31
90019001
; GFX9-NEXT: s_lshr_b32 s2, s2, 20
9002-
; GFX9-NEXT: v_mul_hi_u32 v2, s8, v0
9002+
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s8
90039003
; GFX9-NEXT: v_mul_lo_u32 v3, v1, s8
90049004
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s8
90059005
; GFX9-NEXT: s_add_u32 s4, s4, s2
@@ -9025,7 +9025,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
90259025
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc
90269026
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
90279027
; GFX9-NEXT: v_mul_lo_u32 v5, v2, s8
9028-
; GFX9-NEXT: v_mul_hi_u32 v7, s8, v0
9028+
; GFX9-NEXT: v_mul_hi_u32 v7, v0, s8
90299029
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
90309030
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
90319031
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
@@ -9070,7 +9070,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
90709070
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v2, vcc
90719071
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s3
90729072
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s3
9073-
; GFX9-NEXT: v_mul_hi_u32 v3, s3, v0
9073+
; GFX9-NEXT: v_mul_hi_u32 v3, v0, s3
90749074
; GFX9-NEXT: v_sub_co_u32_e32 v5, vcc, s6, v5
90759075
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
90769076
; GFX9-NEXT: v_mov_b32_e32 v3, s7
@@ -9689,7 +9689,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
96899689
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
96909690
; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
96919691
; GFX6-NEXT: s_mov_b32 s7, 0xf000
9692-
; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0
9692+
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s2
96939693
; GFX6-NEXT: v_mul_lo_u32 v2, v1, s2
96949694
; GFX6-NEXT: v_mul_lo_u32 v4, v0, s2
96959695
; GFX6-NEXT: s_mov_b32 s6, -1
@@ -9715,7 +9715,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
97159715
; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v8, v4, vcc
97169716
; GFX6-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1]
97179717
; GFX6-NEXT: v_mul_lo_u32 v4, v2, s2
9718-
; GFX6-NEXT: v_mul_hi_u32 v5, s2, v0
9718+
; GFX6-NEXT: v_mul_hi_u32 v5, v0, s2
97199719
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4
97209720
; GFX6-NEXT: v_mul_lo_u32 v5, v0, s2
97219721
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, v0, v4
@@ -9757,7 +9757,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
97579757
; GFX6-NEXT: v_addc_u32_e32 v2, vcc, v5, v7, vcc
97589758
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
97599759
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc
9760-
; GFX6-NEXT: v_mul_hi_u32 v2, s3, v0
9760+
; GFX6-NEXT: v_mul_hi_u32 v2, v0, s3
97619761
; GFX6-NEXT: v_mul_lo_u32 v1, v1, s3
97629762
; GFX6-NEXT: v_mul_lo_u32 v0, v0, s3
97639763
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
@@ -9806,7 +9806,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
98069806
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
98079807
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
98089808
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
9809-
; GFX9-NEXT: v_mul_hi_u32 v3, s8, v0
9809+
; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8
98109810
; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8
98119811
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8
98129812
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
@@ -9828,7 +9828,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
98289828
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc
98299829
; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3]
98309830
; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8
9831-
; GFX9-NEXT: v_mul_hi_u32 v6, s8, v0
9831+
; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8
98329832
; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8
98339833
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
98349834
; GFX9-NEXT: v_add_u32_e32 v4, v6, v4
@@ -9871,7 +9871,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) {
98719871
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc
98729872
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1
98739873
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc
9874-
; GFX9-NEXT: v_mul_hi_u32 v2, s3, v0
9874+
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s3
98759875
; GFX9-NEXT: v_mul_lo_u32 v1, v1, s3
98769876
; GFX9-NEXT: v_mul_lo_u32 v0, v0, s3
98779877
; GFX9-NEXT: v_add_u32_e32 v1, v2, v1

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,18 +1203,17 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
12031203
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
12041204
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
12051205
; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
1206-
; GCN-NEXT: v_mul_hi_u32 v0, 24, v0
1207-
; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
1208-
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
1206+
; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
1207+
; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
1208+
; GCN-NEXT: v_mov_b32_e32 v5, s9
12091209
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1210-
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
1210+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
12111211
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0, v0
12121212
; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
1213-
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
1213+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
12141214
; GCN-NEXT: v_mul_lo_u32 v2, s8, v1
12151215
; GCN-NEXT: v_mul_hi_u32 v3, s8, v0
12161216
; GCN-NEXT: v_mul_lo_u32 v4, s9, v0
1217-
; GCN-NEXT: v_mov_b32_e32 v5, s9
12181217
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
12191218
; GCN-NEXT: v_mul_lo_u32 v3, s8, v0
12201219
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -1420,14 +1419,13 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
14201419
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
14211420
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
14221421
; GCN-NEXT: v_mul_lo_u32 v5, v4, 24
1423-
; GCN-NEXT: v_mul_hi_u32 v3, 24, v3
1424-
; GCN-NEXT: v_mul_hi_u32 v6, 24, v4
1425-
; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
1422+
; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
1423+
; GCN-NEXT: v_mul_hi_u32 v4, v4, 24
14261424
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1427-
; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v6, vcc
1425+
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc
14281426
; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
1429-
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
1430-
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
1427+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
1428+
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc
14311429
; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
14321430
; GCN-NEXT: v_mul_hi_u32 v6, v0, v3
14331431
; GCN-NEXT: v_mul_lo_u32 v7, v1, v3
@@ -1629,25 +1627,23 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
16291627
; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v8
16301628
; GCN-NEXT: v_addc_u32_e64 v4, vcc, v4, v6, s[4:5]
16311629
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1632-
; GCN-NEXT: s_mov_b32 s4, 0x8000
16331630
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
1634-
; GCN-NEXT: v_mul_hi_u32 v3, s4, v3
1635-
; GCN-NEXT: v_mul_hi_u32 v5, s4, v4
1636-
; GCN-NEXT: v_lshlrev_b32_e32 v6, 15, v4
1637-
; GCN-NEXT: v_mul_hi_u32 v4, 0, v4
1638-
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
1639-
; GCN-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc
1631+
; GCN-NEXT: v_lshrrev_b32_e32 v5, 17, v4
1632+
; GCN-NEXT: v_lshlrev_b32_e32 v4, 15, v4
1633+
; GCN-NEXT: v_lshrrev_b32_e32 v3, 17, v3
1634+
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1635+
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v13, v5, vcc
16401636
; GCN-NEXT: v_add_i32_e32 v3, vcc, 0, v3
1641-
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
1642-
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc
1637+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
1638+
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc
16431639
; GCN-NEXT: v_mul_lo_u32 v5, v0, v4
16441640
; GCN-NEXT: v_mul_hi_u32 v6, v0, v3
16451641
; GCN-NEXT: v_mul_lo_u32 v7, v1, v3
16461642
; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5
16471643
; GCN-NEXT: v_mul_lo_u32 v6, v0, v3
16481644
; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7
16491645
; GCN-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
1650-
; GCN-NEXT: v_sub_i32_e32 v6, vcc, s4, v6
1646+
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0x8000, v6
16511647
; GCN-NEXT: v_subb_u32_e64 v7, s[4:5], v7, v1, vcc
16521648
; GCN-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v0
16531649
; GCN-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v7, s[4:5]

llvm/test/CodeGen/AMDGPU/srem64.ll

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,14 +1391,13 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
13911391
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
13921392
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
13931393
; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
1394-
; GCN-NEXT: v_mul_hi_u32 v0, 24, v0
1395-
; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
1396-
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
1394+
; GCN-NEXT: v_mul_hi_u32 v0, v0, 24
1395+
; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
13971396
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1398-
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
1397+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
13991398
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0, v0
14001399
; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
1401-
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
1400+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
14021401
; GCN-NEXT: v_mul_lo_u32 v1, s8, v1
14031402
; GCN-NEXT: v_mul_hi_u32 v2, s8, v0
14041403
; GCN-NEXT: v_mul_lo_u32 v3, s9, v0
@@ -1605,14 +1604,13 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
16051604
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
16061605
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
16071606
; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
1608-
; GCN-NEXT: v_mul_hi_u32 v2, 24, v2
1609-
; GCN-NEXT: v_mul_hi_u32 v5, 24, v3
1610-
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
1607+
; GCN-NEXT: v_mul_hi_u32 v2, v2, 24
1608+
; GCN-NEXT: v_mul_hi_u32 v3, v3, 24
16111609
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1612-
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v12, v5, vcc
1610+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc
16131611
; GCN-NEXT: v_add_i32_e32 v2, vcc, 0, v2
1614-
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
1615-
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
1612+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc
1613+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc
16161614
; GCN-NEXT: v_mul_lo_u32 v3, v0, v3
16171615
; GCN-NEXT: v_mul_hi_u32 v4, v0, v2
16181616
; GCN-NEXT: v_mul_lo_u32 v5, v1, v2
@@ -1812,25 +1810,23 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
18121810
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v7
18131811
; GCN-NEXT: v_addc_u32_e64 v3, vcc, v3, v5, s[4:5]
18141812
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1815-
; GCN-NEXT: s_mov_b32 s4, 0x8000
18161813
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1817-
; GCN-NEXT: v_mul_hi_u32 v2, s4, v2
1818-
; GCN-NEXT: v_mul_hi_u32 v4, s4, v3
1819-
; GCN-NEXT: v_lshlrev_b32_e32 v5, 15, v3
1820-
; GCN-NEXT: v_mul_hi_u32 v3, 0, v3
1821-
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v5
1822-
; GCN-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc
1814+
; GCN-NEXT: v_lshrrev_b32_e32 v4, 17, v3
1815+
; GCN-NEXT: v_lshlrev_b32_e32 v3, 15, v3
1816+
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v2
1817+
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1818+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v12, v4, vcc
18231819
; GCN-NEXT: v_add_i32_e32 v2, vcc, 0, v2
1824-
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
1825-
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
1820+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc
1821+
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc
18261822
; GCN-NEXT: v_mul_lo_u32 v3, v0, v3
18271823
; GCN-NEXT: v_mul_hi_u32 v4, v0, v2
18281824
; GCN-NEXT: v_mul_lo_u32 v5, v1, v2
18291825
; GCN-NEXT: v_mul_lo_u32 v2, v0, v2
18301826
; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3
18311827
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
18321828
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
1833-
; GCN-NEXT: v_sub_i32_e32 v2, vcc, s4, v2
1829+
; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0x8000, v2
18341830
; GCN-NEXT: v_subb_u32_e64 v4, s[4:5], v4, v1, vcc
18351831
; GCN-NEXT: v_sub_i32_e64 v5, s[4:5], v2, v0
18361832
; GCN-NEXT: v_subbrev_u32_e64 v6, s[6:7], 0, v4, s[4:5]

llvm/test/CodeGen/AMDGPU/udiv64.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -779,23 +779,21 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48
779779
; GCN-NEXT: v_mov_b32_e32 v3, s6
780780
; GCN-NEXT: v_alignbit_b32 v3, s7, v3, 24
781781
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
782-
; GCN-NEXT: v_mul_hi_u32 v5, v3, v1
783782
; GCN-NEXT: v_mul_lo_u32 v4, v3, v2
784-
; GCN-NEXT: v_mul_hi_u32 v6, v3, v2
785-
; GCN-NEXT: v_mul_hi_u32 v1, 0, v1
786-
; GCN-NEXT: v_mul_hi_u32 v2, 0, v2
787-
; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
788-
; GCN-NEXT: v_addc_u32_e32 v5, vcc, v9, v6, vcc
789-
; GCN-NEXT: v_add_i32_e32 v4, vcc, 0, v4
790-
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
791-
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc
783+
; GCN-NEXT: v_mul_hi_u32 v1, v3, v1
784+
; GCN-NEXT: v_mul_hi_u32 v2, v3, v2
785+
; GCN-NEXT: s_mov_b32 s7, 0xf000
786+
; GCN-NEXT: s_mov_b32 s6, -1
787+
; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v4
788+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
789+
; GCN-NEXT: v_add_i32_e32 v1, vcc, 0, v1
790+
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
791+
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v8, vcc
792792
; GCN-NEXT: v_add_i32_e32 v1, vcc, 0, v1
793793
; GCN-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
794794
; GCN-NEXT: v_mul_lo_u32 v4, v0, v2
795795
; GCN-NEXT: v_mul_hi_u32 v5, v0, v1
796796
; GCN-NEXT: v_mul_lo_u32 v6, v0, v1
797-
; GCN-NEXT: s_mov_b32 s7, 0xf000
798-
; GCN-NEXT: s_mov_b32 s6, -1
799797
; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
800798
; GCN-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
801799
; GCN-NEXT: v_subb_u32_e32 v4, vcc, 0, v4, vcc

0 commit comments

Comments
 (0)