Skip to content

Commit 9fb4bc5

Browse files
authored
[DAG] SimplifyMultipleUseDemandedBits - ignore SRL node if we're just demanding known sign bits (#114389)
Check whether we are only demanding (shifted) sign bits from an SRL node that are also sign bits in the source node. We must not demand any of the upper zero bits that the SRL shifts in (up to the maximum shift amount), and every bit from the lowest demanded bit upward must already be a sign bit in the source.
1 parent 6c28530 commit 9fb4bc5

File tree

6 files changed

+446
-455
lines changed

6 files changed

+446
-455
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,24 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
808808
}
809809
break;
810810
}
811+
case ISD::SRL: {
812+
// If we are only demanding sign bits then we can use the shift source
813+
// directly.
814+
if (std::optional<uint64_t> MaxSA =
815+
DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
816+
SDValue Op0 = Op.getOperand(0);
817+
unsigned ShAmt = *MaxSA;
818+
// Must already be signbits in DemandedBits bounds, and can't demand any
819+
// shifted in zeroes.
820+
if (DemandedBits.countl_zero() >= ShAmt) {
821+
unsigned NumSignBits =
822+
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
823+
if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
824+
return Op0;
825+
}
826+
}
827+
break;
828+
}
811829
case ISD::SETCC: {
812830
SDValue Op0 = Op.getOperand(0);
813831
SDValue Op1 = Op.getOperand(1);

llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,16 +1052,15 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
10521052
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
10531053
; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v1
10541054
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 3, v[2:3]
1055-
; GFX9-NEXT: v_mul_i32_i24_e32 v8, 3, v6
1056-
; GFX9-NEXT: v_lshl_add_u32 v9, v6, 31, v6
1057-
; GFX9-NEXT: v_mov_b32_e32 v10, v5
1055+
; GFX9-NEXT: v_lshl_add_u32 v8, v6, 31, v6
1056+
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
1057+
; GFX9-NEXT: v_mov_b32_e32 v9, v5
10581058
; GFX9-NEXT: v_mov_b32_e32 v5, v3
10591059
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
1060-
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
1061-
; GFX9-NEXT: v_mov_b32_e32 v2, v3
1062-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1063-
; GFX9-NEXT: v_add3_u32 v7, v7, v9, v8
1060+
; GFX9-NEXT: v_add3_u32 v7, v7, v8, v6
10641061
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[6:7]
1062+
; GFX9-NEXT: v_mov_b32_e32 v2, v3
1063+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
10651064
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
10661065
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
10671066
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
@@ -1085,10 +1084,9 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
10851084
; GFX942: ; %bb.0: ; %entry
10861085
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10871086
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1088-
; GFX942-NEXT: v_mul_i32_i24_e32 v4, 3, v2
1089-
; GFX942-NEXT: v_lshl_add_u32 v5, v2, 31, v2
1087+
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
10901088
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
1091-
; GFX942-NEXT: v_add3_u32 v3, v3, v5, v4
1089+
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
10921090
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
10931091
; GFX942-NEXT: v_mov_b32_e32 v5, 0
10941092
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
@@ -1125,17 +1123,16 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
11251123
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
11261124
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
11271125
; GFX1030-NEXT: v_ashrrev_i32_e32 v6, 31, v1
1128-
; GFX1030-NEXT: v_mul_i32_i24_e32 v7, 3, v6
11291126
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
1130-
; GFX1030-NEXT: v_mov_b32_e32 v8, v5
1127+
; GFX1030-NEXT: v_mov_b32_e32 v7, v5
11311128
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
11321129
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v6, 3, 0
11331130
; GFX1030-NEXT: v_lshl_add_u32 v6, v6, 31, v6
11341131
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x80000001, v0, v[4:5]
1135-
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v7
1132+
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v2
11361133
; GFX1030-NEXT: v_mov_b32_e32 v4, v5
11371134
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, -1, v[2:3]
1138-
; GFX1030-NEXT: v_add_co_u32 v4, s4, v8, v4
1135+
; GFX1030-NEXT: v_add_co_u32 v4, s4, v7, v4
11391136
; GFX1030-NEXT: v_add_co_ci_u32_e64 v5, null, 0, 0, s4
11401137
; GFX1030-NEXT: v_sub_nc_u32_e32 v6, v3, v1
11411138
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x80000001, v1, v[4:5]
@@ -1167,16 +1164,15 @@ define noundef i64 @sdiv64_i32max(i64 noundef %i) {
11671164
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
11681165
; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v1
11691166
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 3, v[2:3]
1170-
; GFX9-NEXT: v_mul_i32_i24_e32 v8, 3, v6
1171-
; GFX9-NEXT: v_lshl_add_u32 v9, v6, 31, v6
1172-
; GFX9-NEXT: v_mov_b32_e32 v10, v5
1167+
; GFX9-NEXT: v_lshl_add_u32 v8, v6, 31, v6
1168+
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
1169+
; GFX9-NEXT: v_mov_b32_e32 v9, v5
11731170
; GFX9-NEXT: v_mov_b32_e32 v5, v3
11741171
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
1175-
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
1176-
; GFX9-NEXT: v_mov_b32_e32 v2, v3
1177-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1178-
; GFX9-NEXT: v_add3_u32 v7, v7, v9, v8
1172+
; GFX9-NEXT: v_add3_u32 v7, v7, v8, v6
11791173
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[6:7]
1174+
; GFX9-NEXT: v_mov_b32_e32 v2, v3
1175+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
11801176
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
11811177
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
11821178
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
@@ -1195,10 +1191,9 @@ define noundef i64 @sdiv64_i32max(i64 noundef %i) {
11951191
; GFX942: ; %bb.0: ; %entry
11961192
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11971193
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1198-
; GFX942-NEXT: v_mul_i32_i24_e32 v4, 3, v2
1199-
; GFX942-NEXT: v_lshl_add_u32 v5, v2, 31, v2
1194+
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
12001195
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
1201-
; GFX942-NEXT: v_add3_u32 v3, v3, v5, v4
1196+
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
12021197
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
12031198
; GFX942-NEXT: v_mov_b32_e32 v5, 0
12041199
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
@@ -1227,17 +1222,16 @@ define noundef i64 @sdiv64_i32max(i64 noundef %i) {
12271222
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
12281223
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
12291224
; GFX1030-NEXT: v_ashrrev_i32_e32 v6, 31, v1
1230-
; GFX1030-NEXT: v_mul_i32_i24_e32 v7, 3, v6
12311225
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
1232-
; GFX1030-NEXT: v_mov_b32_e32 v8, v5
1226+
; GFX1030-NEXT: v_mov_b32_e32 v7, v5
12331227
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
12341228
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v6, 3, 0
12351229
; GFX1030-NEXT: v_lshl_add_u32 v6, v6, 31, v6
12361230
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x80000001, v0, v[4:5]
1237-
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v7
1231+
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v2
12381232
; GFX1030-NEXT: v_mov_b32_e32 v4, v5
12391233
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, -1, v[2:3]
1240-
; GFX1030-NEXT: v_add_co_u32 v4, s4, v8, v4
1234+
; GFX1030-NEXT: v_add_co_u32 v4, s4, v7, v4
12411235
; GFX1030-NEXT: v_add_co_ci_u32_e64 v5, null, 0, 0, s4
12421236
; GFX1030-NEXT: v_sub_nc_u32_e32 v6, v3, v1
12431237
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x80000001, v1, v[4:5]

0 commit comments

Comments
 (0)