Skip to content

Commit 4f0d68e

Browse files
committed
[DAG] Remove OneUse restriction when folding (shl (add x, c1), c2)
1 parent 25bea3e commit 4f0d68e

File tree

14 files changed: +189 additions, -136 deletions

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
1007010070
// Variant of version done on multiply, except mul by a power of 2 is turned
1007110071
// into a shift.
1007210072
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10073-
N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10073+
TLI.isDesirableToCommuteWithShift(N, Level)) {
1007410074
SDValue N01 = N0.getOperand(1);
1007510075
if (SDValue Shl1 =
1007610076
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17518,6 +17518,9 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
1751817518
SDValue ShiftLHS = N->getOperand(0);
1751917519
EVT VT = N->getValueType(0);
1752017520

17521+
// if (!ShiftLHS->hasOneUse())
17522+
// return false;
17523+
1752117524
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
1752217525
// combine it with shift 'N' to let it be lowered to UBFX except:
1752317526
// ((x >> C) & mask) << C.

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,6 +1036,11 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
10361036
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
10371037
N->getOpcode() == ISD::SRL) &&
10381038
"Expected shift op");
1039+
1040+
// if (!N->getOperand(0).hasOneUse()) {
1041+
// return false;
1042+
// }
1043+
10391044
// Always commute pre-type legalization and right shifts.
10401045
// We're looking for shl(or(x,y),z) patterns.
10411046
if (Level < CombineLevel::AfterLegalizeTypes ||

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2156,6 +2156,16 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
21562156
return X.getValueType().isScalarInteger(); // 'tstbit'
21572157
}
21582158

2159+
bool HexagonTargetLowering::isDesirableToCommuteWithShift(
2160+
const SDNode *N, CombineLevel Level) const {
2161+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
2162+
N->getOpcode() == ISD::SRL) &&
2163+
"Expected shift op");
2164+
2165+
// if (!N->getOperand(0)->hasOneUse())
2166+
// return false;
2167+
return true;
2168+
}
21592169
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
21602170
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
21612171
}

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,9 @@ class HexagonTargetLowering : public TargetLowering {
155155

156156
bool hasBitTest(SDValue X, SDValue Y) const override;
157157

158+
bool isDesirableToCommuteWithShift(const SDNode *N,
159+
CombineLevel Level) const override;
160+
158161
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
159162

160163
/// Return true if an FMA operation is faster than a pair of mul and add

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17207,6 +17207,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
1720717207
return false;
1720817208
}
1720917209

17210+
bool PPCTargetLowering::isDesirableToCommuteWithShift(
17211+
const SDNode *N, CombineLevel Level) const {
17212+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17213+
N->getOpcode() == ISD::SRL) &&
17214+
"Expected shift op");
17215+
17216+
// if (!N->getOperand(0).hasOneUse()) {
17217+
// return false;
17218+
// }
17219+
return true;
17220+
}
17221+
1721017222
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1721117223
const CallInst &I,
1721217224
MachineFunction &MF,

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,9 @@ namespace llvm {
10641064

10651065
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
10661066

1067+
bool isDesirableToCommuteWithShift(const SDNode *N,
1068+
CombineLevel Level) const override;
1069+
10671070
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
10681071
const CallInst &I,
10691072
MachineFunction &MF,

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3490,6 +3490,17 @@ X86TargetLowering::preferredShiftLegalizationStrategy(
34903490
ExpansionFactor);
34913491
}
34923492

3493+
bool X86TargetLowering::isDesirableToCommuteWithShift(
3494+
const SDNode *N, CombineLevel Level) const {
3495+
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
3496+
N->getOpcode() == ISD::SRL) &&
3497+
"Expected shift op");
3498+
3499+
// if (!N->getOperand(0)->hasOneUse())
3500+
// return false;
3501+
return true;
3502+
}
3503+
34933504
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
34943505
// Any legal vector type can be splatted more efficiently than
34953506
// loading/spilling from memory.

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,6 +1181,9 @@ namespace llvm {
11811181
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
11821182
unsigned ExpansionFactor) const override;
11831183

1184+
bool isDesirableToCommuteWithShift(const SDNode *N,
1185+
CombineLevel Level) const override;
1186+
11841187
bool shouldSplatInsEltVarIndex(EVT VT) const override;
11851188

11861189
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {

llvm/test/CodeGen/ARM/add-like-or.ll

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -249,27 +249,28 @@ entry:
249249
define i32 @multiuse(i32 %i, ptr %x, ptr %y) {
250250
; CHECK-T1-LABEL: multiuse:
251251
; CHECK-T1: @ %bb.0: @ %entry
252+
; CHECK-T1-NEXT: lsls r2, r0, #3
253+
; CHECK-T1-NEXT: adds r1, r1, r2
254+
; CHECK-T1-NEXT: ldr r1, [r1, #4]
252255
; CHECK-T1-NEXT: lsls r0, r0, #1
256+
; CHECK-T1-NEXT: adds r0, r1, r0
253257
; CHECK-T1-NEXT: adds r0, r0, #1
254-
; CHECK-T1-NEXT: lsls r2, r0, #2
255-
; CHECK-T1-NEXT: ldr r1, [r1, r2]
256-
; CHECK-T1-NEXT: adds r0, r0, r1
257258
; CHECK-T1-NEXT: bx lr
258259
;
259260
; CHECK-T2-LABEL: multiuse:
260261
; CHECK-T2: @ %bb.0: @ %entry
261-
; CHECK-T2-NEXT: lsls r0, r0, #1
262+
; CHECK-T2-NEXT: add.w r1, r1, r0, lsl #3
263+
; CHECK-T2-NEXT: ldr r1, [r1, #4]
264+
; CHECK-T2-NEXT: add.w r0, r1, r0, lsl #1
262265
; CHECK-T2-NEXT: adds r0, #1
263-
; CHECK-T2-NEXT: ldr.w r1, [r1, r0, lsl #2]
264-
; CHECK-T2-NEXT: add r0, r1
265266
; CHECK-T2-NEXT: bx lr
266267
;
267268
; CHECK-A-LABEL: multiuse:
268269
; CHECK-A: @ %bb.0: @ %entry
269-
; CHECK-A-NEXT: mov r2, #1
270-
; CHECK-A-NEXT: orr r0, r2, r0, lsl #1
271-
; CHECK-A-NEXT: ldr r1, [r1, r0, lsl #2]
272-
; CHECK-A-NEXT: add r0, r0, r1
270+
; CHECK-A-NEXT: add r1, r1, r0, lsl #3
271+
; CHECK-A-NEXT: ldr r1, [r1, #4]
272+
; CHECK-A-NEXT: add r0, r1, r0, lsl #1
273+
; CHECK-A-NEXT: add r0, r0, #1
273274
; CHECK-A-NEXT: bx lr
274275
entry:
275276
%mul = shl i32 %i, 1

llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
7070
; RV64-LABEL: test2:
7171
; RV64: # %bb.0: # %entry
7272
; RV64-NEXT: addi a3, a1, 5
73-
; RV64-NEXT: slli a4, a3, 3
74-
; RV64-NEXT: add a4, a0, a4
75-
; RV64-NEXT: sd a2, 0(a4)
7673
; RV64-NEXT: slli a1, a1, 3
7774
; RV64-NEXT: add a0, a1, a0
75+
; RV64-NEXT: sd a2, 40(a0)
7876
; RV64-NEXT: sd a2, 48(a0)
7977
; RV64-NEXT: sd a3, 280(a0)
8078
; RV64-NEXT: ret
@@ -100,11 +98,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
10098
; RV64-NEXT: # %bb.1: # %entry
10199
; RV64-NEXT: mv a5, a2
102100
; RV64-NEXT: .LBB3_2: # %entry
103-
; RV64-NEXT: slli a2, a4, 3
104-
; RV64-NEXT: add a2, a0, a2
105-
; RV64-NEXT: sd a5, 0(a2)
106101
; RV64-NEXT: slli a1, a1, 3
107102
; RV64-NEXT: add a0, a1, a0
103+
; RV64-NEXT: sd a5, 40(a0)
108104
; RV64-NEXT: sd a5, 48(a0)
109105
; RV64-NEXT: sd a4, 280(a0)
110106
; RV64-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,14 @@
77
define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
88
; CHECK-LABEL: srem_eq_fold_nxv4i8:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: li a0, 42
10+
; CHECK-NEXT: li a0, -85
1111
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
12-
; CHECK-NEXT: vmv.v.x v9, a0
13-
; CHECK-NEXT: li a1, -85
14-
; CHECK-NEXT: vmacc.vx v9, a1, v8
15-
; CHECK-NEXT: vsll.vi v8, v9, 7
16-
; CHECK-NEXT: vsrl.vi v9, v9, 1
17-
; CHECK-NEXT: vor.vv v8, v9, v8
12+
; CHECK-NEXT: vmul.vx v8, v8, a0
13+
; CHECK-NEXT: vsll.vi v9, v8, 7
14+
; CHECK-NEXT: li a0, 42
15+
; CHECK-NEXT: vadd.vx v8, v8, a0
16+
; CHECK-NEXT: vsrl.vi v8, v8, 1
17+
; CHECK-NEXT: vor.vv v8, v8, v9
1818
; CHECK-NEXT: vmsleu.vx v0, v8, a0
1919
; CHECK-NEXT: ret
2020
%rem = srem <vscale x 4 x i8> %va, splat (i8 6)

0 commit comments

Comments
 (0)