Skip to content

Commit 0e0b2d2

Browse files
committed
[DAG] Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB)
This is useful when the inner add has multiple uses and so cannot be canonicalized by pushing the constants down through the mul. I have added patterns for both `add(mul(add(A, CA), CM), CB)` and, with an extra intermediate add, `add(add(mul(add(A, CA), CM), B), CB)`, as the second form can come up when lowering GEPs.
1 parent 27af108 commit 0e0b2d2

File tree

2 files changed

+51
-21
lines changed

2 files changed

+51
-21
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2838,6 +2838,38 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28382838
return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
28392839
}
28402840

2841+
// Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2842+
// This can help if the inner add has multiple uses.
2843+
APInt CM, CA;
2844+
if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2845+
if (VT.getScalarSizeInBits() <= 64) {
2846+
if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2847+
m_ConstInt(CM)))) &&
2848+
TLI.isLegalAddImmediate(
2849+
(CA * CM + CB->getAPIntValue()).getSExtValue())) {
2850+
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2851+
DAG.getConstant(CM, DL, VT));
2852+
return DAG.getNode(
2853+
ISD::ADD, DL, VT, Mul,
2854+
DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT));
2855+
}
2856+
// Also look in case there is an intermediate add.
2857+
if (sd_match(N0, m_OneUse(m_Add(
2858+
m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2859+
m_ConstInt(CM))),
2860+
m_Value(B)))) &&
2861+
TLI.isLegalAddImmediate(
2862+
(CA * CM + CB->getAPIntValue()).getSExtValue())) {
2863+
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2864+
DAG.getConstant(CM, DL, VT));
2865+
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B);
2866+
return DAG.getNode(
2867+
ISD::ADD, DL, VT, Add,
2868+
DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT));
2869+
}
2870+
}
2871+
}
2872+
28412873
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
28422874
return Combined;
28432875

llvm/test/CodeGen/AArch64/addimm-mulimm.ll

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,9 @@ define signext i32 @addmuladd_multiuse(i32 signext %a) {
166166
; CHECK-LABEL: addmuladd_multiuse:
167167
; CHECK: // %bb.0:
168168
; CHECK-NEXT: mov w8, #324 // =0x144
169+
; CHECK-NEXT: mov w9, #1300 // =0x514
170+
; CHECK-NEXT: madd w8, w0, w8, w9
169171
; CHECK-NEXT: add w9, w0, #4
170-
; CHECK-NEXT: mov w10, #4 // =0x4
171-
; CHECK-NEXT: madd w8, w9, w8, w10
172172
; CHECK-NEXT: eor w0, w9, w8
173173
; CHECK-NEXT: ret
174174
%tmp0 = add i32 %a, 4
@@ -198,11 +198,10 @@ define signext i32 @addmuladd_multiuse2(i32 signext %a) {
198198
; CHECK-LABEL: addmuladd_multiuse2:
199199
; CHECK: // %bb.0:
200200
; CHECK-NEXT: mov w8, #324 // =0x144
201-
; CHECK-NEXT: add w9, w0, #4
202-
; CHECK-NEXT: mov w11, #4 // =0x4
203-
; CHECK-NEXT: lsl w10, w9, #2
204-
; CHECK-NEXT: madd w8, w9, w8, w11
205-
; CHECK-NEXT: add w9, w10, #4
201+
; CHECK-NEXT: lsl w9, w0, #2
202+
; CHECK-NEXT: mov w10, #1300 // =0x514
203+
; CHECK-NEXT: madd w8, w0, w8, w10
204+
; CHECK-NEXT: add w9, w9, #20
206205
; CHECK-NEXT: eor w0, w8, w9
207206
; CHECK-NEXT: ret
208207
%tmp0 = add i32 %a, 4
@@ -233,8 +232,8 @@ define signext i32 @addaddmuladd_multiuse(i32 signext %a, i32 %b) {
233232
; CHECK: // %bb.0:
234233
; CHECK-NEXT: mov w8, #324 // =0x144
235234
; CHECK-NEXT: add w9, w0, #4
236-
; CHECK-NEXT: madd w8, w9, w8, w1
237-
; CHECK-NEXT: add w8, w8, #4
235+
; CHECK-NEXT: madd w8, w0, w8, w1
236+
; CHECK-NEXT: add w8, w8, #1300
238237
; CHECK-NEXT: eor w0, w9, w8
239238
; CHECK-NEXT: ret
240239
%tmp0 = add i32 %a, 4
@@ -249,12 +248,11 @@ define signext i32 @addaddmuladd_multiuse2(i32 signext %a, i32 %b) {
249248
; CHECK-LABEL: addaddmuladd_multiuse2:
250249
; CHECK: // %bb.0:
251250
; CHECK-NEXT: mov w8, #324 // =0x144
252-
; CHECK-NEXT: add w9, w0, #4
253-
; CHECK-NEXT: mov w10, #162 // =0xa2
254-
; CHECK-NEXT: madd w8, w9, w8, w1
255-
; CHECK-NEXT: madd w9, w9, w10, w1
256-
; CHECK-NEXT: add w8, w8, #4
257-
; CHECK-NEXT: add w9, w9, #4
251+
; CHECK-NEXT: mov w9, #162 // =0xa2
252+
; CHECK-NEXT: madd w8, w0, w8, w1
253+
; CHECK-NEXT: madd w9, w0, w9, w1
254+
; CHECK-NEXT: add w8, w8, #1300
255+
; CHECK-NEXT: add w9, w9, #652
258256
; CHECK-NEXT: eor w0, w9, w8
259257
; CHECK-NEXT: ret
260258
%tmp0 = add i32 %a, 4
@@ -319,17 +317,17 @@ define void @addmuladd_gep(ptr %p, i64 %a) {
319317
define i32 @addmuladd_gep2(ptr %p, i32 %a) {
320318
; CHECK-LABEL: addmuladd_gep2:
321319
; CHECK: // %bb.0:
320+
; CHECK-NEXT: mov w8, #3240 // =0xca8
322321
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
323-
; CHECK-NEXT: sxtw x8, w1
324-
; CHECK-NEXT: mov w9, #3240 // =0xca8
325-
; CHECK-NEXT: add x8, x8, #1
326-
; CHECK-NEXT: madd x9, x8, x9, x0
327-
; CHECK-NEXT: ldr w9, [x9, #20]
328-
; CHECK-NEXT: tbnz w9, #31, .LBB22_2
322+
; CHECK-NEXT: smaddl x8, w1, w8, x0
323+
; CHECK-NEXT: ldr w8, [x8, #3260]
324+
; CHECK-NEXT: tbnz w8, #31, .LBB22_2
329325
; CHECK-NEXT: // %bb.1:
330326
; CHECK-NEXT: mov w0, wzr
331327
; CHECK-NEXT: ret
332328
; CHECK-NEXT: .LBB22_2: // %then
329+
; CHECK-NEXT: sxtw x8, w1
330+
; CHECK-NEXT: add x8, x8, #1
333331
; CHECK-NEXT: str x8, [x0]
334332
; CHECK-NEXT: mov w0, #1 // =0x1
335333
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)