Skip to content

Commit fcf945f

Browse files
authored
[DAG] Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB) (#90860)
This is useful when the inner add has multiple uses and so cannot be canonicalized by pushing the constants down through the mul. This patch adds patterns for both `add(mul(add(A, CA), CM), CB)` and the variant with an extra intermediate add, `add(add(mul(add(A, CA), CM), B), CB)`, since the second form can come up when lowering GEPs.
1 parent 965f3ca commit fcf945f

File tree

2 files changed

+91
-36
lines changed

2 files changed

+91
-36
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2838,6 +2838,66 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28382838
return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
28392839
}
28402840

2841+
// Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2842+
// This can help if the inner add has multiple uses.
2843+
APInt CM, CA;
2844+
if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2845+
if (VT.getScalarSizeInBits() <= 64) {
2846+
if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2847+
m_ConstInt(CM)))) &&
2848+
TLI.isLegalAddImmediate(
2849+
(CA * CM + CB->getAPIntValue()).getSExtValue())) {
2850+
SDNodeFlags Flags;
2851+
// If all the inputs are nuw, the outputs can be nuw. If all the inputs
2852+
// are _also_ nsw the outputs can be too.
2853+
if (N->getFlags().hasNoUnsignedWrap() &&
2854+
N0->getFlags().hasNoUnsignedWrap() &&
2855+
N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2856+
Flags.setNoUnsignedWrap(true);
2857+
if (N->getFlags().hasNoSignedWrap() &&
2858+
N0->getFlags().hasNoSignedWrap() &&
2859+
N0.getOperand(0)->getFlags().hasNoSignedWrap())
2860+
Flags.setNoSignedWrap(true);
2861+
}
2862+
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2863+
DAG.getConstant(CM, DL, VT), Flags);
2864+
return DAG.getNode(
2865+
ISD::ADD, DL, VT, Mul,
2866+
DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2867+
}
2868+
// Also look in case there is an intermediate add.
2869+
if (sd_match(N0, m_OneUse(m_Add(
2870+
m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2871+
m_ConstInt(CM))),
2872+
m_Value(B)))) &&
2873+
TLI.isLegalAddImmediate(
2874+
(CA * CM + CB->getAPIntValue()).getSExtValue())) {
2875+
SDNodeFlags Flags;
2876+
// If all the inputs are nuw, the outputs can be nuw. If all the inputs
2877+
// are _also_ nsw the outputs can be too.
2878+
SDValue OMul =
2879+
N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2880+
if (N->getFlags().hasNoUnsignedWrap() &&
2881+
N0->getFlags().hasNoUnsignedWrap() &&
2882+
OMul->getFlags().hasNoUnsignedWrap() &&
2883+
OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2884+
Flags.setNoUnsignedWrap(true);
2885+
if (N->getFlags().hasNoSignedWrap() &&
2886+
N0->getFlags().hasNoSignedWrap() &&
2887+
OMul->getFlags().hasNoSignedWrap() &&
2888+
OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2889+
Flags.setNoSignedWrap(true);
2890+
}
2891+
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2892+
DAG.getConstant(CM, DL, VT), Flags);
2893+
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2894+
return DAG.getNode(
2895+
ISD::ADD, DL, VT, Add,
2896+
DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2897+
}
2898+
}
2899+
}
2900+
28412901
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
28422902
return Combined;
28432903

llvm/test/CodeGen/AArch64/addimm-mulimm.ll

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,9 @@ define signext i32 @addmuladd_multiuse(i32 signext %a) {
166166
; CHECK-LABEL: addmuladd_multiuse:
167167
; CHECK: // %bb.0:
168168
; CHECK-NEXT: mov w8, #324 // =0x144
169+
; CHECK-NEXT: mov w9, #1300 // =0x514
170+
; CHECK-NEXT: madd w8, w0, w8, w9
169171
; CHECK-NEXT: add w9, w0, #4
170-
; CHECK-NEXT: mov w10, #4 // =0x4
171-
; CHECK-NEXT: madd w8, w9, w8, w10
172172
; CHECK-NEXT: eor w0, w9, w8
173173
; CHECK-NEXT: ret
174174
%tmp0 = add i32 %a, 4
@@ -198,11 +198,10 @@ define signext i32 @addmuladd_multiuse2(i32 signext %a) {
198198
; CHECK-LABEL: addmuladd_multiuse2:
199199
; CHECK: // %bb.0:
200200
; CHECK-NEXT: mov w8, #324 // =0x144
201-
; CHECK-NEXT: add w9, w0, #4
202-
; CHECK-NEXT: mov w11, #4 // =0x4
203-
; CHECK-NEXT: lsl w10, w9, #2
204-
; CHECK-NEXT: madd w8, w9, w8, w11
205-
; CHECK-NEXT: add w9, w10, #4
201+
; CHECK-NEXT: lsl w9, w0, #2
202+
; CHECK-NEXT: mov w10, #1300 // =0x514
203+
; CHECK-NEXT: madd w8, w0, w8, w10
204+
; CHECK-NEXT: add w9, w9, #20
206205
; CHECK-NEXT: eor w0, w8, w9
207206
; CHECK-NEXT: ret
208207
%tmp0 = add i32 %a, 4
@@ -233,8 +232,8 @@ define signext i32 @addaddmuladd_multiuse(i32 signext %a, i32 %b) {
233232
; CHECK: // %bb.0:
234233
; CHECK-NEXT: mov w8, #324 // =0x144
235234
; CHECK-NEXT: add w9, w0, #4
236-
; CHECK-NEXT: madd w8, w9, w8, w1
237-
; CHECK-NEXT: add w8, w8, #4
235+
; CHECK-NEXT: madd w8, w0, w8, w1
236+
; CHECK-NEXT: add w8, w8, #1300
238237
; CHECK-NEXT: eor w0, w9, w8
239238
; CHECK-NEXT: ret
240239
%tmp0 = add i32 %a, 4
@@ -249,12 +248,11 @@ define signext i32 @addaddmuladd_multiuse2(i32 signext %a, i32 %b) {
249248
; CHECK-LABEL: addaddmuladd_multiuse2:
250249
; CHECK: // %bb.0:
251250
; CHECK-NEXT: mov w8, #324 // =0x144
252-
; CHECK-NEXT: add w9, w0, #4
253-
; CHECK-NEXT: mov w10, #162 // =0xa2
254-
; CHECK-NEXT: madd w8, w9, w8, w1
255-
; CHECK-NEXT: madd w9, w9, w10, w1
256-
; CHECK-NEXT: add w8, w8, #4
257-
; CHECK-NEXT: add w9, w9, #4
251+
; CHECK-NEXT: mov w9, #162 // =0xa2
252+
; CHECK-NEXT: madd w8, w0, w8, w1
253+
; CHECK-NEXT: madd w9, w0, w9, w1
254+
; CHECK-NEXT: add w8, w8, #1300
255+
; CHECK-NEXT: add w9, w9, #652
258256
; CHECK-NEXT: eor w0, w9, w8
259257
; CHECK-NEXT: ret
260258
%tmp0 = add i32 %a, 4
@@ -319,17 +317,17 @@ define void @addmuladd_gep(ptr %p, i64 %a) {
319317
define i32 @addmuladd_gep2(ptr %p, i32 %a) {
320318
; CHECK-LABEL: addmuladd_gep2:
321319
; CHECK: // %bb.0:
320+
; CHECK-NEXT: mov w8, #3240 // =0xca8
322321
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
323-
; CHECK-NEXT: sxtw x8, w1
324-
; CHECK-NEXT: mov w9, #3240 // =0xca8
325-
; CHECK-NEXT: add x8, x8, #1
326-
; CHECK-NEXT: madd x9, x8, x9, x0
327-
; CHECK-NEXT: ldr w9, [x9, #20]
328-
; CHECK-NEXT: tbnz w9, #31, .LBB22_2
322+
; CHECK-NEXT: smaddl x8, w1, w8, x0
323+
; CHECK-NEXT: ldr w8, [x8, #3260]
324+
; CHECK-NEXT: tbnz w8, #31, .LBB22_2
329325
; CHECK-NEXT: // %bb.1:
330326
; CHECK-NEXT: mov w0, wzr
331327
; CHECK-NEXT: ret
332328
; CHECK-NEXT: .LBB22_2: // %then
329+
; CHECK-NEXT: sxtw x8, w1
330+
; CHECK-NEXT: add x8, x8, #1
333331
; CHECK-NEXT: str x8, [x0]
334332
; CHECK-NEXT: mov w0, #1 // =0x1
335333
; CHECK-NEXT: ret
@@ -351,11 +349,10 @@ define signext i32 @addmuladd_multiuse2_nsw(i32 signext %a) {
351349
; CHECK-LABEL: addmuladd_multiuse2_nsw:
352350
; CHECK: // %bb.0:
353351
; CHECK-NEXT: mov w8, #324 // =0x144
354-
; CHECK-NEXT: add w9, w0, #4
355-
; CHECK-NEXT: mov w11, #4 // =0x4
356-
; CHECK-NEXT: lsl w10, w9, #2
357-
; CHECK-NEXT: madd w8, w9, w8, w11
358-
; CHECK-NEXT: add w9, w10, #4
352+
; CHECK-NEXT: lsl w9, w0, #2
353+
; CHECK-NEXT: mov w10, #1300 // =0x514
354+
; CHECK-NEXT: madd w8, w0, w8, w10
355+
; CHECK-NEXT: add w9, w9, #20
359356
; CHECK-NEXT: eor w0, w8, w9
360357
; CHECK-NEXT: ret
361358
%tmp0 = add nsw i32 %a, 4
@@ -371,11 +368,10 @@ define signext i32 @addmuladd_multiuse2_nuw(i32 signext %a) {
371368
; CHECK-LABEL: addmuladd_multiuse2_nuw:
372369
; CHECK: // %bb.0:
373370
; CHECK-NEXT: mov w8, #324 // =0x144
374-
; CHECK-NEXT: add w9, w0, #4
375-
; CHECK-NEXT: mov w11, #4 // =0x4
376-
; CHECK-NEXT: lsl w10, w9, #2
377-
; CHECK-NEXT: madd w8, w9, w8, w11
378-
; CHECK-NEXT: add w9, w10, #4
371+
; CHECK-NEXT: lsl w9, w0, #2
372+
; CHECK-NEXT: mov w10, #1300 // =0x514
373+
; CHECK-NEXT: madd w8, w0, w8, w10
374+
; CHECK-NEXT: add w9, w9, #20
379375
; CHECK-NEXT: eor w0, w8, w9
380376
; CHECK-NEXT: ret
381377
%tmp0 = add nuw i32 %a, 4
@@ -391,11 +387,10 @@ define signext i32 @addmuladd_multiuse2_nswnuw(i32 signext %a) {
391387
; CHECK-LABEL: addmuladd_multiuse2_nswnuw:
392388
; CHECK: // %bb.0:
393389
; CHECK-NEXT: mov w8, #324 // =0x144
394-
; CHECK-NEXT: add w9, w0, #4
395-
; CHECK-NEXT: mov w11, #4 // =0x4
396-
; CHECK-NEXT: lsl w10, w9, #2
397-
; CHECK-NEXT: madd w8, w9, w8, w11
398-
; CHECK-NEXT: add w9, w10, #4
390+
; CHECK-NEXT: lsl w9, w0, #2
391+
; CHECK-NEXT: mov w10, #1300 // =0x514
392+
; CHECK-NEXT: madd w8, w0, w8, w10
393+
; CHECK-NEXT: add w9, w9, #20
399394
; CHECK-NEXT: eor w0, w8, w9
400395
; CHECK-NEXT: ret
401396
%tmp0 = add nsw nuw i32 %a, 4

0 commit comments

Comments
 (0)