Skip to content

Commit fef1456

Browse files
authored
[RISCV] Add 2^N + 2^M expanding pattern for mul (#137954)
1 parent 05a2b33 commit fef1456

File tree

9 files changed

+369
-291
lines changed

9 files changed

+369
-291
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15407,6 +15407,30 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1540715407
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
1540815408
}
1540915409

15410+
// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
15411+
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
15412+
uint64_t MulAmt) {
15413+
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
15414+
ISD::NodeType Op;
15415+
uint64_t ShiftAmt1;
15416+
if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
15417+
Op = ISD::SUB;
15418+
ShiftAmt1 = MulAmt + MulAmtLowBit;
15419+
} else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
15420+
Op = ISD::ADD;
15421+
ShiftAmt1 = MulAmt - MulAmtLowBit;
15422+
} else {
15423+
return SDValue();
15424+
}
15425+
EVT VT = N->getValueType(0);
15426+
SDLoc DL(N);
15427+
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15428+
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
15429+
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15430+
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
15431+
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
15432+
}
15433+
1541015434
// Try to expand a scalar multiply to a faster sequence.
1541115435
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1541215436
TargetLowering::DAGCombinerInfo &DCI,
@@ -15540,22 +15564,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1554015564
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
1554115565
}
1554215566
}
15543-
}
1554415567

15545-
// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
15546-
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
15547-
if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
15548-
uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
15549-
SDLoc DL(N);
15550-
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15551-
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
15552-
SDValue Shift2 =
15553-
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15554-
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
15555-
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
15556-
}
15557-
15558-
if (HasShlAdd) {
1555915568
for (uint64_t Divisor : {3, 5, 9}) {
1556015569
if (MulAmt % Divisor != 0)
1556115570
continue;
@@ -15581,6 +15590,9 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1558115590
}
1558215591
}
1558315592

15593+
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
15594+
return V;
15595+
1558415596
return SDValue();
1558515597
}
1558615598

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -502,24 +502,23 @@ define i32 @muli32_p18(i32 %a) nounwind {
502502
;
503503
; RV32IM-LABEL: muli32_p18:
504504
; RV32IM: # %bb.0:
505-
; RV32IM-NEXT: li a1, 18
506-
; RV32IM-NEXT: mul a0, a0, a1
505+
; RV32IM-NEXT: slli a1, a0, 1
506+
; RV32IM-NEXT: slli a0, a0, 4
507+
; RV32IM-NEXT: add a0, a0, a1
507508
; RV32IM-NEXT: ret
508509
;
509510
; RV64I-LABEL: muli32_p18:
510511
; RV64I: # %bb.0:
511-
; RV64I-NEXT: addi sp, sp, -16
512-
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
513-
; RV64I-NEXT: li a1, 18
514-
; RV64I-NEXT: call __muldi3
515-
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
516-
; RV64I-NEXT: addi sp, sp, 16
512+
; RV64I-NEXT: slli a1, a0, 1
513+
; RV64I-NEXT: slli a0, a0, 4
514+
; RV64I-NEXT: add a0, a0, a1
517515
; RV64I-NEXT: ret
518516
;
519517
; RV64IM-LABEL: muli32_p18:
520518
; RV64IM: # %bb.0:
521-
; RV64IM-NEXT: li a1, 18
522-
; RV64IM-NEXT: mulw a0, a0, a1
519+
; RV64IM-NEXT: slli a1, a0, 1
520+
; RV64IM-NEXT: slli a0, a0, 4
521+
; RV64IM-NEXT: addw a0, a0, a1
523522
; RV64IM-NEXT: ret
524523
%1 = mul i32 %a, 18
525524
ret i32 %1
@@ -593,24 +592,23 @@ define i32 @muli32_p34(i32 %a) nounwind {
593592
;
594593
; RV32IM-LABEL: muli32_p34:
595594
; RV32IM: # %bb.0:
596-
; RV32IM-NEXT: li a1, 34
597-
; RV32IM-NEXT: mul a0, a0, a1
595+
; RV32IM-NEXT: slli a1, a0, 1
596+
; RV32IM-NEXT: slli a0, a0, 5
597+
; RV32IM-NEXT: add a0, a0, a1
598598
; RV32IM-NEXT: ret
599599
;
600600
; RV64I-LABEL: muli32_p34:
601601
; RV64I: # %bb.0:
602-
; RV64I-NEXT: addi sp, sp, -16
603-
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
604-
; RV64I-NEXT: li a1, 34
605-
; RV64I-NEXT: call __muldi3
606-
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
607-
; RV64I-NEXT: addi sp, sp, 16
602+
; RV64I-NEXT: slli a1, a0, 1
603+
; RV64I-NEXT: slli a0, a0, 5
604+
; RV64I-NEXT: add a0, a0, a1
608605
; RV64I-NEXT: ret
609606
;
610607
; RV64IM-LABEL: muli32_p34:
611608
; RV64IM: # %bb.0:
612-
; RV64IM-NEXT: li a1, 34
613-
; RV64IM-NEXT: mulw a0, a0, a1
609+
; RV64IM-NEXT: slli a1, a0, 1
610+
; RV64IM-NEXT: slli a0, a0, 5
611+
; RV64IM-NEXT: addw a0, a0, a1
614612
; RV64IM-NEXT: ret
615613
%1 = mul i32 %a, 34
616614
ret i32 %1
@@ -624,24 +622,23 @@ define i32 @muli32_p36(i32 %a) nounwind {
624622
;
625623
; RV32IM-LABEL: muli32_p36:
626624
; RV32IM: # %bb.0:
627-
; RV32IM-NEXT: li a1, 36
628-
; RV32IM-NEXT: mul a0, a0, a1
625+
; RV32IM-NEXT: slli a1, a0, 2
626+
; RV32IM-NEXT: slli a0, a0, 5
627+
; RV32IM-NEXT: add a0, a0, a1
629628
; RV32IM-NEXT: ret
630629
;
631630
; RV64I-LABEL: muli32_p36:
632631
; RV64I: # %bb.0:
633-
; RV64I-NEXT: addi sp, sp, -16
634-
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
635-
; RV64I-NEXT: li a1, 36
636-
; RV64I-NEXT: call __muldi3
637-
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
638-
; RV64I-NEXT: addi sp, sp, 16
632+
; RV64I-NEXT: slli a1, a0, 2
633+
; RV64I-NEXT: slli a0, a0, 5
634+
; RV64I-NEXT: add a0, a0, a1
639635
; RV64I-NEXT: ret
640636
;
641637
; RV64IM-LABEL: muli32_p36:
642638
; RV64IM: # %bb.0:
643-
; RV64IM-NEXT: li a1, 36
644-
; RV64IM-NEXT: mulw a0, a0, a1
639+
; RV64IM-NEXT: slli a1, a0, 2
640+
; RV64IM-NEXT: slli a0, a0, 5
641+
; RV64IM-NEXT: addw a0, a0, a1
645642
; RV64IM-NEXT: ret
646643
%1 = mul i32 %a, 36
647644
ret i32 %1
@@ -886,10 +883,14 @@ define i64 @muli64_p72(i64 %a) nounwind {
886883
; RV32IM-LABEL: muli64_p72:
887884
; RV32IM: # %bb.0:
888885
; RV32IM-NEXT: li a2, 72
889-
; RV32IM-NEXT: mul a1, a1, a2
890-
; RV32IM-NEXT: mulhu a3, a0, a2
891-
; RV32IM-NEXT: add a1, a3, a1
892-
; RV32IM-NEXT: mul a0, a0, a2
886+
; RV32IM-NEXT: slli a3, a1, 3
887+
; RV32IM-NEXT: slli a1, a1, 6
888+
; RV32IM-NEXT: add a1, a1, a3
889+
; RV32IM-NEXT: slli a3, a0, 3
890+
; RV32IM-NEXT: mulhu a2, a0, a2
891+
; RV32IM-NEXT: slli a0, a0, 6
892+
; RV32IM-NEXT: add a1, a2, a1
893+
; RV32IM-NEXT: add a0, a0, a3
893894
; RV32IM-NEXT: ret
894895
;
895896
; RV64I-LABEL: muli64_p72:
@@ -899,8 +900,9 @@ define i64 @muli64_p72(i64 %a) nounwind {
899900
;
900901
; RV64IM-LABEL: muli64_p72:
901902
; RV64IM: # %bb.0:
902-
; RV64IM-NEXT: li a1, 72
903-
; RV64IM-NEXT: mul a0, a0, a1
903+
; RV64IM-NEXT: slli a1, a0, 3
904+
; RV64IM-NEXT: slli a0, a0, 6
905+
; RV64IM-NEXT: add a0, a0, a1
904906
; RV64IM-NEXT: ret
905907
%1 = mul i64 %a, 72
906908
ret i64 %1
@@ -1263,12 +1265,16 @@ define i64 @muli64_p4352(i64 %a) nounwind {
12631265
;
12641266
; RV32IM-LABEL: muli64_p4352:
12651267
; RV32IM: # %bb.0:
1268+
; RV32IM-NEXT: slli a2, a1, 8
1269+
; RV32IM-NEXT: slli a1, a1, 12
1270+
; RV32IM-NEXT: add a1, a1, a2
12661271
; RV32IM-NEXT: li a2, 17
12671272
; RV32IM-NEXT: slli a2, a2, 8
1268-
; RV32IM-NEXT: mul a1, a1, a2
1269-
; RV32IM-NEXT: mulhu a3, a0, a2
1270-
; RV32IM-NEXT: add a1, a3, a1
1271-
; RV32IM-NEXT: mul a0, a0, a2
1273+
; RV32IM-NEXT: mulhu a2, a0, a2
1274+
; RV32IM-NEXT: add a1, a2, a1
1275+
; RV32IM-NEXT: slli a2, a0, 8
1276+
; RV32IM-NEXT: slli a0, a0, 12
1277+
; RV32IM-NEXT: add a0, a0, a2
12721278
; RV32IM-NEXT: ret
12731279
;
12741280
; RV64I-LABEL: muli64_p4352:

llvm/test/CodeGen/RISCV/rv32xtheadba.ll

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ define i32 @addmul6(i32 %a, i32 %b) {
116116
define i32 @addmul10(i32 %a, i32 %b) {
117117
; RV32I-LABEL: addmul10:
118118
; RV32I: # %bb.0:
119-
; RV32I-NEXT: li a2, 10
120-
; RV32I-NEXT: mul a0, a0, a2
119+
; RV32I-NEXT: slli a2, a0, 1
120+
; RV32I-NEXT: slli a0, a0, 3
121+
; RV32I-NEXT: add a0, a0, a2
121122
; RV32I-NEXT: add a0, a0, a1
122123
; RV32I-NEXT: ret
123124
;
@@ -153,8 +154,9 @@ define i32 @addmul12(i32 %a, i32 %b) {
153154
define i32 @addmul18(i32 %a, i32 %b) {
154155
; RV32I-LABEL: addmul18:
155156
; RV32I: # %bb.0:
156-
; RV32I-NEXT: li a2, 18
157-
; RV32I-NEXT: mul a0, a0, a2
157+
; RV32I-NEXT: slli a2, a0, 1
158+
; RV32I-NEXT: slli a0, a0, 4
159+
; RV32I-NEXT: add a0, a0, a2
158160
; RV32I-NEXT: add a0, a0, a1
159161
; RV32I-NEXT: ret
160162
;
@@ -171,8 +173,9 @@ define i32 @addmul18(i32 %a, i32 %b) {
171173
define i32 @addmul20(i32 %a, i32 %b) {
172174
; RV32I-LABEL: addmul20:
173175
; RV32I: # %bb.0:
174-
; RV32I-NEXT: li a2, 20
175-
; RV32I-NEXT: mul a0, a0, a2
176+
; RV32I-NEXT: slli a2, a0, 2
177+
; RV32I-NEXT: slli a0, a0, 4
178+
; RV32I-NEXT: add a0, a0, a2
176179
; RV32I-NEXT: add a0, a0, a1
177180
; RV32I-NEXT: ret
178181
;
@@ -208,8 +211,9 @@ define i32 @addmul24(i32 %a, i32 %b) {
208211
define i32 @addmul36(i32 %a, i32 %b) {
209212
; RV32I-LABEL: addmul36:
210213
; RV32I: # %bb.0:
211-
; RV32I-NEXT: li a2, 36
212-
; RV32I-NEXT: mul a0, a0, a2
214+
; RV32I-NEXT: slli a2, a0, 2
215+
; RV32I-NEXT: slli a0, a0, 5
216+
; RV32I-NEXT: add a0, a0, a2
213217
; RV32I-NEXT: add a0, a0, a1
214218
; RV32I-NEXT: ret
215219
;
@@ -226,8 +230,9 @@ define i32 @addmul36(i32 %a, i32 %b) {
226230
define i32 @addmul40(i32 %a, i32 %b) {
227231
; RV32I-LABEL: addmul40:
228232
; RV32I: # %bb.0:
229-
; RV32I-NEXT: li a2, 40
230-
; RV32I-NEXT: mul a0, a0, a2
233+
; RV32I-NEXT: slli a2, a0, 3
234+
; RV32I-NEXT: slli a0, a0, 5
235+
; RV32I-NEXT: add a0, a0, a2
231236
; RV32I-NEXT: add a0, a0, a1
232237
; RV32I-NEXT: ret
233238
;
@@ -244,8 +249,9 @@ define i32 @addmul40(i32 %a, i32 %b) {
244249
define i32 @addmul72(i32 %a, i32 %b) {
245250
; RV32I-LABEL: addmul72:
246251
; RV32I: # %bb.0:
247-
; RV32I-NEXT: li a2, 72
248-
; RV32I-NEXT: mul a0, a0, a2
252+
; RV32I-NEXT: slli a2, a0, 3
253+
; RV32I-NEXT: slli a0, a0, 6
254+
; RV32I-NEXT: add a0, a0, a2
249255
; RV32I-NEXT: add a0, a0, a1
250256
; RV32I-NEXT: ret
251257
;
@@ -279,8 +285,9 @@ define i32 @mul96(i32 %a) {
279285
define i32 @mul160(i32 %a) {
280286
; RV32I-LABEL: mul160:
281287
; RV32I: # %bb.0:
282-
; RV32I-NEXT: li a1, 160
283-
; RV32I-NEXT: mul a0, a0, a1
288+
; RV32I-NEXT: slli a1, a0, 5
289+
; RV32I-NEXT: slli a0, a0, 7
290+
; RV32I-NEXT: add a0, a0, a1
284291
; RV32I-NEXT: ret
285292
;
286293
; RV32XTHEADBA-LABEL: mul160:
@@ -312,8 +319,9 @@ define i32 @mul200(i32 %a) {
312319
define i32 @mul288(i32 %a) {
313320
; RV32I-LABEL: mul288:
314321
; RV32I: # %bb.0:
315-
; RV32I-NEXT: li a1, 288
316-
; RV32I-NEXT: mul a0, a0, a1
322+
; RV32I-NEXT: slli a1, a0, 5
323+
; RV32I-NEXT: slli a0, a0, 8
324+
; RV32I-NEXT: add a0, a0, a1
317325
; RV32I-NEXT: ret
318326
;
319327
; RV32XTHEADBA-LABEL: mul288:
@@ -328,8 +336,9 @@ define i32 @mul288(i32 %a) {
328336
define i32 @mul258(i32 %a) {
329337
; RV32I-LABEL: mul258:
330338
; RV32I: # %bb.0:
331-
; RV32I-NEXT: li a1, 258
332-
; RV32I-NEXT: mul a0, a0, a1
339+
; RV32I-NEXT: slli a1, a0, 1
340+
; RV32I-NEXT: slli a0, a0, 8
341+
; RV32I-NEXT: add a0, a0, a1
333342
; RV32I-NEXT: ret
334343
;
335344
; RV32XTHEADBA-LABEL: mul258:
@@ -344,8 +353,9 @@ define i32 @mul258(i32 %a) {
344353
define i32 @mul260(i32 %a) {
345354
; RV32I-LABEL: mul260:
346355
; RV32I: # %bb.0:
347-
; RV32I-NEXT: li a1, 260
348-
; RV32I-NEXT: mul a0, a0, a1
356+
; RV32I-NEXT: slli a1, a0, 2
357+
; RV32I-NEXT: slli a0, a0, 8
358+
; RV32I-NEXT: add a0, a0, a1
349359
; RV32I-NEXT: ret
350360
;
351361
; RV32XTHEADBA-LABEL: mul260:
@@ -360,8 +370,9 @@ define i32 @mul260(i32 %a) {
360370
define i32 @mul264(i32 %a) {
361371
; RV32I-LABEL: mul264:
362372
; RV32I: # %bb.0:
363-
; RV32I-NEXT: li a1, 264
364-
; RV32I-NEXT: mul a0, a0, a1
373+
; RV32I-NEXT: slli a1, a0, 3
374+
; RV32I-NEXT: slli a0, a0, 8
375+
; RV32I-NEXT: add a0, a0, a1
365376
; RV32I-NEXT: ret
366377
;
367378
; RV32XTHEADBA-LABEL: mul264:

0 commit comments

Comments
 (0)