Skip to content

Commit 85e63fb

Browse files
committed
[RISCV] Add 2^N + 2^M expanding pattern for mul
1 parent cba1d49 commit 85e63fb

File tree

9 files changed

+432
-316
lines changed

9 files changed

+432
-316
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15436,25 +15436,56 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1543615436
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
1543715437
}
1543815438

15439+
// 2^N +/- 2^M -> (add/sub (shl X, C1), (shl X, C2))
//
// Rewrites a multiply whose operand 1 is a constant of the form 2^N + 2^M or
// 2^N - 2^M into two left shifts of operand 0 combined with ADD or SUB.
// Returns an empty SDValue when operand 1 is not a ConstantSDNode or when the
// constant matches neither form. All created nodes use N's result type.
15440+
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG) {
15441+
ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
15442+
if (!CNode)
15443+
return SDValue();
15444+
uint64_t MulAmt = CNode->getZExtValue();
15445+
uint64_t MulAmtLowBit = MulAmt & (-MulAmt); // lowest set bit of MulAmt, i.e. 2^M
15446+
ISD::NodeType Op;
15447+
// Adding the low bit yields a power of two: MulAmt == 2^N - 2^M.
if (isPowerOf2_64(MulAmt + MulAmtLowBit))
15448+
Op = ISD::SUB;
15449+
// Removing the low bit yields a power of two: MulAmt == 2^N + 2^M.
else if (isPowerOf2_64(MulAmt - MulAmtLowBit))
15450+
Op = ISD::ADD;
15451+
else
15452+
return SDValue();
15453+
// NOTE(review): this sum is used for both SUB and ADD. In the ADD case it is
// 2^N + 2^(M+1), not a power of two, so the shift amount below is only N if
// Log2_64 rounds down -- confirm against MathExtras.h. (M + 1 == N cannot
// reach the ADD path: that sum is 2^(N+1) and is taken by the SUB branch.)
uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
15454+
SDLoc DL(N);
15455+
// Shift1 = X << log2(ShiftAmt1): the 2^N term.
SDValue Shift1 =
15456+
DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
15457+
DAG.getConstant(Log2_64(ShiftAmt1), DL, N->getValueType(0)));
15458+
// Shift2 = X << M: the 2^M term.
SDValue Shift2 = DAG.getNode(
15459+
ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
15460+
DAG.getConstant(Log2_64(MulAmtLowBit), DL, N->getValueType(0)));
15461+
return DAG.getNode(Op, DL, N->getValueType(0), Shift1, Shift2);
15462+
}
15463+
1543915464
// Try to expand a scalar multiply to a faster sequence.
1544015465
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1544115466
TargetLowering::DAGCombinerInfo &DCI,
1544215467
const RISCVSubtarget &Subtarget) {
1544315468

1544415469
EVT VT = N->getValueType(0);
1544515470

15471+
const bool HasShlAdd =
15472+
Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
15473+
1544615474
// LI + MUL is usually smaller than the alternative sequence.
1544715475
if (DAG.getMachineFunction().getFunction().hasMinSize())
1544815476
return SDValue();
1544915477

15450-
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15451-
return SDValue();
15452-
1545315478
if (VT != Subtarget.getXLenVT())
1545415479
return SDValue();
1545515480

15456-
const bool HasShlAdd =
15457-
Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
15481+
// This may prevent some ShlAdd optimizations. Try this combination
15482+
// later if we have that.
15483+
if (!HasShlAdd)
15484+
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
15485+
return V;
15486+
15487+
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15488+
return SDValue();
1545815489

1545915490
ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
1546015491
if (!CNode)
@@ -15569,22 +15600,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1556915600
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
1557015601
}
1557115602
}
15572-
}
1557315603

15574-
// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
15575-
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
15576-
if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
15577-
uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
15578-
SDLoc DL(N);
15579-
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15580-
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
15581-
SDValue Shift2 =
15582-
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15583-
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
15584-
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
15585-
}
15586-
15587-
if (HasShlAdd) {
1558815604
for (uint64_t Divisor : {3, 5, 9}) {
1558915605
if (MulAmt % Divisor != 0)
1559015606
continue;
@@ -15608,6 +15624,10 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1560815624
}
1560915625
}
1561015626
}
15627+
15628+
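// Delayed: try the 2^N +/- 2^M shift expansion only after the ShlAdd
// patterns above, since applying it first may prevent those optimizations.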
15629+
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
15630+
return V;
1561115631
}
1561215632

1561315633
return SDValue();

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 96 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -467,29 +467,30 @@ define i32 @mulhu_constant(i32 %a) nounwind {
467467
define i32 @muli32_p10(i32 %a) nounwind {
468468
; RV32I-LABEL: muli32_p10:
469469
; RV32I: # %bb.0:
470-
; RV32I-NEXT: li a1, 10
471-
; RV32I-NEXT: tail __mulsi3
470+
; RV32I-NEXT: slli a1, a0, 1
471+
; RV32I-NEXT: slli a0, a0, 3
472+
; RV32I-NEXT: add a0, a0, a1
473+
; RV32I-NEXT: ret
472474
;
473475
; RV32IM-LABEL: muli32_p10:
474476
; RV32IM: # %bb.0:
475-
; RV32IM-NEXT: li a1, 10
476-
; RV32IM-NEXT: mul a0, a0, a1
477+
; RV32IM-NEXT: slli a1, a0, 1
478+
; RV32IM-NEXT: slli a0, a0, 3
479+
; RV32IM-NEXT: add a0, a0, a1
477480
; RV32IM-NEXT: ret
478481
;
479482
; RV64I-LABEL: muli32_p10:
480483
; RV64I: # %bb.0:
481-
; RV64I-NEXT: addi sp, sp, -16
482-
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
483-
; RV64I-NEXT: li a1, 10
484-
; RV64I-NEXT: call __muldi3
485-
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
486-
; RV64I-NEXT: addi sp, sp, 16
484+
; RV64I-NEXT: slli a1, a0, 1
485+
; RV64I-NEXT: slli a0, a0, 3
486+
; RV64I-NEXT: add a0, a0, a1
487487
; RV64I-NEXT: ret
488488
;
489489
; RV64IM-LABEL: muli32_p10:
490490
; RV64IM: # %bb.0:
491-
; RV64IM-NEXT: li a1, 10
492-
; RV64IM-NEXT: mulw a0, a0, a1
491+
; RV64IM-NEXT: slli a1, a0, 1
492+
; RV64IM-NEXT: slli a0, a0, 3
493+
; RV64IM-NEXT: addw a0, a0, a1
493494
; RV64IM-NEXT: ret
494495
%1 = mul i32 %a, 10
495496
ret i32 %1
@@ -498,8 +499,10 @@ define i32 @muli32_p10(i32 %a) nounwind {
498499
define i32 @muli32_p14(i32 %a) nounwind {
499500
; RV32I-LABEL: muli32_p14:
500501
; RV32I: # %bb.0:
501-
; RV32I-NEXT: li a1, 14
502-
; RV32I-NEXT: tail __mulsi3
502+
; RV32I-NEXT: slli a1, a0, 1
503+
; RV32I-NEXT: slli a0, a0, 4
504+
; RV32I-NEXT: sub a0, a0, a1
505+
; RV32I-NEXT: ret
503506
;
504507
; RV32IM-LABEL: muli32_p14:
505508
; RV32IM: # %bb.0:
@@ -528,29 +531,30 @@ define i32 @muli32_p14(i32 %a) nounwind {
528531
define i32 @muli32_p20(i32 %a) nounwind {
529532
; RV32I-LABEL: muli32_p20:
530533
; RV32I: # %bb.0:
531-
; RV32I-NEXT: li a1, 20
532-
; RV32I-NEXT: tail __mulsi3
534+
; RV32I-NEXT: slli a1, a0, 2
535+
; RV32I-NEXT: slli a0, a0, 4
536+
; RV32I-NEXT: add a0, a0, a1
537+
; RV32I-NEXT: ret
533538
;
534539
; RV32IM-LABEL: muli32_p20:
535540
; RV32IM: # %bb.0:
536-
; RV32IM-NEXT: li a1, 20
537-
; RV32IM-NEXT: mul a0, a0, a1
541+
; RV32IM-NEXT: slli a1, a0, 2
542+
; RV32IM-NEXT: slli a0, a0, 4
543+
; RV32IM-NEXT: add a0, a0, a1
538544
; RV32IM-NEXT: ret
539545
;
540546
; RV64I-LABEL: muli32_p20:
541547
; RV64I: # %bb.0:
542-
; RV64I-NEXT: addi sp, sp, -16
543-
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
544-
; RV64I-NEXT: li a1, 20
545-
; RV64I-NEXT: call __muldi3
546-
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
547-
; RV64I-NEXT: addi sp, sp, 16
548+
; RV64I-NEXT: slli a1, a0, 2
549+
; RV64I-NEXT: slli a0, a0, 4
550+
; RV64I-NEXT: add a0, a0, a1
548551
; RV64I-NEXT: ret
549552
;
550553
; RV64IM-LABEL: muli32_p20:
551554
; RV64IM: # %bb.0:
552-
; RV64IM-NEXT: li a1, 20
553-
; RV64IM-NEXT: mulw a0, a0, a1
555+
; RV64IM-NEXT: slli a1, a0, 2
556+
; RV64IM-NEXT: slli a0, a0, 4
557+
; RV64IM-NEXT: addw a0, a0, a1
554558
; RV64IM-NEXT: ret
555559
%1 = mul i32 %a, 20
556560
ret i32 %1
@@ -559,8 +563,10 @@ define i32 @muli32_p20(i32 %a) nounwind {
559563
define i32 @muli32_p28(i32 %a) nounwind {
560564
; RV32I-LABEL: muli32_p28:
561565
; RV32I: # %bb.0:
562-
; RV32I-NEXT: li a1, 28
563-
; RV32I-NEXT: tail __mulsi3
566+
; RV32I-NEXT: slli a1, a0, 2
567+
; RV32I-NEXT: slli a0, a0, 5
568+
; RV32I-NEXT: sub a0, a0, a1
569+
; RV32I-NEXT: ret
564570
;
565571
; RV32IM-LABEL: muli32_p28:
566572
; RV32IM: # %bb.0:
@@ -589,8 +595,10 @@ define i32 @muli32_p28(i32 %a) nounwind {
589595
define i32 @muli32_p30(i32 %a) nounwind {
590596
; RV32I-LABEL: muli32_p30:
591597
; RV32I: # %bb.0:
592-
; RV32I-NEXT: li a1, 30
593-
; RV32I-NEXT: tail __mulsi3
598+
; RV32I-NEXT: slli a1, a0, 1
599+
; RV32I-NEXT: slli a0, a0, 5
600+
; RV32I-NEXT: sub a0, a0, a1
601+
; RV32I-NEXT: ret
594602
;
595603
; RV32IM-LABEL: muli32_p30:
596604
; RV32IM: # %bb.0:
@@ -619,8 +627,10 @@ define i32 @muli32_p30(i32 %a) nounwind {
619627
define i32 @muli32_p56(i32 %a) nounwind {
620628
; RV32I-LABEL: muli32_p56:
621629
; RV32I: # %bb.0:
622-
; RV32I-NEXT: li a1, 56
623-
; RV32I-NEXT: tail __mulsi3
630+
; RV32I-NEXT: slli a1, a0, 3
631+
; RV32I-NEXT: slli a0, a0, 6
632+
; RV32I-NEXT: sub a0, a0, a1
633+
; RV32I-NEXT: ret
624634
;
625635
; RV32IM-LABEL: muli32_p56:
626636
; RV32IM: # %bb.0:
@@ -649,8 +659,10 @@ define i32 @muli32_p56(i32 %a) nounwind {
649659
define i32 @muli32_p60(i32 %a) nounwind {
650660
; RV32I-LABEL: muli32_p60:
651661
; RV32I: # %bb.0:
652-
; RV32I-NEXT: li a1, 60
653-
; RV32I-NEXT: tail __mulsi3
662+
; RV32I-NEXT: slli a1, a0, 2
663+
; RV32I-NEXT: slli a0, a0, 6
664+
; RV32I-NEXT: sub a0, a0, a1
665+
; RV32I-NEXT: ret
654666
;
655667
; RV32IM-LABEL: muli32_p60:
656668
; RV32IM: # %bb.0:
@@ -679,8 +691,10 @@ define i32 @muli32_p60(i32 %a) nounwind {
679691
define i32 @muli32_p62(i32 %a) nounwind {
680692
; RV32I-LABEL: muli32_p62:
681693
; RV32I: # %bb.0:
682-
; RV32I-NEXT: li a1, 62
683-
; RV32I-NEXT: tail __mulsi3
694+
; RV32I-NEXT: slli a1, a0, 1
695+
; RV32I-NEXT: slli a0, a0, 6
696+
; RV32I-NEXT: sub a0, a0, a1
697+
; RV32I-NEXT: ret
684698
;
685699
; RV32IM-LABEL: muli32_p62:
686700
; RV32IM: # %bb.0:
@@ -895,8 +909,10 @@ define i64 @muli64_p60(i64 %a) nounwind {
895909
;
896910
; RV64I-LABEL: muli64_p60:
897911
; RV64I: # %bb.0:
898-
; RV64I-NEXT: li a1, 60
899-
; RV64I-NEXT: tail __muldi3
912+
; RV64I-NEXT: slli a1, a0, 2
913+
; RV64I-NEXT: slli a0, a0, 6
914+
; RV64I-NEXT: sub a0, a0, a1
915+
; RV64I-NEXT: ret
900916
;
901917
; RV64IM-LABEL: muli64_p60:
902918
; RV64IM: # %bb.0:
@@ -923,21 +939,28 @@ define i64 @muli64_p68(i64 %a) nounwind {
923939
; RV32IM-LABEL: muli64_p68:
924940
; RV32IM: # %bb.0:
925941
; RV32IM-NEXT: li a2, 68
926-
; RV32IM-NEXT: mul a1, a1, a2
927-
; RV32IM-NEXT: mulhu a3, a0, a2
928-
; RV32IM-NEXT: add a1, a3, a1
929-
; RV32IM-NEXT: mul a0, a0, a2
942+
; RV32IM-NEXT: slli a3, a1, 2
943+
; RV32IM-NEXT: slli a1, a1, 6
944+
; RV32IM-NEXT: add a1, a1, a3
945+
; RV32IM-NEXT: slli a3, a0, 2
946+
; RV32IM-NEXT: mulhu a2, a0, a2
947+
; RV32IM-NEXT: slli a0, a0, 6
948+
; RV32IM-NEXT: add a1, a2, a1
949+
; RV32IM-NEXT: add a0, a0, a3
930950
; RV32IM-NEXT: ret
931951
;
932952
; RV64I-LABEL: muli64_p68:
933953
; RV64I: # %bb.0:
934-
; RV64I-NEXT: li a1, 68
935-
; RV64I-NEXT: tail __muldi3
954+
; RV64I-NEXT: slli a1, a0, 2
955+
; RV64I-NEXT: slli a0, a0, 6
956+
; RV64I-NEXT: add a0, a0, a1
957+
; RV64I-NEXT: ret
936958
;
937959
; RV64IM-LABEL: muli64_p68:
938960
; RV64IM: # %bb.0:
939-
; RV64IM-NEXT: li a1, 68
940-
; RV64IM-NEXT: mul a0, a0, a1
961+
; RV64IM-NEXT: slli a1, a0, 2
962+
; RV64IM-NEXT: slli a0, a0, 6
963+
; RV64IM-NEXT: add a0, a0, a1
941964
; RV64IM-NEXT: ret
942965
%1 = mul i64 %a, 68
943966
ret i64 %1
@@ -1093,8 +1116,10 @@ define i64 @muli64_m65(i64 %a) nounwind {
10931116
define i32 @muli32_p384(i32 %a) nounwind {
10941117
; RV32I-LABEL: muli32_p384:
10951118
; RV32I: # %bb.0:
1096-
; RV32I-NEXT: li a1, 384
1097-
; RV32I-NEXT: tail __mulsi3
1119+
; RV32I-NEXT: slli a1, a0, 7
1120+
; RV32I-NEXT: slli a0, a0, 9
1121+
; RV32I-NEXT: sub a0, a0, a1
1122+
; RV32I-NEXT: ret
10981123
;
10991124
; RV32IM-LABEL: muli32_p384:
11001125
; RV32IM: # %bb.0:
@@ -1123,8 +1148,10 @@ define i32 @muli32_p384(i32 %a) nounwind {
11231148
define i32 @muli32_p12288(i32 %a) nounwind {
11241149
; RV32I-LABEL: muli32_p12288:
11251150
; RV32I: # %bb.0:
1126-
; RV32I-NEXT: lui a1, 3
1127-
; RV32I-NEXT: tail __mulsi3
1151+
; RV32I-NEXT: slli a1, a0, 12
1152+
; RV32I-NEXT: slli a0, a0, 14
1153+
; RV32I-NEXT: sub a0, a0, a1
1154+
; RV32I-NEXT: ret
11281155
;
11291156
; RV32IM-LABEL: muli32_p12288:
11301157
; RV32IM: # %bb.0:
@@ -1300,12 +1327,16 @@ define i64 @muli64_p4352(i64 %a) nounwind {
13001327
;
13011328
; RV32IM-LABEL: muli64_p4352:
13021329
; RV32IM: # %bb.0:
1330+
; RV32IM-NEXT: slli a2, a1, 8
1331+
; RV32IM-NEXT: slli a1, a1, 12
1332+
; RV32IM-NEXT: add a1, a1, a2
13031333
; RV32IM-NEXT: li a2, 17
13041334
; RV32IM-NEXT: slli a2, a2, 8
1305-
; RV32IM-NEXT: mul a1, a1, a2
1306-
; RV32IM-NEXT: mulhu a3, a0, a2
1307-
; RV32IM-NEXT: add a1, a3, a1
1308-
; RV32IM-NEXT: mul a0, a0, a2
1335+
; RV32IM-NEXT: mulhu a2, a0, a2
1336+
; RV32IM-NEXT: add a1, a2, a1
1337+
; RV32IM-NEXT: slli a2, a0, 8
1338+
; RV32IM-NEXT: slli a0, a0, 12
1339+
; RV32IM-NEXT: add a0, a0, a2
13091340
; RV32IM-NEXT: ret
13101341
;
13111342
; RV64I-LABEL: muli64_p4352:
@@ -2032,12 +2063,16 @@ define i64 @muland_demand(i64 %x) nounwind {
20322063
; RV64I-NEXT: li a1, -29
20332064
; RV64I-NEXT: srli a1, a1, 2
20342065
; RV64I-NEXT: and a0, a0, a1
2035-
; RV64I-NEXT: li a1, 12
2036-
; RV64I-NEXT: tail __muldi3
2066+
; RV64I-NEXT: slli a1, a0, 2
2067+
; RV64I-NEXT: slli a0, a0, 4
2068+
; RV64I-NEXT: sub a0, a0, a1
2069+
; RV64I-NEXT: ret
20372070
;
20382071
; RV64IM-LABEL: muland_demand:
20392072
; RV64IM: # %bb.0:
2040-
; RV64IM-NEXT: andi a0, a0, -8
2073+
; RV64IM-NEXT: li a1, -29
2074+
; RV64IM-NEXT: srli a1, a1, 2
2075+
; RV64IM-NEXT: and a0, a0, a1
20412076
; RV64IM-NEXT: slli a1, a0, 2
20422077
; RV64IM-NEXT: slli a0, a0, 4
20432078
; RV64IM-NEXT: sub a0, a0, a1
@@ -2068,9 +2103,10 @@ define i64 @mulzext_demand(i32 signext %x) nounwind {
20682103
;
20692104
; RV64I-LABEL: mulzext_demand:
20702105
; RV64I: # %bb.0:
2071-
; RV64I-NEXT: li a1, 3
2072-
; RV64I-NEXT: slli a1, a1, 32
2073-
; RV64I-NEXT: tail __muldi3
2106+
; RV64I-NEXT: slli a1, a0, 32
2107+
; RV64I-NEXT: slli a0, a0, 34
2108+
; RV64I-NEXT: sub a0, a0, a1
2109+
; RV64I-NEXT: ret
20742110
;
20752111
; RV64IM-LABEL: mulzext_demand:
20762112
; RV64IM: # %bb.0:

0 commit comments

Comments
 (0)