Skip to content

Commit 4298fc5

Browse files
preames and dtcxzyw
authored
[RISCV] Move strength reduction of mul X, 3/5/9*2^N to combine (#89966)
This moves our last major category of tablegen-driven multiply strength reduction into the post-legalize combine framework. The one slightly tricky bit is making sure that we use a leading shl if we can form a slli.uw, and a trailing shl otherwise. Having the trailing shl is critical for shNadd matching, and for folding any following sext.w. As can be seen in the TD deltas, this allows us to kill off both the actual multiply patterns and the explicit add (mul X, C) Y patterns. The latter are now handled by the generic shNadd matching code, with the exception of the THead-only C=200 case, because we don't (yet) have a multiply expansion with two shNadd + a shift. --------- Co-authored-by: Yingwei Zheng <[email protected]>
1 parent b59461a commit 4298fc5

File tree

6 files changed

+32
-117
lines changed

6 files changed

+32
-117
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13565,10 +13565,27 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1356513565
if (MulAmt % Divisor != 0)
1356613566
continue;
1356713567
uint64_t MulAmt2 = MulAmt / Divisor;
13568-
// 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13569-
// Matched in tablegen, avoid perturbing patterns.
13570-
if (isPowerOf2_64(MulAmt2))
13571-
return SDValue();
13568+
// 3/5/9 * 2^N -> shl (shXadd X, X), N
13569+
if (isPowerOf2_64(MulAmt2)) {
13570+
SDLoc DL(N);
13571+
SDValue X = N->getOperand(0);
13572+
// Put the shift first if we can fold a zext into the
13573+
// shift forming a slli.uw.
13574+
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13575+
X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13576+
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13577+
DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13578+
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13579+
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), Shl);
13580+
}
13581+
// Otherwise, put the shl second so that it can fold with following
13582+
// instructions (e.g. sext or add).
13583+
SDValue Mul359 =
13584+
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13585+
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13586+
return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13587+
DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13588+
}
1357213589

1357313590
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
1357413591
if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {

llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -549,40 +549,11 @@ def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)),
549549
def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)),
550550
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
551551

552-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
553-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 1)), 1)>;
554-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
555-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 2)), 1)>;
556-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
557-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 3)), 1)>;
558-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
559-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 1)), 2)>;
560-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
561-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 2)), 2)>;
562-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
563-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 3)), 2)>;
564-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
565-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 1)), 3)>;
566-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
567-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 2)), 3)>;
568-
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
569-
(TH_ADDSL GPR:$rs2, (XLenVT (TH_ADDSL GPR:$rs1, GPR:$rs1, 3)), 3)>;
570-
571552
def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i),
572553
(TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i))), 2)>;
573554
def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy8:$i),
574555
(TH_ADDSL GPR:$r, (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i))), 3)>;
575556

576-
def : Pat<(mul (XLenVT GPR:$r), C3LeftShift:$i),
577-
(SLLI (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 1)),
578-
(TrailingZeros C3LeftShift:$i))>;
579-
def : Pat<(mul (XLenVT GPR:$r), C5LeftShift:$i),
580-
(SLLI (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)),
581-
(TrailingZeros C5LeftShift:$i))>;
582-
def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
583-
(SLLI (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)),
584-
(TrailingZeros C9LeftShift:$i))>;
585-
586557
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
587558
(SLLI (XLenVT (TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)),
588559
(XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 2)), 3)>;

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -173,42 +173,6 @@ def BCLRIANDIMaskLow : SDNodeXForm<imm, [{
173173
SDLoc(N), N->getValueType(0));
174174
}]>;
175175

176-
def C3LeftShift : PatLeaf<(imm), [{
177-
uint64_t C = N->getZExtValue();
178-
return C > 3 && (C >> llvm::countr_zero(C)) == 3;
179-
}]>;
180-
181-
def C5LeftShift : PatLeaf<(imm), [{
182-
uint64_t C = N->getZExtValue();
183-
return C > 5 && (C >> llvm::countr_zero(C)) == 5;
184-
}]>;
185-
186-
def C9LeftShift : PatLeaf<(imm), [{
187-
uint64_t C = N->getZExtValue();
188-
return C > 9 && (C >> llvm::countr_zero(C)) == 9;
189-
}]>;
190-
191-
// Constant of the form (3 << C) where C is less than 32.
192-
def C3LeftShiftUW : PatLeaf<(imm), [{
193-
uint64_t C = N->getZExtValue();
194-
unsigned Shift = llvm::countr_zero(C);
195-
return 1 <= Shift && Shift < 32 && (C >> Shift) == 3;
196-
}]>;
197-
198-
// Constant of the form (5 << C) where C is less than 32.
199-
def C5LeftShiftUW : PatLeaf<(imm), [{
200-
uint64_t C = N->getZExtValue();
201-
unsigned Shift = llvm::countr_zero(C);
202-
return 1 <= Shift && Shift < 32 && (C >> Shift) == 5;
203-
}]>;
204-
205-
// Constant of the form (9 << C) where C is less than 32.
206-
def C9LeftShiftUW : PatLeaf<(imm), [{
207-
uint64_t C = N->getZExtValue();
208-
unsigned Shift = llvm::countr_zero(C);
209-
return 1 <= Shift && Shift < 32 && (C >> Shift) == 9;
210-
}]>;
211-
212176
def CSImm12MulBy4 : PatLeaf<(imm), [{
213177
if (!N->hasOneUse())
214178
return false;
@@ -693,42 +657,13 @@ foreach i = {1,2,3} in {
693657
(shxadd pat:$rs1, GPR:$rs2)>;
694658
}
695659

696-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
697-
(SH1ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
698-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
699-
(SH1ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
700-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
701-
(SH1ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
702-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
703-
(SH2ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
704-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
705-
(SH2ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
706-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
707-
(SH2ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
708-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
709-
(SH3ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
710-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
711-
(SH3ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
712-
def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
713-
(SH3ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>;
714-
715660
def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy4:$i),
716661
(SH2ADD (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i))),
717662
GPR:$r)>;
718663
def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy8:$i),
719664
(SH3ADD (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i))),
720665
GPR:$r)>;
721666

722-
def : Pat<(mul (XLenVT GPR:$r), C3LeftShift:$i),
723-
(SLLI (XLenVT (SH1ADD GPR:$r, GPR:$r)),
724-
(TrailingZeros C3LeftShift:$i))>;
725-
def : Pat<(mul (XLenVT GPR:$r), C5LeftShift:$i),
726-
(SLLI (XLenVT (SH2ADD GPR:$r, GPR:$r)),
727-
(TrailingZeros C5LeftShift:$i))>;
728-
def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
729-
(SLLI (XLenVT (SH3ADD GPR:$r, GPR:$r)),
730-
(TrailingZeros C9LeftShift:$i))>;
731-
732667
} // Predicates = [HasStdExtZba]
733668

734669
let Predicates = [HasStdExtZba, IsRV64] in {
@@ -780,15 +715,6 @@ def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2
780715
def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))),
781716
(SH3ADD_UW (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>;
782717

783-
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)),
784-
(SH1ADD (XLenVT (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i))),
785-
(XLenVT (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i))))>;
786-
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C5LeftShiftUW:$i)),
787-
(SH2ADD (XLenVT (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i))),
788-
(XLenVT (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i))))>;
789-
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C9LeftShiftUW:$i)),
790-
(SH3ADD (XLenVT (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i))),
791-
(XLenVT (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i))))>;
792718
} // Predicates = [HasStdExtZba, IsRV64]
793719

794720
let Predicates = [HasStdExtZbcOrZbkc] in {

llvm/test/CodeGen/RISCV/addimm-mulimm.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,9 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
600600
; RV32IMB-NEXT: sh3add a1, a1, a2
601601
; RV32IMB-NEXT: sh1add a0, a0, a0
602602
; RV32IMB-NEXT: slli a2, a0, 3
603-
; RV32IMB-NEXT: addi a0, a2, 2047
604-
; RV32IMB-NEXT: addi a0, a0, 1
603+
; RV32IMB-NEXT: li a3, 1
604+
; RV32IMB-NEXT: slli a3, a3, 11
605+
; RV32IMB-NEXT: sh3add a0, a0, a3
605606
; RV32IMB-NEXT: sltu a2, a0, a2
606607
; RV32IMB-NEXT: add a1, a1, a2
607608
; RV32IMB-NEXT: ret
@@ -610,8 +611,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
610611
; RV64IMB: # %bb.0:
611612
; RV64IMB-NEXT: addi a0, a0, 86
612613
; RV64IMB-NEXT: sh1add a0, a0, a0
613-
; RV64IMB-NEXT: li a1, -16
614-
; RV64IMB-NEXT: sh3add a0, a0, a1
614+
; RV64IMB-NEXT: slli a0, a0, 3
615+
; RV64IMB-NEXT: addi a0, a0, -16
615616
; RV64IMB-NEXT: ret
616617
%tmp0 = mul i64 %x, 24
617618
%tmp1 = add i64 %tmp0, 2048

llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ define i64 @zext_mul12884901888(i32 signext %a) {
646646
;
647647
; RV64ZBA-LABEL: zext_mul12884901888:
648648
; RV64ZBA: # %bb.0:
649-
; RV64ZBA-NEXT: sh1add a0, a0, a0
650649
; RV64ZBA-NEXT: slli a0, a0, 32
650+
; RV64ZBA-NEXT: sh1add a0, a0, a0
651651
; RV64ZBA-NEXT: ret
652652
%b = zext i32 %a to i64
653653
%c = mul i64 %b, 12884901888
@@ -667,8 +667,8 @@ define i64 @zext_mul21474836480(i32 signext %a) {
667667
;
668668
; RV64ZBA-LABEL: zext_mul21474836480:
669669
; RV64ZBA: # %bb.0:
670-
; RV64ZBA-NEXT: sh2add a0, a0, a0
671670
; RV64ZBA-NEXT: slli a0, a0, 32
671+
; RV64ZBA-NEXT: sh2add a0, a0, a0
672672
; RV64ZBA-NEXT: ret
673673
%b = zext i32 %a to i64
674674
%c = mul i64 %b, 21474836480
@@ -688,8 +688,8 @@ define i64 @zext_mul38654705664(i32 signext %a) {
688688
;
689689
; RV64ZBA-LABEL: zext_mul38654705664:
690690
; RV64ZBA: # %bb.0:
691-
; RV64ZBA-NEXT: sh3add a0, a0, a0
692691
; RV64ZBA-NEXT: slli a0, a0, 32
692+
; RV64ZBA-NEXT: sh3add a0, a0, a0
693693
; RV64ZBA-NEXT: ret
694694
%b = zext i32 %a to i64
695695
%c = mul i64 %b, 38654705664

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -865,8 +865,8 @@ define i64 @zext_mul12884901888(i32 signext %a) {
865865
;
866866
; RV64ZBA-LABEL: zext_mul12884901888:
867867
; RV64ZBA: # %bb.0:
868-
; RV64ZBA-NEXT: sh1add a0, a0, a0
869868
; RV64ZBA-NEXT: slli a0, a0, 32
869+
; RV64ZBA-NEXT: sh1add a0, a0, a0
870870
; RV64ZBA-NEXT: ret
871871
%b = zext i32 %a to i64
872872
%c = mul i64 %b, 12884901888
@@ -886,8 +886,8 @@ define i64 @zext_mul21474836480(i32 signext %a) {
886886
;
887887
; RV64ZBA-LABEL: zext_mul21474836480:
888888
; RV64ZBA: # %bb.0:
889-
; RV64ZBA-NEXT: sh2add a0, a0, a0
890889
; RV64ZBA-NEXT: slli a0, a0, 32
890+
; RV64ZBA-NEXT: sh2add a0, a0, a0
891891
; RV64ZBA-NEXT: ret
892892
%b = zext i32 %a to i64
893893
%c = mul i64 %b, 21474836480
@@ -907,8 +907,8 @@ define i64 @zext_mul38654705664(i32 signext %a) {
907907
;
908908
; RV64ZBA-LABEL: zext_mul38654705664:
909909
; RV64ZBA: # %bb.0:
910-
; RV64ZBA-NEXT: sh3add a0, a0, a0
911910
; RV64ZBA-NEXT: slli a0, a0, 32
911+
; RV64ZBA-NEXT: sh3add a0, a0, a0
912912
; RV64ZBA-NEXT: ret
913913
%b = zext i32 %a to i64
914914
%c = mul i64 %b, 38654705664

0 commit comments

Comments
 (0)