[RISCV] Expand constant multiplication for targets without M extension #137195


Merged · 3 commits into main, May 16, 2025

Conversation

el-ev (Member) commented Apr 24, 2025

Closes #137023

On RISC-V machines without a native multiply instruction (e.g., rv32i base), multiplying a variable by a constant integer often compiles to a call to a library routine like __mul{s,d}i3.

	.globl __mulxi3
	.type  __mulxi3, @function
__mulxi3:
	mv     a2, a0              # a2 = multiplicand, shifted left each iteration
	mv     a0, zero            # a0 = running result
.L1:
	andi   a3, a1, 1           # test the low bit of the multiplier
	beqz   a3, .L2
	add    a0, a0, a2          # bit set: accumulate the shifted multiplicand
.L2:
	srli   a1, a1, 1           # consume one multiplier bit
	slli   a2, a2, 1           # keep the multiplicand aligned with it
	bnez   a1, .L1             # loop while multiplier bits remain
	ret

This library routine implements multiplication in software with a loop of shifts and adds, processing the multiplier one bit per iteration. On rv32i it executes a minimum of 8 instructions (multiplying by 0) and up to roughly 200 (multiplying by 0xffffffff), and it adds heavy branching on top of the function-call overhead.
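
For reference, a minimal C++ sketch of the same shift-and-add loop (the name soft_mul and the fixed 32-bit width are illustrative assumptions, not compiler-rt's actual code):

    #include <cstdint>

    // Software multiply: one loop iteration per remaining multiplier bit.
    uint32_t soft_mul(uint32_t a, uint32_t b) {
      uint32_t result = 0;
      while (b != 0) {
        if (b & 1)        // low multiplier bit set?
          result += a;    // accumulate the shifted multiplicand
        b >>= 1;          // consume one multiplier bit
        a <<= 1;          // keep the multiplicand aligned with it
      }
      return result;
    }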

When not optimizing for size, we can instead expand the constant multiplication into a sequence of shift and add/sub instructions. For now we derive that sequence from the constant's non-adjacent form (NAF), which can save one half to two thirds of the instructions compared to a shl+add-only sequence; a sketch of the NAF computation follows.
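
The sketch below is illustrative only, not the patch's actual implementation. It computes a multiplier's non-adjacent form: digits in {-1, 0, +1} with no two adjacent nonzeros, so each nonzero digit contributes one shifted term combined by an add or sub:

    #include <cstdint>
    #include <vector>

    // Returns the NAF digits of k; digits[i] is the coefficient of 2^i.
    std::vector<int> computeNAF(uint64_t k) {
      std::vector<int> digits;
      while (k != 0) {
        if (k & 1) {
          // Pick +1 when k % 4 == 1 and -1 when k % 4 == 3, which
          // forces the next digit to be zero.
          int d = 2 - static_cast<int>(k % 4);
          digits.push_back(d);
          k -= d; // unsigned wraparound handles d == -1 correctly
        } else {
          digits.push_back(0);
        }
        k >>= 1;
      }
      return digits;
    }

For example, computeNAF(14) gives 16 - 2, so x * 14 lowers to (x << 4) - (x << 1): two shifts and one sub instead of a libcall, as the muli32_p14 test below shows.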

llvmbot (Member) commented Apr 24, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Iris Shi (el-ev)

Changes

Closes #137023


Patch is 48.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137195.diff

9 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+40-20)
  • (modified) llvm/test/CodeGen/RISCV/mul.ll (+229-30)
  • (modified) llvm/test/CodeGen/RISCV/rv32xtheadba.ll (+33-22)
  • (modified) llvm/test/CodeGen/RISCV/rv32zba.ll (+33-22)
  • (modified) llvm/test/CodeGen/RISCV/rv64xtheadba.ll (+39-26)
  • (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+60-46)
  • (modified) llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll (+49-50)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll (+76-64)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll (+6-6)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 02451ee716865..34d789d1ff5c8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15436,6 +15436,31 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }
 
+// 2^N +/- 2^M -> (add/sub (shl X, C1), (shl X, C2))
+static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG) {
+  ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!CNode)
+    return SDValue();
+  uint64_t MulAmt = CNode->getZExtValue();
+  uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
+  ISD::NodeType Op;
+  if (isPowerOf2_64(MulAmt + MulAmtLowBit))
+    Op = ISD::SUB;
+  else if (isPowerOf2_64(MulAmt - MulAmtLowBit))
+    Op = ISD::ADD;
+  else
+    return SDValue();
+  uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
+  SDLoc DL(N);
+  SDValue Shift1 =
+      DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
+                  DAG.getConstant(Log2_64(ShiftAmt1), DL, N->getValueType(0)));
+  SDValue Shift2 = DAG.getNode(
+      ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
+      DAG.getConstant(Log2_64(MulAmtLowBit), DL, N->getValueType(0)));
+  return DAG.getNode(Op, DL, N->getValueType(0), Shift1, Shift2);
+}
+
 // Try to expand a scalar multiply to a faster sequence.
 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
@@ -15443,18 +15468,24 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
 
   EVT VT = N->getValueType(0);
 
+  const bool HasShlAdd =
+      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+
   // LI + MUL is usually smaller than the alternative sequence.
   if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
-    return SDValue();
-
   if (VT != Subtarget.getXLenVT())
     return SDValue();
 
-  const bool HasShlAdd =
-      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+  // This may prevent some ShlAdd optimizations. Try this combination
+  // later if we have that.
+  if (!HasShlAdd)
+    if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
+      return V;
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
 
   ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!CNode)
@@ -15569,22 +15600,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
         return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
       }
     }
-  }
 
-  // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
-  uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
-  if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
-    uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
-    SDLoc DL(N);
-    SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
-                                 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
-    SDValue Shift2 =
-        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
-                    DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
-    return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
-  }
-
-  if (HasShlAdd) {
     for (uint64_t Divisor : {3, 5, 9}) {
       if (MulAmt % Divisor != 0)
         continue;
@@ -15608,6 +15624,10 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
         }
       }
     }
+
+    // Delayed
+    if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
+      return V;
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 548c7e1c6ea8c..15aa522815605 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -464,11 +464,45 @@ define i32 @mulhu_constant(i32 %a) nounwind {
   ret i32 %4
 }
 
+define i32 @muli32_p10(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p10:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli32_p10:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a1, a0, 1
+; RV32IM-NEXT:    slli a0, a0, 3
+; RV32IM-NEXT:    add a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p10:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p10:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 1
+; RV64IM-NEXT:    slli a0, a0, 3
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 10
+  ret i32 %1
+}
+
 define i32 @muli32_p14(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p14:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 14
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p14:
 ; RV32IM:       # %bb.0:
@@ -494,11 +528,45 @@ define i32 @muli32_p14(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p20(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p20:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli32_p20:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a1, a0, 2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    add a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p20:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p20:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 20
+  ret i32 %1
+}
+
 define i32 @muli32_p28(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p28:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 28
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p28:
 ; RV32IM:       # %bb.0:
@@ -527,8 +595,10 @@ define i32 @muli32_p28(i32 %a) nounwind {
 define i32 @muli32_p30(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p30:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 30
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p30:
 ; RV32IM:       # %bb.0:
@@ -557,8 +627,10 @@ define i32 @muli32_p30(i32 %a) nounwind {
 define i32 @muli32_p56(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p56:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 56
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 3
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p56:
 ; RV32IM:       # %bb.0:
@@ -587,8 +659,10 @@ define i32 @muli32_p56(i32 %a) nounwind {
 define i32 @muli32_p60(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p60:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 60
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p60:
 ; RV32IM:       # %bb.0:
@@ -617,8 +691,10 @@ define i32 @muli32_p60(i32 %a) nounwind {
 define i32 @muli32_p62(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p62:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 62
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p62:
 ; RV32IM:       # %bb.0:
@@ -672,6 +748,34 @@ define i32 @muli32_p65(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p66(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p66:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 6
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli32_p66:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a1, a0, 6
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p66:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    addw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p66:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 6
+; RV64IM-NEXT:    addw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 65
+  ret i32 %1
+}
+
 define i32 @muli32_p63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p63:
 ; RV32I:       # %bb.0:
@@ -778,7 +882,89 @@ define i64 @muli64_p63(i64 %a) nounwind {
   ret i64 %1
 }
 
+define i64 @muli64_p60(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p60:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 60
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli64_p60:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 60
+; RV32IM-NEXT:    slli a3, a1, 2
+; RV32IM-NEXT:    slli a1, a1, 6
+; RV32IM-NEXT:    sub a1, a1, a3
+; RV32IM-NEXT:    slli a3, a0, 2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    slli a0, a0, 6
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    sub a0, a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli64_p60:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli64_p60:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 6
+; RV64IM-NEXT:    sub a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i64 %a, 60
+  ret i64 %1
+}
 
+define i64 @muli64_p68(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p68:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 68
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli64_p68:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 68
+; RV32IM-NEXT:    slli a3, a1, 2
+; RV32IM-NEXT:    slli a1, a1, 6
+; RV32IM-NEXT:    add a1, a1, a3
+; RV32IM-NEXT:    slli a3, a0, 2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    slli a0, a0, 6
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    add a0, a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli64_p68:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli64_p68:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 6
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i64 %a, 68
+  ret i64 %1
+}
 
 define i32 @muli32_m63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_m63:
@@ -930,8 +1116,10 @@ define i64 @muli64_m65(i64 %a) nounwind {
 define i32 @muli32_p384(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p384:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 384
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    slli a0, a0, 9
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p384:
 ; RV32IM:       # %bb.0:
@@ -960,8 +1148,10 @@ define i32 @muli32_p384(i32 %a) nounwind {
 define i32 @muli32_p12288(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p12288:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a1, 3
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 12
+; RV32I-NEXT:    slli a0, a0, 14
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p12288:
 ; RV32IM:       # %bb.0:
@@ -1137,12 +1327,16 @@ define i64 @muli64_p4352(i64 %a) nounwind {
 ;
 ; RV32IM-LABEL: muli64_p4352:
 ; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a2, a1, 8
+; RV32IM-NEXT:    slli a1, a1, 12
+; RV32IM-NEXT:    add a1, a1, a2
 ; RV32IM-NEXT:    li a2, 17
 ; RV32IM-NEXT:    slli a2, a2, 8
-; RV32IM-NEXT:    mul a1, a1, a2
-; RV32IM-NEXT:    mulhu a3, a0, a2
-; RV32IM-NEXT:    add a1, a3, a1
-; RV32IM-NEXT:    mul a0, a0, a2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    slli a2, a0, 8
+; RV32IM-NEXT:    slli a0, a0, 12
+; RV32IM-NEXT:    add a0, a0, a2
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: muli64_p4352:
@@ -1327,10 +1521,10 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a5, a4
 ; RV32I-NEXT:    sub a6, a6, t2
 ; RV32I-NEXT:    mv t1, a7
-; RV32I-NEXT:    beq t0, a3, .LBB36_2
+; RV32I-NEXT:    beq t0, a3, .LBB41_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu t1, t0, a3
-; RV32I-NEXT:  .LBB36_2:
+; RV32I-NEXT:  .LBB41_2:
 ; RV32I-NEXT:    sub a2, a2, a1
 ; RV32I-NEXT:    sub a1, t0, a3
 ; RV32I-NEXT:    sub a5, a5, a4
@@ -1441,10 +1635,10 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a3, a6
 ; RV32I-NEXT:    or t0, t0, a5
 ; RV32I-NEXT:    mv a5, a7
-; RV32I-NEXT:    beq a4, t0, .LBB37_2
+; RV32I-NEXT:    beq a4, t0, .LBB42_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a5, a4, t0
-; RV32I-NEXT:  .LBB37_2:
+; RV32I-NEXT:  .LBB42_2:
 ; RV32I-NEXT:    srli t1, a4, 26
 ; RV32I-NEXT:    slli t2, a2, 6
 ; RV32I-NEXT:    srli t3, a2, 26
@@ -1869,12 +2063,16 @@ define i64 @muland_demand(i64 %x) nounwind {
 ; RV64I-NEXT:    li a1, -29
 ; RV64I-NEXT:    srli a1, a1, 2
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    li a1, 12
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muland_demand:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    andi a0, a0, -8
+; RV64IM-NEXT:    li a1, -29
+; RV64IM-NEXT:    srli a1, a1, 2
+; RV64IM-NEXT:    and a0, a0, a1
 ; RV64IM-NEXT:    slli a1, a0, 2
 ; RV64IM-NEXT:    slli a0, a0, 4
 ; RV64IM-NEXT:    sub a0, a0, a1
@@ -1905,9 +2103,10 @@ define i64 @mulzext_demand(i32 signext %x) nounwind {
 ;
 ; RV64I-LABEL: mulzext_demand:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 3
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    slli a0, a0, 34
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: mulzext_demand:
 ; RV64IM:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
index 44ab0e1fef6c1..0fc0adbfa83d9 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
@@ -116,8 +116,9 @@ define i32 @addmul6(i32 %a, i32 %b) {
 define i32 @addmul10(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul10:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 10
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -153,8 +154,9 @@ define i32 @addmul12(i32 %a, i32 %b) {
 define i32 @addmul18(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul18:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 18
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -171,8 +173,9 @@ define i32 @addmul18(i32 %a, i32 %b) {
 define i32 @addmul20(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul20:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 20
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -208,8 +211,9 @@ define i32 @addmul24(i32 %a, i32 %b) {
 define i32 @addmul36(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul36:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 36
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -226,8 +230,9 @@ define i32 @addmul36(i32 %a, i32 %b) {
 define i32 @addmul40(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul40:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 40
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -244,8 +249,9 @@ define i32 @addmul40(i32 %a, i32 %b) {
 define i32 @addmul72(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul72:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 72
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -279,8 +285,9 @@ define i32 @mul96(i32 %a) {
 define i32 @mul160(i32 %a) {
 ; RV32I-LABEL: mul160:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 160
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 7
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul160:
@@ -312,8 +319,9 @@ define i32 @mul200(i32 %a) {
 define i32 @mul288(i32 %a) {
 ; RV32I-LABEL: mul288:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 288
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul288:
@@ -328,8 +336,9 @@ define i32 @mul288(i32 %a) {
 define i32 @mul258(i32 %a) {
 ; RV32I-LABEL: mul258:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 258
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul258:
@@ -344,8 +353,9 @@ define i32 @mul258(i32 %a) {
 define i32 @mul260(i32 %a) {
 ; RV32I-LABEL: mul260:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 260
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul260:
@@ -360,8 +370,9 @@ define i32 @mul260(i32 %a) {
 define i32 @mul264(i32 %a) {
 ; RV32I-LABEL: mul264:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 264
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 3
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul264:
diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll
index fec156ac2be27..f8ca41782c6e1 100644
--- a/llvm/test/CodeGen/RISCV/rv32zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zba.ll
@@ -82,8 +82,9 @@ define i32 @addmul6(i32 %a, i32 %b) {
 define i32 @addmul10(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul10:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 10
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -119,8 +120,9 @@ define i32 @addmul12(i32 %a, i32 %b) {
 define i32 @addmul18(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul18:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 18
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -137,8 +139,9 @@ define i32 @addmul18(i32 %a, i32 %b) {
 define i32 @addmul20(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul20:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 20
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -174,8 +177,9 @@ define i32 @addmul24(i32 %a, i32 %b) {
 define i32 @addmul36(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul36:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 36
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -192,...
[truncated]

el-ev requested review from preames and topperc, April 24, 2025 15:43
topperc (Collaborator) commented Apr 24, 2025

Should we be solving the general case? What if there are more than 2 bits set in the multiplicand?

el-ev (Member, Author) commented Apr 25, 2025

> Should we be solving the general case? What if there are more than 2 bits set in the multiplicand?

I'm not sure it's profitable enough to break a mul down into more than two shls, and other backends typically don't perform this optimization in full generality either.

However, it may be beneficial when the mul can be broken down into a shl + shladd + add sequence. This could be a potential enhancement in a follow-up patch.
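
As a purely hypothetical illustration of that follow-up idea (the constant and the function name are mine, not from this patch): 67 = 64 + 3, so with Zba a mul by 67 could become one shift, one sh1add, and one add:

    #include <cstdint>

    // 67*x = (x << 6) + 3*x, where 3*x is exactly Zba's sh1add x, x.
    uint64_t mul67(uint64_t x) {
      uint64_t three_x = (x << 1) + x; // sh1add x, x
      return (x << 6) + three_x;       // slli + add
    }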

topperc (Collaborator) commented Apr 25, 2025

> Should we be solving the general case? What if there are more than 2 bits set in the multiplicand?
>
> I'm not sure it's profitable enough to break a mul down into more than two shls, and other backends typically don't perform this optimization in full generality either.
>
> However, it may be beneficial when the mul can be broken down into a shl + shladd + add sequence. This could be a potential enhancement in a follow-up patch.

The original bug report was for CPUs that don't implement M or Zmmul. For those CPUs, we can probably afford more instructions to avoid a library call, unless we're optimizing for size.

el-ev force-pushed the users/el-ev/rv32i-expand-mul branch from 0336625 to 1cc9447, April 25, 2025 09:11
el-ev changed the title from "[RISCV] Add 2^N + 2^M expanding pattern for mul" to "[RISCV] Expand constant multiplication for targets without M extension", Apr 25, 2025
el-ev force-pushed the users/el-ev/rv32i-expand-mul branch 3 times, most recently from d2308b1 to 1af67fb, April 25, 2025 12:20
github-actions bot commented Apr 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

el-ev force-pushed the users/el-ev/rv32i-expand-mul branch 2 times, most recently from 79e4586 to 42bd798, April 26, 2025 02:18
pfusik (Contributor) commented Apr 30, 2025

This seems to be three independent changes:

  1. Inlining mul by constant for targets with no hardware mul.
  2. Prioritizing the 3/5/9 * 3/5/9 * 2^N transform over 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)).
  3. Adding 2^N + 2^M -> (add (shl X, C1), (shl X, C2)) for all targets.

I suggest splitting to separate PRs.

Regarding the second change, why are we doing it? It changes:

        slli    a1, a0, 1
        slli    a0, a0, 5
        subw    a0, a0, a1

to:

        sh1add  a0, a0, a0
        sh2add  a0, a0, a0
        slliw   a0, a0, 1

el-ev (Member, Author) commented Apr 30, 2025

It makes sense. I will update this PR to include only the first change.

preames (Collaborator) left a comment:

Can you expand your review description with the motivation, and a bit of justification? In particular:

  • Why is unconditional expansion the right answer?
  • Why not use the simpler sum of powers of two? How much does the complexity of the factorization and NAF lowering really buy us? Feel free to cite a paper/blog if this is "well known".

el-ev force-pushed the users/el-ev/rv32i-expand-mul branch 2 times, most recently from 6d7a485 to a9379d0, May 10, 2025 03:37
el-ev requested a review from preames, May 10, 2025 16:08
el-ev force-pushed the users/el-ev/rv32i-expand-mul branch from a9379d0 to 0cb0fad, May 13, 2025 07:55

Review thread on llvm/lib/Target/RISCV/RISCVISelLowering.cpp:
}
// Try to expand a multiply to a sequence of shifts and add/subs,
// for a machine without native mul instruction.
static SDValue expandMulToBasicOps(SDNode *N, SelectionDAG &DAG,
A collaborator commented:
Would you mind dropping this from the current patch, and using only the expandMulToNAFSequence scheme? You can add back the second option in a second review, I just want to focus review time.

el-ev (Member, Author) replied:
Updated.

el-ev force-pushed the users/el-ev/rv32i-expand-mul branch from 0cb0fad to 038b1a6, May 15, 2025 06:39
el-ev requested a review from preames, May 15, 2025 06:41
preames (Collaborator) left a comment: LGTM

el-ev force-pushed the users/el-ev/rv32i-expand-mul branch from 038b1a6 to e9a9dbc, May 16, 2025 00:47
el-ev requested a review from topperc, May 16, 2025 00:48
topperc (Collaborator) left a comment: LGTM

el-ev merged commit dae5c4e into main, May 16, 2025 (7 of 10 checks passed)
el-ev deleted the users/el-ev/rv32i-expand-mul branch, May 16, 2025 01:40
Merging this pull request closed the linked issue: inefficient rv32i multiply by 10 ?