[RISCV] Fold vector shift of sext/zext to widening multiply #121563
Conversation
pfusik commented on Jan 3, 2025
(shl (sext X), C) -> (vwmulsu X, 1u << C)
(shl (zext X), C) -> (vwmulu X, 1u << C)
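Not part of the patch, but as a quick sanity check of the scalar identity behind both folds, here is a minimal standalone C++ sketch for the 8-to-16-bit case. It mirrors what vwmulu.vx/vwmulsu.vx compute element-wise; the shift amounts start at 2 because C <= 1 is left to vwadd/vwaddu (see the combine below) and stay below the narrow element width.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Scalar model of the folds for i8 -> i16:
  //   (shl (zext x), C) == (vwmulu  x, 1u << C)
  //   (shl (sext x), C) == (vwmulsu x, 1u << C)
  // The multiplier 1u << C is always an *unsigned* narrow operand, which is
  // why the sign-extended case maps to vwmulsu (signed * unsigned).
  for (int x = -128; x <= 127; ++x) {
    for (unsigned c = 2; c < 8; ++c) { // C <= 1 is handled by vwadd/vwaddu
      uint16_t mul = uint16_t(1u << c);

      uint16_t zextX = uint8_t(x);
      assert(uint16_t(zextX << c) == uint16_t(zextX * mul));

      int16_t sextX = int8_t(x);
      assert(uint16_t(uint16_t(sextX) << c) == uint16_t(int32_t(sextX) * mul));
    }
  }
  return 0;
}
```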
@llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)

Changes
Patch is 115.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121563.diff

7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 04dd23d9cdaa20..955a15393ca8a1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17341,6 +17341,78 @@ static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(Pop, DL, VT);
}
+static SDValue combineSHL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ // (shl (zext x), y) -> (vwsll x, y)
+ if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ return V;
+
+ // (shl (sext x), C) -> (vwmulsu x, 1u << C)
+ // (shl (zext x), C) -> (vwmulu x, 1u << C)
+
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse())
+ return SDValue();
+ unsigned Opcode;
+ switch (LHS.getOpcode()) {
+ case ISD::SIGN_EXTEND:
+ Opcode = RISCVISD::VWMULSU_VL;
+ break;
+ case ISD::ZERO_EXTEND:
+ Opcode = RISCVISD::VWMULU_VL;
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ APInt ShAmt;
+ if (!ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
+ return SDValue();
+
+ // Better foldings:
+ // (shl (sext x), 1) -> (vwadd x, x)
+ // (shl (zext x), 1) -> (vwaddu x, x)
+ uint64_t ShAmtInt = ShAmt.getZExtValue();
+ if (ShAmtInt <= 1)
+ return SDValue();
+
+ SDValue NarrowOp = LHS.getOperand(0);
+ EVT NarrowVT = NarrowOp.getValueType();
+ uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
+ if (ShAmtInt >= NarrowBits)
+ return SDValue();
+ EVT VT = N->getValueType(0);
+ if (NarrowBits * 2 != VT.getScalarSizeInBits())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+ SDValue Passthru, Mask, VL;
+ switch (N->getOpcode()) {
+ case ISD::SHL:
+ if (!VT.isScalableVector())
+ return SDValue();
+ Passthru = DAG.getUNDEF(VT);
+ std::tie(Mask, VL) =
+ getDefaultScalableVLOps(VT.getSimpleVT(), DL, DAG, Subtarget);
+ break;
+ case RISCVISD::SHL_VL:
+ Passthru = N->getOperand(2);
+ Mask = N->getOperand(3);
+ VL = N->getOperand(4);
+ break;
+ default:
+ llvm_unreachable("Expected SHL");
+ }
+ return DAG.getNode(Opcode, DL, VT, NarrowOp,
+ DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
+ Passthru, Mask, VL);
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17970,7 +18042,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::SHL_VL:
- if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
[[fallthrough]];
case RISCVISD::SRA_VL:
@@ -17995,7 +18067,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRL:
case ISD::SHL: {
if (N->getOpcode() == ISD::SHL) {
- if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
}
SDValue ShAmt = N->getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 9ee2324f615dd8..0fad09f27007c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -775,11 +775,11 @@ define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscal
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -791,10 +791,11 @@ define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscal
define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -815,10 +816,11 @@ define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x
define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -840,10 +842,11 @@ define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vsca
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vsll.vi v8, v16, 2
-; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v16, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -863,10 +866,9 @@ define <vscale x 8 x i32> @mgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32>
;
; RV64-LABEL: mgather_baseidx_nxv8i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV64-NEXT: li a1, 4
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV64-NEXT: vwmulsu.vx v16, v8, a1
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -1034,11 +1036,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscal
define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1050,11 +1052,11 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscal
define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -1074,11 +1076,11 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x
define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -1099,11 +1101,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vsca
define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vsll.vi v8, v12, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v12, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1124,10 +1126,11 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1147,10 +1150,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vsca
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1171,10 +1175,11 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vsca
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vzext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1845,11 +1850,11 @@ define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vsc
define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1861,10 +1866,11 @@ define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vsc
define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -1885,10 +1891,11 @@ define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale
define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -1910,10 +1917,11 @@ define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vs
define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vsll.vi v8, v16, 2
-; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v16, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1933,10 +1941,9 @@ define <vscale x 8 x float> @mgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i3
;
; RV64-LABEL: mgather_baseidx_nxv8f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV64-NEXT: li a1, 4
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV64-NEXT: vwmulsu.vx v16, v8, a1
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -2104,11 +2111,11 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vs
define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2120,11 +2127,11 @@ define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vs
define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -2144,11 +2151,11 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale
define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -2169,11 +2176,11 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <v
define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vsll.vi v8, v12, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v12, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2194,10 +2201,11 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%ptrs = getelementptr inb...
[truncated]
In the absence of Zvbb vwsll.vi, it can be profitable to use a widening multiply instead of a sign/zero extension followed by a left shift.
This is the case on the BPI-F3: each of vsext.vf2, vsll.vi and vwmul[s]u.vx has a throughput of 2*LMUL cycles: https://camel-cdr.github.io/rvv-bench-results/bpi_f3/
I confirmed this transform improves some benchmarks on a BPI-F3 board.
Looking at https://camel-cdr.github.io/rvv-bench-results/canmv_k230/, it should also apply there.
I don't know whether this is profitable for other RVV CPUs. Please advise if this should be restricted to certain CPUs and which ones.
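For concreteness, a tiny back-of-the-envelope sketch of that claim (my own model, not a measurement): if each of the three instructions costs roughly 2*LMUL cycles as the benchmark page reports, and we treat that figure uniformly, the fold trades two such instructions for one.

```cpp
#include <cstdio>

int main() {
  // Assumed per-instruction cost, taken from the quoted BPI-F3 numbers:
  // ~2*LMUL cycles each for vsext.vf2/vzext.vf2, vsll.vi and vwmul[s]u.vx.
  // This deliberately ignores that the extend and the widening multiply run
  // at the narrow LMUL while the shift runs at the widened LMUL.
  auto cycles = [](int lmul) { return 2 * lmul; };
  const int lmuls[] = {1, 2, 4, 8};
  for (int lmul : lmuls) {
    int extPlusShift = cycles(lmul) + cycles(lmul); // vzext/vsext + vsll
    int wideningMul = cycles(lmul);                 // single vwmulu/vwmulsu
    std::printf("LMUL=%d: ext+shift ~%d cycles, widening multiply ~%d cycles\n",
                lmul, extPlusShift, wideningMul);
  }
  return 0;
}
```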
|
||
SDValue LHS = N->getOperand(0); | ||
if (!LHS.hasOneUse()) | ||
return SDValue(); |
For now, this only handles single-use sext/zext. I can rewrite it to be part of combineOp_VLToVWOp_VL so that it handles multi-use too.
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-RV32-NEXT: vzext.vf2 v10, v8
; CHECK-RV32-NEXT: vsll.vi v8, v10, 2, v0.t
The transform did not apply on RV32 here:
17373 if (!ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
(gdb)
17374 return SDValue();
(gdb) call RHS->dump()
t20: nxv2i64 = splat_vector_parts Constant:i32<2>, Constant:i32<0>
I'm guessing it's an effect of type legalization. Please advise on how to fix.
splat_vector_parts is used to splat an e64 element on rv32, i.e. when EEW > XLEN. You could manually handle it in a follow-up PR; I don't think it's critical for this PR.
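For readers unfamiliar with the node, the sketch below (plain standalone C++, not LLVM code) only illustrates what the dumped splat_vector_parts encodes: the 64-bit splat value carried as two 32-bit operands on rv32 (assumed low part first, then high part), which is why ISD::isConstantSplatVector does not match it.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // The dumped node was:
  //   t20: nxv2i64 = splat_vector_parts Constant:i32<2>, Constant:i32<0>
  // On rv32 a 64-bit element cannot be carried as one scalar operand, so the
  // splat value arrives as two 32-bit parts (assumed here to be low part
  // first, then high part). It still denotes a constant splat of 2; it just
  // isn't in the form ISD::isConstantSplatVector recognizes.
  uint32_t Lo = 2, Hi = 0;                       // the two dumped operands
  uint64_t SplatVal = (uint64_t(Hi) << 32) | Lo; // the 64-bit shift amount
  assert(SplatVal == 2);
  return 0;
}
```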
rv32 RVV is rare, I suppose?
I'd prefer to do it right away, so that the test in vwsll-sdnode.ll doesn't disappear. Handling it here prioritizes vwmulu over vwsll -- I'll debug it.
Done. combineOp_VLToVWOp_VL handles RISCVISD::VMV_V_X_VL in AfterLegalizeDAG instead of ISD::SPLAT_VECTOR_PARTS in AfterLegalizeTypes. I do the same, so that vwsll has priority over vwmulu.
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vsll.vi v8, v10, 2
; CHECK-NEXT: ret
;
This is the same problem as in the vwsll-vp.ll file - see my comment there.
Makes sense to me. I guess multiplies might not have the same number of available execution ports, but it's still fewer vector ops at the end of the day.
    return SDValue();

  SDValue NarrowOp = LHS.getOperand(0);
  EVT NarrowVT = NarrowOp.getValueType();
It's better to use MVT + getSimpleValueType explicitly since this is past type legalization
Why? I use EVT for DAG.getConstant.
NarrowVT.getScalarSizeInBits() generates less code if NarrowVT is MVT.
Done.
  if (!VT.isScalableVector())
    return SDValue();
It might be worthwhile to leave a TODO to handle fixed length vectors later. You would need to use the ContainerVT = getContainerForFixedLengthVector(...) pattern that we use elsewhere.
TODO added
Done
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
vsext+vsll+vadd folded into vwmaccus.
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v10, v8
; RV32-NEXT: vsll.vi v8, v10, 2
Not folded on RV32. I will investigate.
Fixed by handling VSEXT_VL/VZEXT_VL.
; CHECK-ZVBB-RV64: # %bb.0:
; CHECK-ZVBB-RV64-NEXT: li a0, 4
; CHECK-ZVBB-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-ZVBB-RV64-NEXT: vwmulu.vx v10, v8, a0
Pessimized. I will investigate.
Fixed by delaying this transform.
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
; CHECK-ZVBB-NEXT: vwmulu.vx v10, v8, a0
Pessimized.
Fixed by delaying this transform.
I think I addressed all the comments and issues. Please review.
✅ With the latest revision this PR passed the C/C++ code formatter.
LGTM
I've heard that this transform won't be profitable on other CPUs, so I added a commit that enables it on BPI's SpacemiT X60 only. Based on https://camel-cdr.github.io/rvv-bench-results/canmv_k230/, it should perhaps cover the K230 too, but I don't have one to confirm. The sole presence of Zvbb doesn't preclude this transform, because …
It's probably profitable on SiFive x280. The vzext/vsext can produce DLEN bits per cycle, where DLEN = VLEN/2. The latency until the first DLEN is ready is 4. The shift also produces DLEN bits per cycle; the latency until the first DLEN is ready is 8. The widening multiply produces DLEN*2 bits per cycle; the first 2 DLENs complete in 8 cycles.
I might have the latency wrong for shifts on x280. The scheduler model says 4, but I have other docs internally that say 8. I'm confirming.
For my micro-architecture, this should be at most a very minor regression, and possibly a slight improvement. From our perspective, I'm 100% fine with this being enabled by default without a tuning flag.
@@ -17341,6 +17341,85 @@ static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
  return DAG.getZExtOrTrunc(Pop, DL, VT);
}

static SDValue combineSHL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
The naming convention in this file would have this routine named performSHLCombine
Renamed, thanks!
@@ -1417,6 +1417,9 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
                                             "true", "VXRM writes causes pipeline flush">;

def TuneCheapVWMul : SubtargetFeature<"cheap-vwmul", "HasCheapVWMul", "true",
I'm not sure the tune flag here is warranted. I would bias towards not having it unless we know some particular core is unprofitable.
…T X60" This reverts commit 63f902d.
I reverted restricting the transform to SpacemiT X60.
LGTM