[RISCV] Custom lower ISD::VSCALE.

topperc · topperc · commit dfc1901d513e · 2021-01-13T17:14:49.000-08:00
This patch custom lowers ISD::VSCALE into a csrr vlenb followed by a shift right by 3 followed by a multiply by the scale amount. I've added computeKnownBits support to indicate that the csrr vlenb always produces 3 trailing zero bits, so the shift right is "exact". This allows the shift and multiply sequence to be nicely optimized into a single shift, or removed completely, when the scale amount is a power of 2. The non-power-of-2 case (e.g. multiplying by 24) still produces suboptimal code: we could remove the right shift and use a multiply by 3 instead. Hopefully we can improve DAG combine to fix that, since it's not unique to this sequence. This replaces D94144. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94249
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -350,6 +351,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtV()) {
     setBooleanVectorContents(ZeroOrOneBooleanContent);
 
+    setOperationAction(ISD::VSCALE, XLenVT, Custom);
+
     // RVV intrinsics may have illegal operands.
     // We also need to custom legalize vmv.x.s.
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -686,6 +689,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   }
   case ISD::SPLAT_VECTOR:
     return lowerSPLATVECTOR(Op, DAG);
+  case ISD::VSCALE: {
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+    // We define our scalable vector types for lmul=1 to use a 64 bit known
+    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+    // vscale as VLENB / 8.
+    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
+                                 DAG.getConstant(3, DL, VT));
+    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+  }
   }
 }
 
@@ -1867,6 +1881,30 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   return true;
 }
 
+void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+                                                        KnownBits &Known,
+                                                        const APInt &DemandedElts,
+                                                        const SelectionDAG &DAG,
+                                                        unsigned Depth) const {
+  unsigned Opc = Op.getOpcode(); // Checked below: target or intrinsic only.
+  assert((Opc >= ISD::BUILTIN_OP_END ||
+          Opc == ISD::INTRINSIC_WO_CHAIN ||
+          Opc == ISD::INTRINSIC_W_CHAIN ||
+          Opc == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+
+  Known.resetAll(); // Clear Known first; the cases below add facts.
+  switch (Opc) {
+  default: break; // No known bits are reported for any other opcode.
+  case RISCVISD::READ_VLENB:
+    // We assume VLENB is at least 8 bytes, so report its low 3 bits as zero.
+    // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
+    Known.Zero.setLowBits(3);
+    break;
+  }
+}
+
 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {
@@ -3540,6 +3578,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(GORCIW)
   NODE_NAME_CASE(VMV_X_S)
   NODE_NAME_CASE(SPLAT_VECTOR_I64)
+  NODE_NAME_CASE(READ_VLENB)
   }
   // clang-format on
   return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -85,6 +85,8 @@ enum NodeType : unsigned {
   // Splats an i64 scalar to a vector type (with element type i64) where the
   // scalar is a sign-extended i32.
   SPLAT_VECTOR_I64,
+  // Read VLENB CSR
+  READ_VLENB,
 };
 } // namespace RISCVISD
 
@@ -123,6 +125,11 @@ class RISCVTargetLowering : public TargetLowering {
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  void computeKnownBitsForTargetNode(const SDValue Op,
+                                     KnownBits &Known,
+                                     const APInt &DemandedElts,
+                                     const SelectionDAG &DAG,
+                                     unsigned Depth) const override;
   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                            const APInt &DemandedElts,
                                            const SelectionDAG &DAG,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -17,6 +17,8 @@
 def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
                            SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
                                                 SDTCisInt<1>]>>;
+def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
+                              SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
 
 // X0 has special meaning for vsetvl/vsetvli.
 //  rd | rs1 |   AVL value | Effect on vl
@@ -2397,6 +2399,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
   def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
 }
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
+  def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
+                               [(set GPR:$rd, (riscv_read_vlenb))]>;
+}
+
 //===----------------------------------------------------------------------===//
 // 6. Configuration-Setting Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
     if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
       OutMI.addOperand(MCOp);
   }
+
+  if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
+    OutMI.setOpcode(RISCV::CSRRS);
+    OutMI.addOperand(MCOperand::createImm(
+        RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
+    OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+    return;
+  }
+
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
+; RUN:    | FileCheck %s
+
+define i32 @vscale_zero() nounwind {
+; CHECK-LABEL: vscale_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mv a0, zero
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 0
+  ret i32 %1
+}
+
+define i32 @vscale_one() nounwind {
+; CHECK-LABEL: vscale_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 1
+  ret i32 %1
+}
+
+define i32 @vscale_uimmpow2xlen() nounwind {
+; CHECK-LABEL: vscale_uimmpow2xlen:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 64
+  ret i32 %1
+}
+
+define i32 @vscale_non_pow2() nounwind {
+; CHECK-LABEL: vscale_non_pow2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a1, zero, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i32 @llvm.vscale.i32()
+  %1 = mul i32 %0, 24
+  ret i32 %1
+}
+
+declare i32 @llvm.vscale.i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
+; RUN:    | FileCheck %s
+
+define i64 @vscale_zero() nounwind {
+; CHECK-LABEL: vscale_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mv a0, zero
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 0
+  ret i64 %1
+}
+
+define i64 @vscale_one() nounwind {
+; CHECK-LABEL: vscale_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 1
+  ret i64 %1
+}
+
+define i64 @vscale_uimmpow2xlen() nounwind {
+; CHECK-LABEL: vscale_uimmpow2xlen:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 64
+  ret i64 %1
+}
+
+define i64 @vscale_non_pow2() nounwind {
+; CHECK-LABEL: vscale_non_pow2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a1, zero, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = mul i64 %0, 24
+  ret i64 %1
+}
+
+declare i64 @llvm.vscale.i64()

Original file line number	Diff line number	Diff line change
`@@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,`
`210`	`210`	`if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))`
`211`	`211`	`OutMI.addOperand(MCOp);`
`212`	`212`	`}`
	`213`	`+`
	`214`	`+ if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {`
	`215`	`+ OutMI.setOpcode(RISCV::CSRRS);`
	`216`	`+ OutMI.addOperand(MCOperand::createImm(`
	`217`	`+ RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));`
	`218`	`+ OutMI.addOperand(MCOperand::createReg(RISCV::X0));`
	`219`	`+ return;`
	`220`	`+ }`
	`221`	`+`
`213`	`222`	`}`