Skip to content

Commit 15141cd

Browse files
committed
[RISCV] Add RVV insertelt/extractelt scalable-vector patterns
Original patch by @rogfer01. This patch adds support for insertelt and extractelt operations on scalable vectors. Special care must be taken on RV32 when dealing with i64 vectors as there are no straightforward ways to insert a 64-bit element without a register of that size. To that end, both are custom-lowered to different sequences. Authored-by: Roger Ferrer Ibanez <[email protected]> Co-Authored-by: Fraser Cormack <[email protected]> Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D94615
1 parent 1ac36b3 commit 15141cd

11 files changed

+5597
-19
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 149 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -403,12 +403,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
403403
// 2. Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
404404
// nodes which truncate by one power of two at a time.
405405
setOperationAction(ISD::TRUNCATE, VT, Custom);
406+
407+
// Custom-lower insert/extract operations to simplify patterns.
408+
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
409+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
406410
}
407411
}
408412

409-
// We must custom-lower SPLAT_VECTOR vXi64 on RV32
410-
if (!Subtarget.is64Bit())
413+
// We must custom-lower certain vXi64 operations on RV32 due to the vector
414+
// element type being illegal.
415+
if (!Subtarget.is64Bit()) {
411416
setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
417+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
418+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
419+
}
412420

413421
// Expand various CCs to best match the RVV ISA, which natively supports UNE
414422
// but no other unordered comparisons, and supports all ordered comparisons
@@ -423,33 +431,34 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
423431
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
424432
};
425433

434+
// Sets common operation actions on RVV floating-point vector types.
435+
const auto SetCommonVFPActions = [&](MVT VT) {
436+
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
437+
// Custom-lower insert/extract operations to simplify patterns.
438+
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
439+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
440+
for (auto CC : VFPCCToExpand)
441+
setCondCodeAction(CC, VT, Expand);
442+
};
443+
426444
if (Subtarget.hasStdExtZfh()) {
427445
for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
428446
RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
429-
RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) {
430-
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
431-
for (auto CC : VFPCCToExpand)
432-
setCondCodeAction(CC, VT, Expand);
433-
}
447+
RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t})
448+
SetCommonVFPActions(VT);
434449
}
435450

436451
if (Subtarget.hasStdExtF()) {
437452
for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
438453
RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
439-
RISCVVMVTs::vfloat32m8_t}) {
440-
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
441-
for (auto CC : VFPCCToExpand)
442-
setCondCodeAction(CC, VT, Expand);
443-
}
454+
RISCVVMVTs::vfloat32m8_t})
455+
SetCommonVFPActions(VT);
444456
}
445457

446458
if (Subtarget.hasStdExtD()) {
447459
for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
448-
RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) {
449-
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
450-
for (auto CC : VFPCCToExpand)
451-
setCondCodeAction(CC, VT, Expand);
452-
}
460+
RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t})
461+
SetCommonVFPActions(VT);
453462
}
454463
}
455464

@@ -761,6 +770,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
761770
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
762771
case ISD::SPLAT_VECTOR:
763772
return lowerSPLATVECTOR(Op, DAG);
773+
case ISD::INSERT_VECTOR_ELT:
774+
return lowerINSERT_VECTOR_ELT(Op, DAG);
775+
case ISD::EXTRACT_VECTOR_ELT:
776+
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
764777
case ISD::VSCALE: {
765778
MVT VT = Op.getSimpleValueType();
766779
SDLoc DL(Op);
@@ -1209,6 +1222,12 @@ SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
12091222
DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
12101223
}
12111224

1225+
if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1226+
SplatVal.getOperand(0).getValueType() == MVT::i32) {
1227+
return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1228+
SplatVal.getOperand(0));
1229+
}
1230+
12121231
// Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
12131232
// to accidentally sign-extend the 32-bit halves to the e64 SEW:
12141233
// vmv.v.x vX, hi
@@ -1306,6 +1325,72 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
13061325
return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
13071326
}
13081327

1328+
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1329+
SelectionDAG &DAG) const {
1330+
SDLoc DL(Op);
1331+
EVT VecVT = Op.getValueType();
1332+
SDValue Vec = Op.getOperand(0);
1333+
SDValue Val = Op.getOperand(1);
1334+
SDValue Idx = Op.getOperand(2);
1335+
1336+
// Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
1337+
// first slid down into position, the value is inserted into the first
1338+
// position, and the vector is slid back up. We do this to simplify patterns.
1339+
// (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1340+
if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1341+
if (isNullConstant(Idx))
1342+
return Op;
1343+
SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1344+
DAG.getUNDEF(VecVT), Vec, Idx);
1345+
SDValue InsertElt0 =
1346+
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1347+
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1348+
1349+
return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
1350+
}
1351+
1352+
// Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1353+
// is illegal (currently only vXi64 RV32).
1354+
// Since there is no easy way of getting a single element into a vector when
1355+
// XLEN<SEW, we lower the operation to the following sequence:
1356+
// splat vVal, rVal
1357+
// vid.v vVid
1358+
// vmseq.vx mMask, vVid, rIdx
1359+
// vmerge.vvm vDest, vSrc, vVal, mMask
1360+
// This essentially merges the original vector with the inserted element by
1361+
// using a mask whose only set bit is that corresponding to the insert
1362+
// index.
1363+
SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1364+
SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1365+
1366+
SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
1367+
auto SetCCVT =
1368+
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1369+
SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1370+
1371+
return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
1372+
}
1373+
1374+
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1375+
// extract the first element: (extractelt (slidedown vec, idx), 0). This is
1376+
done to maintain parity with the RV32 vXi64 legalization.
1377+
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1378+
SelectionDAG &DAG) const {
1379+
SDLoc DL(Op);
1380+
SDValue Idx = Op.getOperand(1);
1381+
if (isNullConstant(Idx))
1382+
return Op;
1383+
1384+
SDValue Vec = Op.getOperand(0);
1385+
EVT EltVT = Op.getValueType();
1386+
EVT VecVT = Vec.getValueType();
1387+
SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1388+
DAG.getUNDEF(VecVT), Vec, Idx);
1389+
1390+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown,
1391+
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1392+
}
1393+
13091394
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
13101395
SelectionDAG &DAG) const {
13111396
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -1640,6 +1725,44 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
16401725
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
16411726
break;
16421727
}
1728+
case ISD::EXTRACT_VECTOR_ELT: {
1729+
// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
1730+
// type is illegal (currently only vXi64 RV32).
1731+
// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
1732+
// transferred to the destination register. We issue two of these from the
1733+
upper and lower halves of the SEW-bit vector element, slid down to the
1734+
// first element.
1735+
SDLoc DL(N);
1736+
SDValue Vec = N->getOperand(0);
1737+
SDValue Idx = N->getOperand(1);
1738+
EVT VecVT = Vec.getValueType();
1739+
assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
1740+
VecVT.getVectorElementType() == MVT::i64 &&
1741+
"Unexpected EXTRACT_VECTOR_ELT legalization");
1742+
1743+
SDValue Slidedown = Vec;
1744+
// Unless the index is known to be 0, we must slide the vector down to get
1745+
// the desired element into index 0.
1746+
if (!isNullConstant(Idx))
1747+
Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1748+
DAG.getUNDEF(VecVT), Vec, Idx);
1749+
1750+
MVT XLenVT = Subtarget.getXLenVT();
1751+
// Extract the lower XLEN bits of the correct vector element.
1752+
SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
1753+
1754+
// To extract the upper XLEN bits of the vector element, shift the first
1755+
// element right by 32 bits and re-extract the lower XLEN bits.
1756+
SDValue ThirtyTwoV =
1757+
DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1758+
DAG.getConstant(32, DL, Subtarget.getXLenVT()));
1759+
SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
1760+
1761+
SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
1762+
1763+
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
1764+
break;
1765+
}
16431766
case ISD::INTRINSIC_WO_CHAIN: {
16441767
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
16451768
switch (IntNo) {
@@ -2231,8 +2354,12 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
22312354
return 33;
22322355
case RISCVISD::VMV_X_S:
22332356
// The number of sign bits of the scalar result is computed by obtaining the
2234-
// element type of the input vector operand, substracting its width from the
2235-
// XLEN, and then adding one (sign bit within the element type).
2357+
// element type of the input vector operand, subtracting its width from the
2358+
// XLEN, and then adding one (sign bit within the element type). If the
2359+
// element type is wider than XLen, the least-significant XLEN bits are
2360+
// taken.
2361+
if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
2362+
return 1;
22362363
return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
22372364
}
22382365

@@ -3893,6 +4020,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
38934020
NODE_NAME_CASE(VLEFF)
38944021
NODE_NAME_CASE(VLEFF_MASK)
38954022
NODE_NAME_CASE(READ_VL)
4023+
NODE_NAME_CASE(VSLIDEUP)
4024+
NODE_NAME_CASE(VSLIDEDOWN)
4025+
NODE_NAME_CASE(VID)
38964026
}
38974027
// clang-format on
38984028
return nullptr;

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ enum NodeType : unsigned {
101101
VLEFF_MASK,
102102
// read vl CSR
103103
READ_VL,
104+
// Matches the semantics of vslideup/vslidedown. The first operand is the
105+
// pass-thru operand, the second is the source vector, and the third is the
106+
// XLenVT index (either constant or non-constant).
107+
VSLIDEUP,
108+
VSLIDEDOWN,
109+
// Matches the semantics of the unmasked vid.v instruction.
110+
VID,
104111
};
105112
} // namespace RISCVISD
106113

@@ -298,6 +305,8 @@ class RISCVTargetLowering : public TargetLowering {
298305
SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
299306
int64_t ExtTrueVal) const;
300307
SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
308+
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
309+
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
301310
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
302311
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
303312

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ def riscv_trunc_vector : SDNode<"RISCVISD::TRUNCATE_VECTOR",
3232
SDTypeProfile<1, 1,
3333
[SDTCisVec<0>, SDTCisVec<1>]>>;
3434

35+
class FromFPR32<DAGOperand operand, dag input_dag> {
36+
dag ret = !cond(!eq(!cast<string>(operand), !cast<string>(FPR64)):
37+
(INSERT_SUBREG (IMPLICIT_DEF), input_dag, sub_32),
38+
!eq(!cast<string>(operand), !cast<string>(FPR16)):
39+
(EXTRACT_SUBREG input_dag, sub_16),
40+
!eq(1, 1):
41+
input_dag);
42+
}
43+
3544
// Penalize the generic form with Complexity=1 to give the simm5/uimm5 variants
3645
// precedence
3746
def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>;
@@ -538,3 +547,101 @@ foreach fvti = AllFloatVectors in {
538547
0, fvti.AVL, fvti.SEW)>;
539548
}
540549
} // Predicates = [HasStdExtV, HasStdExtF]
550+
551+
//===----------------------------------------------------------------------===//
552+
// Vector Element Inserts/Extracts
553+
//===----------------------------------------------------------------------===//
554+
555+
// The built-in TableGen 'extractelt' and 'insertelt' nodes must return the
556+
// same type as the vector element type. On RISC-V, XLenVT is the only legal
557+
// integer type, so for integer inserts/extracts we use a custom node which
558+
// returns XLenVT.
559+
def riscv_insert_vector_elt
560+
: SDNode<"ISD::INSERT_VECTOR_ELT",
561+
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, XLenVT>,
562+
SDTCisPtrTy<3>]>, []>;
563+
def riscv_extract_vector_elt
564+
: SDNode<"ISD::EXTRACT_VECTOR_ELT",
565+
SDTypeProfile<1, 2, [SDTCisVT<0, XLenVT>, SDTCisPtrTy<2>]>, []>;
566+
567+
multiclass VPatInsertExtractElt_XI_Idx<bit IsFloat> {
568+
defvar vtilist = !if(IsFloat, AllFloatVectors, AllIntegerVectors);
569+
defvar insertelt_node = !if(IsFloat, insertelt, riscv_insert_vector_elt);
570+
defvar extractelt_node = !if(IsFloat, extractelt, riscv_extract_vector_elt);
571+
foreach vti = vtilist in {
572+
defvar MX = vti.LMul.MX;
573+
defvar vmv_xf_s_inst = !cast<Instruction>(!if(IsFloat, "PseudoVFMV_F_S_",
574+
"PseudoVMV_X_S_")#MX);
575+
defvar vmv_s_xf_inst = !cast<Instruction>(!if(IsFloat, "PseudoVFMV_S_F_",
576+
"PseudoVMV_S_X_")#MX);
577+
// Only pattern-match insert/extract-element operations where the index is
578+
// 0. Any other index will have been custom-lowered to slide the vector
579+
// correctly into place (and, in the case of insert, slide it back again
580+
// afterwards).
581+
def : Pat<(vti.Scalar (extractelt_node (vti.Vector vti.RegClass:$rs2), 0)),
582+
FromFPR32<vti.ScalarRegClass,
583+
(vmv_xf_s_inst vti.RegClass:$rs2, vti.SEW)>.ret>;
584+
585+
def : Pat<(vti.Vector (insertelt_node (vti.Vector vti.RegClass:$merge),
586+
vti.ScalarRegClass:$rs1, 0)),
587+
(vmv_s_xf_inst vti.RegClass:$merge,
588+
ToFPR32<vti.Scalar, vti.ScalarRegClass, "rs1">.ret,
589+
vti.AVL, vti.SEW)>;
590+
}
591+
}
592+
593+
let Predicates = [HasStdExtV] in
594+
defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/0>;
595+
let Predicates = [HasStdExtV, HasStdExtF] in
596+
defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/1>;
597+
598+
//===----------------------------------------------------------------------===//
599+
// Miscellaneous RISCVISD SDNodes
600+
//===----------------------------------------------------------------------===//
601+
602+
def riscv_vid
603+
: SDNode<"RISCVISD::VID", SDTypeProfile<1, 0, [SDTCisVec<0>]>, []>;
604+
605+
def SDTRVVSlide : SDTypeProfile<1, 3, [
606+
SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>
607+
]>;
608+
609+
def riscv_slideup : SDNode<"RISCVISD::VSLIDEUP", SDTRVVSlide, []>;
610+
def riscv_slidedown : SDNode<"RISCVISD::VSLIDEDOWN", SDTRVVSlide, []>;
611+
612+
let Predicates = [HasStdExtV] in {
613+
614+
foreach vti = AllIntegerVectors in
615+
def : Pat<(vti.Vector riscv_vid),
616+
(!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX) vti.AVL, vti.SEW)>;
617+
618+
foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
619+
def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3),
620+
(vti.Vector vti.RegClass:$rs1),
621+
uimm5:$rs2)),
622+
(!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
623+
vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
624+
vti.AVL, vti.SEW)>;
625+
626+
def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3),
627+
(vti.Vector vti.RegClass:$rs1),
628+
GPR:$rs2)),
629+
(!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
630+
vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
631+
vti.AVL, vti.SEW)>;
632+
633+
def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3),
634+
(vti.Vector vti.RegClass:$rs1),
635+
uimm5:$rs2)),
636+
(!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
637+
vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
638+
vti.AVL, vti.SEW)>;
639+
640+
def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3),
641+
(vti.Vector vti.RegClass:$rs1),
642+
GPR:$rs2)),
643+
(!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
644+
vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
645+
vti.AVL, vti.SEW)>;
646+
}
647+
} // Predicates = [HasStdExtV]

0 commit comments

Comments
 (0)