Skip to content

Commit 9357712

Browse files
committed
[RISCV] Use vfslide1down for build_vectors of non-constant floats
This adds the vfslide1down (and vfslide1up for consistency) nodes. These mostly parallel the existing vslide1down/up nodes. (See note below on instruction semantics.) We then use the vfslide1down in build_vector lowering instead of going through the stack. The specification is more than a bit vague on the meaning of these instructions. All we're given is "The vfslide1down instruction is defined analogously, but sources its scalar argument from an f register." We have to combine this with a general note at the beginning of section 10. Vector Arithmetic Instruction Formats which reads: "For floating-point operations, the scalar can be taken from a scalar f register. If FLEN > SEW, the value in the f registers is checked for a valid NaN-boxed value, in which case the least-significant SEW bits of the f register are used, else the canonical NaN value is used. Vector instructions where any floating-point vector operand’s EEW is not a supported floating-point type width (which includes when FLEN < SEW) are reserved.". Note that floats are NaN-boxed when D is implemented. Combining that all together, we're fine as long as the element type matches the vector type - which it does by construction. We shouldn't have legal vectors which hit the reserved encoding case. An assert is included, just to be careful. Differential Revision: https://reviews.llvm.org/D151347
1 parent 74f15d9 commit 9357712

File tree

4 files changed

+65
-46
lines changed

4 files changed

+65
-46
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3218,15 +3218,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
32183218
// For constant vectors, use generic constant pool lowering. Otherwise,
32193219
// we'd have to materialize constants in GPRs just to move them into the
32203220
// vector.
3221-
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
3221+
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3222+
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
32223223
return SDValue();
32233224

3224-
// We can use a series of vslide1down instructions to move values in GPRs
3225-
// into the appropriate place in the result vector. We use slide1down
3226-
// to avoid the register group overlap constraint of vslide1up.
3227-
if (VT.isFloatingPoint())
3228-
// TODO: Use vfslide1down.
3229-
return SDValue();
3225+
assert((!VT.isFloatingPoint() ||
3226+
VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3227+
"Illegal type which will result in reserved encoding");
32303228

32313229
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
32323230

@@ -3243,8 +3241,10 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
32433241
Vec, Offset, Mask, VL, Policy);
32443242
UndefCount = 0;
32453243
}
3246-
Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
3247-
DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
3244+
auto OpCode =
3245+
VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3246+
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3247+
V, Mask, VL);
32483248
}
32493249
if (UndefCount) {
32503250
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
@@ -15161,6 +15161,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1516115161
NODE_NAME_CASE(VSLIDE1UP_VL)
1516215162
NODE_NAME_CASE(VSLIDEDOWN_VL)
1516315163
NODE_NAME_CASE(VSLIDE1DOWN_VL)
15164+
NODE_NAME_CASE(VFSLIDE1UP_VL)
15165+
NODE_NAME_CASE(VFSLIDE1DOWN_VL)
1516415166
NODE_NAME_CASE(VID_VL)
1516515167
NODE_NAME_CASE(VFNCVT_ROD_VL)
1516615168
NODE_NAME_CASE(VECREDUCE_ADD_VL)

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,12 @@ enum NodeType : unsigned {
164164
// value. The fourth and fifth operands are the mask and VL operands.
165165
VSLIDE1UP_VL,
166166
VSLIDE1DOWN_VL,
167+
// Matches the semantics of vfslide1up/vfslide1down. The first operand is
168+
// passthru operand, the second is source vector, third is a scalar value
169+
// whose type matches the element type of the vectors. The fourth and fifth
170+
// operands are the mask and VL operands.
171+
VFSLIDE1UP_VL,
172+
VFSLIDE1DOWN_VL,
167173
// Matches the semantics of the vid.v instruction, with a mask and VL
168174
// operand.
169175
VID_VL,

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,11 +2456,18 @@ def SDTRVVSlide1 : SDTypeProfile<1, 5, [
24562456
SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
24572457
SDTCisVT<5, XLenVT>
24582458
]>;
2459+
def SDTRVVFSlide1 : SDTypeProfile<1, 5, [
2460+
SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisFP<0>,
2461+
SDTCisEltOfVec<3, 0>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
2462+
SDTCisVT<5, XLenVT>
2463+
]>;
24592464

24602465
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
24612466
def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
24622467
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
24632468
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
2469+
def riscv_fslide1up_vl : SDNode<"RISCVISD::VFSLIDE1UP_VL", SDTRVVFSlide1, []>;
2470+
def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, []>;
24642471

24652472
foreach vti = AllIntegerVectors in {
24662473
let Predicates = GetVTypePredicates<vti>.Predicates in {
@@ -2495,6 +2502,35 @@ foreach vti = AllIntegerVectors in {
24952502
}
24962503
}
24972504

2505+
foreach vti = AllFloatVectors in {
2506+
let Predicates = GetVTypePredicates<vti>.Predicates in {
2507+
def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector undef),
2508+
(vti.Vector vti.RegClass:$rs1),
2509+
vti.Scalar:$rs2, (vti.Mask true_mask),
2510+
VLOpFrag)),
2511+
(!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
2512+
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2513+
def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
2514+
(vti.Vector vti.RegClass:$rs1),
2515+
vti.Scalar:$rs2, (vti.Mask true_mask),
2516+
VLOpFrag)),
2517+
(!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
2518+
vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2519+
def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector undef),
2520+
(vti.Vector vti.RegClass:$rs1),
2521+
vti.Scalar:$rs2, (vti.Mask true_mask),
2522+
VLOpFrag)),
2523+
(!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
2524+
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2525+
def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
2526+
(vti.Vector vti.RegClass:$rs1),
2527+
vti.Scalar:$rs2, (vti.Mask true_mask),
2528+
VLOpFrag)),
2529+
(!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
2530+
vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2531+
}
2532+
}
2533+
24982534
foreach vti = AllVectors in {
24992535
let Predicates = GetVTypePredicates<vti>.Predicates in {
25002536
def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,9 @@ define dso_local void @splat_load_licm(float* %0) {
291291
define <2 x half> @buildvec_v2f16(half %a, half %b) {
292292
; CHECK-LABEL: buildvec_v2f16:
293293
; CHECK: # %bb.0:
294-
; CHECK-NEXT: addi sp, sp, -16
295-
; CHECK-NEXT: .cfi_def_cfa_offset 16
296-
; CHECK-NEXT: fsh fa1, 14(sp)
297-
; CHECK-NEXT: fsh fa0, 12(sp)
298-
; CHECK-NEXT: addi a0, sp, 12
299294
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
300-
; CHECK-NEXT: vle16.v v8, (a0)
301-
; CHECK-NEXT: addi sp, sp, 16
295+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
296+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
302297
; CHECK-NEXT: ret
303298
%v1 = insertelement <2 x half> poison, half %a, i64 0
304299
%v2 = insertelement <2 x half> %v1, half %b, i64 1
@@ -308,14 +303,9 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
308303
define <2 x float> @buildvec_v2f32(float %a, float %b) {
309304
; CHECK-LABEL: buildvec_v2f32:
310305
; CHECK: # %bb.0:
311-
; CHECK-NEXT: addi sp, sp, -16
312-
; CHECK-NEXT: .cfi_def_cfa_offset 16
313-
; CHECK-NEXT: fsw fa1, 12(sp)
314-
; CHECK-NEXT: fsw fa0, 8(sp)
315-
; CHECK-NEXT: addi a0, sp, 8
316306
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
317-
; CHECK-NEXT: vle32.v v8, (a0)
318-
; CHECK-NEXT: addi sp, sp, 16
307+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
308+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
319309
; CHECK-NEXT: ret
320310
%v1 = insertelement <2 x float> poison, float %a, i64 0
321311
%v2 = insertelement <2 x float> %v1, float %b, i64 1
@@ -325,14 +315,9 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
325315
define <2 x double> @buildvec_v2f64(double %a, double %b) {
326316
; CHECK-LABEL: buildvec_v2f64:
327317
; CHECK: # %bb.0:
328-
; CHECK-NEXT: addi sp, sp, -16
329-
; CHECK-NEXT: .cfi_def_cfa_offset 16
330-
; CHECK-NEXT: fsd fa1, 8(sp)
331-
; CHECK-NEXT: fsd fa0, 0(sp)
332-
; CHECK-NEXT: mv a0, sp
333318
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
334-
; CHECK-NEXT: vle64.v v8, (a0)
335-
; CHECK-NEXT: addi sp, sp, 16
319+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
320+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
336321
; CHECK-NEXT: ret
337322
%v1 = insertelement <2 x double> poison, double %a, i64 0
338323
%v2 = insertelement <2 x double> %v1, double %b, i64 1
@@ -342,14 +327,9 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
342327
define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
343328
; CHECK-LABEL: buildvec_v2f64_b:
344329
; CHECK: # %bb.0:
345-
; CHECK-NEXT: addi sp, sp, -16
346-
; CHECK-NEXT: .cfi_def_cfa_offset 16
347-
; CHECK-NEXT: fsd fa1, 8(sp)
348-
; CHECK-NEXT: fsd fa0, 0(sp)
349-
; CHECK-NEXT: mv a0, sp
350330
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
351-
; CHECK-NEXT: vle64.v v8, (a0)
352-
; CHECK-NEXT: addi sp, sp, 16
331+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
332+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
353333
; CHECK-NEXT: ret
354334
%v1 = insertelement <2 x double> poison, double %b, i64 1
355335
%v2 = insertelement <2 x double> %v1, double %a, i64 0
@@ -359,16 +339,11 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
359339
define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
360340
; CHECK-LABEL: buildvec_v4f32:
361341
; CHECK: # %bb.0:
362-
; CHECK-NEXT: addi sp, sp, -16
363-
; CHECK-NEXT: .cfi_def_cfa_offset 16
364-
; CHECK-NEXT: fsw fa3, 12(sp)
365-
; CHECK-NEXT: fsw fa2, 8(sp)
366-
; CHECK-NEXT: fsw fa1, 4(sp)
367-
; CHECK-NEXT: fsw fa0, 0(sp)
368-
; CHECK-NEXT: mv a0, sp
369342
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
370-
; CHECK-NEXT: vle32.v v8, (a0)
371-
; CHECK-NEXT: addi sp, sp, 16
343+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
344+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
345+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
346+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
372347
; CHECK-NEXT: ret
373348
%v1 = insertelement <4 x float> poison, float %a, i64 0
374349
%v2 = insertelement <4 x float> %v1, float %b, i64 1

0 commit comments

Comments
 (0)