
Commit 120efbb

[RISCV] Add codegen support for Zvfbfmin

1 parent bc8726b

File tree

9 files changed: +484, -27 lines
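
Zvfbfmin provides the two bf16 vector conversion instructions, vfwcvtbf16.f.f.v (bf16 -> f32 widening) and vfncvtbf16.f.f.w (f32 -> bf16 narrowing). This commit wires them into codegen: fpext/fptrunc and the corresponding VP intrinsics on bf16 vector elements become selectable, with bf16 <-> f64 conversions lowered in two steps through f32. As a quick orientation, here is a minimal standalone sketch (not part of the commit) of IR that should now compile with llc; the attribute string is copied from the test RUN lines below, and the expected instruction is taken from the autogenerated CHECK lines.

; Sketch only, mirroring vfpext_nxv1bf16_nxv1f32 in vfpext-sdnode.ll below.
; Assumed invocation (copied from the RUN lines in the tests):
;   llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d -verify-machineinstrs
define <vscale x 1 x float> @example_vfpext(<vscale x 1 x bfloat> %va) {
  ; Expected to select vfwcvtbf16.f.f.v per the CHECK lines further down.
  %ext = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
  ret <vscale x 1 x float> %ext
}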

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 30 additions & 3 deletions
@@ -1080,6 +1080,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }
     }
 
+    // TODO: Could we merge some code with zvfhmin?
+    if (Subtarget.hasVInstructionsBF16()) {
+      for (MVT VT : BF16VecVTs) {
+        if (!isTypeLegal(VT))
+          continue;
+        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+        // TODO: Promote to fp32.
+      }
+    }
+
     if (Subtarget.hasVInstructionsF32()) {
       for (MVT VT : F32VecVTs) {
         if (!isTypeLegal(VT))
@@ -1295,6 +1306,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           continue;
         }
 
+        if (VT.getVectorElementType() == MVT::bf16 &&
+            Subtarget.hasVInstructionsBF16()) {
+          setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+          setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+          // TODO: Promote to fp32.
+          continue;
+        }
+
         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                            Custom);
@@ -2549,6 +2568,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
     if (!Subtarget.hasVInstructionsF16Minimal())
       return false;
     break;
+  case MVT::bf16:
+    if (!Subtarget.hasVInstructionsBF16())
+      return false;
+    break;
   case MVT::f32:
     if (!Subtarget.hasVInstructionsF32())
       return false;
@@ -2600,6 +2623,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
   case MVT::i16:
   case MVT::i32:
   case MVT::i64:
+  case MVT::bf16:
   case MVT::f16:
   case MVT::f32:
   case MVT::f64: {
@@ -8086,9 +8110,12 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
   SDValue Src = Op.getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
 
-  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
-                                     SrcVT.getVectorElementType() != MVT::f16);
-  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+  bool IsDirectExtend =
+      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+                   (SrcVT.getVectorElementType() != MVT::f16 &&
+                    SrcVT.getVectorElementType() != MVT::bf16));
+  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
+                                      VT.getVectorElementType() != MVT::bf16) ||
                                      SrcVT.getVectorElementType() != MVT::f64);
 
   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
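
Note the IsDirectExtend/IsDirectTrunc change above: like f16, bf16 has no single-instruction conversion to or from f64, so lowerVectorFPExtendOrRoundLike routes those conversions through an f32 intermediate. A sketch of IR that takes the two-step narrowing path (assumed behavior, mirroring the fixed-vector vfptrunc_v2bf16_v2f64 test later in this commit; a scalable-vector version of that test is not shown in this excerpt):

; Sketch only: the f64 -> bf16 narrow is not direct, so lowering is expected to
; first narrow f64 -> f32 (vfncvt.rod.f.f.w) and then f32 -> bf16
; (vfncvtbf16.f.f.w), matching the fixed-vector tests below.
define <vscale x 1 x bfloat> @example_two_step_trunc(<vscale x 1 x double> %va) {
  %trunc = fptrunc <vscale x 1 x double> %va to <vscale x 1 x bfloat>
  ret <vscale x 1 x bfloat> %trunc
}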

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 14 additions & 0 deletions
@@ -1450,6 +1450,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                 fvti.AVL, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+            (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX)
+                (fvti.Vector (IMPLICIT_DEF)),
+                fwti.RegClass:$rs1,
+                // Value to indicate no rounding mode change in
+                // RISCVInsertReadWriteCSR
+                FRM_DYN,
+                fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Vector Splats
 //===----------------------------------------------------------------------===//
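
The new foreach above covers the plain SelectionDAG path: an fpround from an f32 vector to a bf16 vector is selected directly to PseudoVFNCVTBF16_F_F_W, passing FRM_DYN so RISCVInsertReadWriteCSR leaves the rounding mode alone. A minimal sketch of IR this pattern is meant to match (assumed; the commit's non-VP fptrunc tests are not part of this excerpt):

; Sketch only: a plain (non-VP) f32 -> bf16 truncation, which the fpround
; pattern above should select to vfncvtbf16.f.f.w.
define <vscale x 2 x bfloat> @example_fpround(<vscale x 2 x float> %va) {
  %trunc = fptrunc <vscale x 2 x float> %va to <vscale x 2 x bfloat>
  ret <vscale x 2 x bfloat> %trunc
}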

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 30 additions & 0 deletions
@@ -2668,6 +2668,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                   GPR:$vl, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+                              (fvti.Vector fvti.RegClass:$rs1),
+                              (fvti.Mask V0),
+                              VLOpFrag)),
+            (!cast<Instruction>("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_MASK")
+                (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                (fvti.Mask V0),
+                GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
 // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
 defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
 defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
@@ -2712,6 +2726,22 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
   }
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fvti.Vector (any_riscv_fpround_vl
+                              (fwti.Vector fwti.RegClass:$rs1),
+                              (fwti.Mask V0), VLOpFrag)),
+            (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_MASK")
+                (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+                (fwti.Mask V0),
+                // Value to indicate no rounding mode change in
+                // RISCVInsertReadWriteCSR
+                FRM_DYN,
+                GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
 // 14. Vector Reduction Operations
 
 // 14.1. Vector Single-Width Integer Reduction Instructions
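
These are the VL-pattern counterparts: they match the masked riscv_fpextend_vl and riscv_fpround_vl nodes produced for VP intrinsics and masked custom lowering, and select the _MASK forms of the bf16 convert pseudos. A sketch of a VP intrinsic that exercises this path on scalable vectors (assumed; the commit's own VP tests, on fixed-length vectors, follow in the next two files):

declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

; Sketch only: a masked VP extend from bf16 to f32, expected to select the
; masked vfwcvtbf16.f.f.v form via the any_riscv_fpextend_vl pattern above.
define <vscale x 2 x float> @example_vp_fpext(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  %ext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %ext
}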

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll

Lines changed: 54 additions & 4 deletions
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
 
@@ -120,3 +120,53 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
   %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
   ret <32 x double> %v
 }
+
+declare <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x float> %v
+}
+
+define <2 x float> @vfpext_v2bf16_v2f32_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x float> %v
+}
+
+declare <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x double> %v
+}
+
+define <2 x double> @vfpext_v2bf16_v2f64_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x double> %v
+}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll

Lines changed: 54 additions & 4 deletions
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
 
 
 declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
@@ -122,3 +122,53 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
   %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl)
   ret <32 x float> %v
 }
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x bfloat> %v
+}

llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll

Lines changed: 116 additions & 4 deletions
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
@@ -167,3 +167,115 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
   %evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
 }
+
+define <vscale x 1 x float> @vfpext_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
+  ret <vscale x 1 x float> %evec
+}
+
+define <vscale x 1 x double> @vfpext_nxv1bf16_nxv1f64(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x double>
+  ret <vscale x 1 x double> %evec
+}
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x float>
+  ret <vscale x 2 x float> %evec
+}
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x double>
+  ret <vscale x 2 x double> %evec
+}
+
+define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
+  ret <vscale x 4 x float> %evec
+}
+
+define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x double>
+  ret <vscale x 4 x double> %evec
+}
+
+define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
+  ret <vscale x 8 x float> %evec
+}
+
+define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x double>
+  ret <vscale x 8 x double> %evec
+}
+
+define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
+  ret <vscale x 16 x float> %evec
+}
