
Commit 120efbb

[RISCV] Add codegen support for Zvfbfmin

1 parent bc8726b

File tree

9 files changed: +484, -27 lines
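
Zvfbfmin provides the two bf16 vector conversion instructions, vfwcvtbf16.f.f.v (bf16 -> f32 widening) and vfncvtbf16.f.f.w (f32 -> bf16 narrowing). This commit wires them into codegen: fpext/fptrunc and the corresponding VP intrinsics on bf16 vector elements become selectable, with bf16 <-> f64 conversions lowered in two steps through f32. As a quick orientation, here is a minimal standalone sketch (not part of the commit) of IR that should now compile with llc; the attribute string is copied from the test RUN lines below, and the expected instruction is taken from the autogenerated CHECK lines.

; Sketch only, mirroring vfpext_nxv1bf16_nxv1f32 in vfpext-sdnode.ll below.
; Assumed invocation (copied from the RUN lines in the tests):
;   llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d -verify-machineinstrs
define <vscale x 1 x float> @example_vfpext(<vscale x 1 x bfloat> %va) {
  ; Expected to select vfwcvtbf16.f.f.v per the CHECK lines further down.
  %ext = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
  ret <vscale x 1 x float> %ext
}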

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 30 additions & 3 deletions
@@ -1080,6 +1080,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }
     }
 
+    // TODO: Could we merge some code with zvfhmin?
+    if (Subtarget.hasVInstructionsBF16()) {
+      for (MVT VT : BF16VecVTs) {
+        if (!isTypeLegal(VT))
+          continue;
+        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+        // TODO: Promote to fp32.
+      }
+    }
+
     if (Subtarget.hasVInstructionsF32()) {
       for (MVT VT : F32VecVTs) {
         if (!isTypeLegal(VT))
@@ -1295,6 +1306,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           continue;
         }
 
+        if (VT.getVectorElementType() == MVT::bf16 &&
+            Subtarget.hasVInstructionsBF16()) {
+          setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+          setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+          // TODO: Promote to fp32.
+          continue;
+        }
+
         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                            Custom);
@@ -2549,6 +2568,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
     if (!Subtarget.hasVInstructionsF16Minimal())
       return false;
     break;
+  case MVT::bf16:
+    if (!Subtarget.hasVInstructionsBF16())
+      return false;
+    break;
   case MVT::f32:
     if (!Subtarget.hasVInstructionsF32())
       return false;
@@ -2600,6 +2623,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
   case MVT::i16:
   case MVT::i32:
   case MVT::i64:
+  case MVT::bf16:
   case MVT::f16:
   case MVT::f32:
   case MVT::f64: {
@@ -8086,9 +8110,12 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
   SDValue Src = Op.getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
 
-  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
-                                     SrcVT.getVectorElementType() != MVT::f16);
-  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+  bool IsDirectExtend =
+      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+                   (SrcVT.getVectorElementType() != MVT::f16 &&
+                    SrcVT.getVectorElementType() != MVT::bf16));
+  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
+                                      VT.getVectorElementType() != MVT::bf16) ||
                                      SrcVT.getVectorElementType() != MVT::f64);
 
   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
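
Note the IsDirectExtend/IsDirectTrunc change above: like f16, bf16 has no single-instruction conversion to or from f64, so lowerVectorFPExtendOrRoundLike routes those conversions through an f32 intermediate. A sketch of IR that takes the two-step narrowing path (assumed behavior, mirroring the fixed-vector vfptrunc_v2bf16_v2f64 test later in this commit; a scalable-vector version of that test is not shown in this excerpt):

; Sketch only: the f64 -> bf16 narrow is not direct, so lowering is expected to
; first narrow f64 -> f32 (vfncvt.rod.f.f.w) and then f32 -> bf16
; (vfncvtbf16.f.f.w), matching the fixed-vector tests below.
define <vscale x 1 x bfloat> @example_two_step_trunc(<vscale x 1 x double> %va) {
  %trunc = fptrunc <vscale x 1 x double> %va to <vscale x 1 x bfloat>
  ret <vscale x 1 x bfloat> %trunc
}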

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 14 additions & 0 deletions
@@ -1450,6 +1450,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                 fvti.AVL, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+            (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX)
+                (fvti.Vector (IMPLICIT_DEF)),
+                fwti.RegClass:$rs1,
+                // Value to indicate no rounding mode change in
+                // RISCVInsertReadWriteCSR
+                FRM_DYN,
+                fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Vector Splats
 //===----------------------------------------------------------------------===//
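
The new foreach above covers the plain SelectionDAG path: an fpround from an f32 vector to a bf16 vector is selected directly to PseudoVFNCVTBF16_F_F_W, passing FRM_DYN so RISCVInsertReadWriteCSR leaves the rounding mode alone. A minimal sketch of IR this pattern is meant to match (assumed; the commit's non-VP fptrunc tests are not part of this excerpt):

; Sketch only: a plain (non-VP) f32 -> bf16 truncation, which the fpround
; pattern above should select to vfncvtbf16.f.f.w.
define <vscale x 2 x bfloat> @example_fpround(<vscale x 2 x float> %va) {
  %trunc = fptrunc <vscale x 2 x float> %va to <vscale x 2 x bfloat>
  ret <vscale x 2 x bfloat> %trunc
}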

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 30 additions & 0 deletions
@@ -2668,6 +2668,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                   GPR:$vl, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+                              (fvti.Vector fvti.RegClass:$rs1),
+                              (fvti.Mask V0),
+                              VLOpFrag)),
+            (!cast<Instruction>("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_MASK")
+                (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                (fvti.Mask V0),
+                GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
 // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
 defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
 defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
@@ -2712,6 +2726,22 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
   }
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fvti.Vector (any_riscv_fpround_vl
+                              (fwti.Vector fwti.RegClass:$rs1),
+                              (fwti.Mask V0), VLOpFrag)),
+            (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_MASK")
+                (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+                (fwti.Mask V0),
+                // Value to indicate no rounding mode change in
+                // RISCVInsertReadWriteCSR
+                FRM_DYN,
+                GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
 // 14. Vector Reduction Operations
 
 // 14.1. Vector Single-Width Integer Reduction Instructions
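
These are the VL-pattern counterparts: they match the masked riscv_fpextend_vl and riscv_fpround_vl nodes produced for VP intrinsics and masked custom lowering, and select the _MASK forms of the bf16 convert pseudos. A sketch of a VP intrinsic that exercises this path on scalable vectors (assumed; the commit's own VP tests, on fixed-length vectors, follow in the next two files):

declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

; Sketch only: a masked VP extend from bf16 to f32, expected to select the
; masked vfwcvtbf16.f.f.v form via the any_riscv_fpextend_vl pattern above.
define <vscale x 2 x float> @example_vp_fpext(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  %ext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %ext
}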

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll

Lines changed: 54 additions & 4 deletions
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
 
@@ -120,3 +120,53 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
   %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
   ret <32 x double> %v
 }
+
+declare <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x float> %v
+}
+
+define <2 x float> @vfpext_v2bf16_v2f32_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x float> %v
+}
+
+declare <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x double> %v
+}
+
+define <2 x double> @vfpext_v2bf16_v2f64_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x double> %v
+}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll

Lines changed: 54 additions & 4 deletions
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
 
 
 declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
@@ -122,3 +122,53 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
   %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl)
   ret <32 x float> %v
 }
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x bfloat> %v
+}

llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll

Lines changed: 116 additions & 4 deletions
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
@@ -167,3 +167,115 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
   %evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
 }
+
+define <vscale x 1 x float> @vfpext_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
+  ret <vscale x 1 x float> %evec
+}
+
+define <vscale x 1 x double> @vfpext_nxv1bf16_nxv1f64(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x double>
+  ret <vscale x 1 x double> %evec
+}
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x float>
+  ret <vscale x 2 x float> %evec
+}
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x double>
+  ret <vscale x 2 x double> %evec
+}
+
+define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
+  ret <vscale x 4 x float> %evec
+}
+
+define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x double>
+  ret <vscale x 4 x double> %evec
+}
+
+define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
+  ret <vscale x 8 x float> %evec
+}
+
+define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x double>
+  ret <vscale x 8 x double> %evec
+}
+
+define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
+  ret <vscale x 16 x float> %evec
+}
