Skip to content

Commit 2509dcd

Browse files
jacquesguan
authored and committed
[RISCV] Add rvv codegen support for vp.fpext.
This patch adds rvv codegen support for vp.fpext. The lowering of fp_round, vp.fptrunc, fp_extend and vp.fpext share most code so use a common lowering function to handle these four. And this patch changes the intermediate cast from ISD::FP_EXTEND/ISD::FP_ROUND to the RVV VL version op RISCVISD::FP_EXTEND_VL and RISCVISD::FP_ROUND_VL for scalable vectors. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D123975
1 parent d4609ae commit 2509dcd

File tree

4 files changed

+190
-75
lines changed

4 files changed

+190
-75
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 35 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
443443
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
444444
ISD::VP_MERGE, ISD::VP_SELECT,
445445
ISD::VP_SITOFP, ISD::VP_UITOFP,
446-
ISD::VP_SETCC, ISD::VP_FP_ROUND};
446+
ISD::VP_SETCC, ISD::VP_FP_ROUND,
447+
ISD::VP_FP_EXTEND};
447448

448449
if (!Subtarget.is64Bit()) {
449450
// We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -2795,21 +2796,6 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
27952796
isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
27962797
}
27972798

2798-
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2799-
SDLoc DL, SelectionDAG &DAG,
2800-
const RISCVSubtarget &Subtarget) {
2801-
if (VT.isScalableVector())
2802-
return DAG.getFPExtendOrRound(Op, DL, VT);
2803-
assert(VT.isFixedLengthVector() &&
2804-
"Unexpected value type for RVV FP extend/round lowering");
2805-
SDValue Mask, VL;
2806-
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2807-
unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2808-
? RISCVISD::FP_EXTEND_VL
2809-
: RISCVISD::FP_ROUND_VL;
2810-
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2811-
}
2812-
28132799
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
28142800
// the exponent.
28152801
static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
@@ -3126,50 +3112,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
31263112
}
31273113
return SDValue();
31283114
}
3129-
case ISD::FP_EXTEND: {
3130-
// RVV can only do fp_extend to types double the size as the source. We
3131-
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
3132-
// via f32.
3133-
SDLoc DL(Op);
3134-
MVT VT = Op.getSimpleValueType();
3135-
SDValue Src = Op.getOperand(0);
3136-
MVT SrcVT = Src.getSimpleValueType();
3137-
3138-
// Prepare any fixed-length vector operands.
3139-
MVT ContainerVT = VT;
3140-
if (SrcVT.isFixedLengthVector()) {
3141-
ContainerVT = getContainerForFixedLengthVector(VT);
3142-
MVT SrcContainerVT =
3143-
ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
3144-
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3145-
}
3146-
3147-
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
3148-
SrcVT.getVectorElementType() != MVT::f16) {
3149-
// For scalable vectors, we only need to close the gap between
3150-
// vXf16->vXf64.
3151-
if (!VT.isFixedLengthVector())
3152-
return Op;
3153-
// For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
3154-
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3155-
return convertFromScalableVector(VT, Src, DAG, Subtarget);
3156-
}
3157-
3158-
MVT InterVT = VT.changeVectorElementType(MVT::f32);
3159-
MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
3160-
SDValue IntermediateExtend = getRVVFPExtendOrRound(
3161-
Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
3162-
3163-
SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
3164-
DL, DAG, Subtarget);
3165-
if (VT.isFixedLengthVector())
3166-
return convertFromScalableVector(VT, Extend, DAG, Subtarget);
3167-
return Extend;
3168-
}
3115+
case ISD::FP_EXTEND:
31693116
case ISD::FP_ROUND:
31703117
if (!Op.getValueType().isVector())
31713118
return Op;
3172-
return lowerVectorFPRoundLike(Op, DAG);
3119+
return lowerVectorFPExtendOrRoundLike(Op, DAG);
31733120
case ISD::FP_TO_SINT:
31743121
case ISD::FP_TO_UINT:
31753122
case ISD::SINT_TO_FP:
@@ -3512,8 +3459,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
35123459
: RISCVISD::VZEXT_VL);
35133460
case ISD::VP_TRUNC:
35143461
return lowerVectorTruncLike(Op, DAG);
3462+
case ISD::VP_FP_EXTEND:
35153463
case ISD::VP_FP_ROUND:
3516-
return lowerVectorFPRoundLike(Op, DAG);
3464+
return lowerVectorFPExtendOrRoundLike(Op, DAG);
35173465
case ISD::VP_FPTOSI:
35183466
return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
35193467
case ISD::VP_FPTOUI:
@@ -4281,9 +4229,13 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
42814229
return Result;
42824230
}
42834231

4284-
SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op,
4232+
SDValue
4233+
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
42854234
SelectionDAG &DAG) const {
4286-
bool IsVPFPTrunc = Op.getOpcode() == ISD::VP_FP_ROUND;
4235+
bool IsVP =
4236+
Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
4237+
bool IsExtend =
4238+
Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
42874239
// RVV can only do truncate fp to types half the size as the source. We
42884240
// custom-lower f64->f16 rounds via RVV's round-to-odd float
42894241
// conversion instruction.
@@ -4295,17 +4247,21 @@ SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op,
42954247
SDValue Src = Op.getOperand(0);
42964248
MVT SrcVT = Src.getSimpleValueType();
42974249

4298-
bool IsDirectConv = VT.getVectorElementType() != MVT::f16 ||
4299-
SrcVT.getVectorElementType() != MVT::f64;
4250+
bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
4251+
SrcVT.getVectorElementType() != MVT::f16);
4252+
bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
4253+
SrcVT.getVectorElementType() != MVT::f64);
43004254

4301-
// For FP_ROUND of scalable vectors, leave it to the pattern.
4302-
if (!VT.isFixedLengthVector() && !IsVPFPTrunc && IsDirectConv)
4255+
bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
4256+
4257+
// For FP_ROUND/FP_EXTEND of scalable vectors, leave it to the pattern.
4258+
if (!VT.isFixedLengthVector() && !IsVP && IsDirectConv)
43034259
return Op;
43044260

43054261
// Prepare any fixed-length vector operands.
43064262
MVT ContainerVT = VT;
43074263
SDValue Mask, VL;
4308-
if (IsVPFPTrunc) {
4264+
if (IsVP) {
43094265
Mask = Op.getOperand(1);
43104266
VL = Op.getOperand(2);
43114267
}
@@ -4314,31 +4270,36 @@ SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op,
43144270
ContainerVT =
43154271
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
43164272
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4317-
if (IsVPFPTrunc) {
4273+
if (IsVP) {
43184274
MVT MaskVT = getMaskTypeFor(ContainerVT);
43194275
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
43204276
}
43214277
}
43224278

4323-
if (!IsVPFPTrunc)
4279+
if (!IsVP)
43244280
std::tie(Mask, VL) =
43254281
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
43264282

4283+
unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
4284+
43274285
if (IsDirectConv) {
4328-
Src = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT, Src, Mask, VL);
4286+
Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
43294287
if (VT.isFixedLengthVector())
43304288
Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
43314289
return Src;
43324290
}
43334291

4292+
unsigned InterConvOpc =
4293+
IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
4294+
43344295
MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
4335-
SDValue IntermediateRound =
4336-
DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
4337-
SDValue Round = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT,
4338-
IntermediateRound, Mask, VL);
4296+
SDValue IntermediateConv =
4297+
DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
4298+
SDValue Result =
4299+
DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
43394300
if (VT.isFixedLengthVector())
4340-
return convertFromScalableVector(VT, Round, DAG, Subtarget);
4341-
return Round;
4301+
return convertFromScalableVector(VT, Result, DAG, Subtarget);
4302+
return Result;
43424303
}
43434304

43444305
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ class RISCVTargetLowering : public TargetLowering {
614614
int64_t ExtTrueVal) const;
615615
SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
616616
SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
617-
SDValue lowerVectorFPRoundLike(SDValue Op, SelectionDAG &DAG) const;
617+
SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
618618
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
619619
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
620620
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
4+
5+
declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
6+
7+
define <2 x float> @vfpext_v2f16_v2f32(<2 x half> %a, <2 x i1> %m, i32 zeroext %vl) {
8+
; CHECK-LABEL: vfpext_v2f16_v2f32:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
11+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8, v0.t
12+
; CHECK-NEXT: vmv1r.v v8, v9
13+
; CHECK-NEXT: ret
14+
%v = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> %a, <2 x i1> %m, i32 %vl)
15+
ret <2 x float> %v
16+
}
17+
18+
define <2 x float> @vfpext_v2f16_v2f32_unmasked(<2 x half> %a, i32 zeroext %vl) {
19+
; CHECK-LABEL: vfpext_v2f16_v2f32_unmasked:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
22+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
23+
; CHECK-NEXT: vmv1r.v v8, v9
24+
; CHECK-NEXT: ret
25+
%v = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
26+
ret <2 x float> %v
27+
}
28+
29+
declare <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half>, <2 x i1>, i32)
30+
31+
define <2 x double> @vfpext_v2f16_v2f64(<2 x half> %a, <2 x i1> %m, i32 zeroext %vl) {
32+
; CHECK-LABEL: vfpext_v2f16_v2f64:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
35+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8, v0.t
36+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
37+
; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
38+
; CHECK-NEXT: ret
39+
%v = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> %a, <2 x i1> %m, i32 %vl)
40+
ret <2 x double> %v
41+
}
42+
43+
define <2 x double> @vfpext_v2f16_v2f64_unmasked(<2 x half> %a, i32 zeroext %vl) {
44+
; CHECK-LABEL: vfpext_v2f16_v2f64_unmasked:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
47+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
48+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
49+
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
50+
; CHECK-NEXT: ret
51+
%v = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
52+
ret <2 x double> %v
53+
}
54+
55+
declare <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float>, <2 x i1>, i32)
56+
57+
define <2 x double> @vfpext_v2f32_v2f64(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
58+
; CHECK-LABEL: vfpext_v2f32_v2f64:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
61+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8, v0.t
62+
; CHECK-NEXT: vmv1r.v v8, v9
63+
; CHECK-NEXT: ret
64+
%v = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
65+
ret <2 x double> %v
66+
}
67+
68+
define <2 x double> @vfpext_v2f32_v2f64_unmasked(<2 x float> %a, i32 zeroext %vl) {
69+
; CHECK-LABEL: vfpext_v2f32_v2f64_unmasked:
70+
; CHECK: # %bb.0:
71+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
72+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
73+
; CHECK-NEXT: vmv1r.v v8, v9
74+
; CHECK-NEXT: ret
75+
%v = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
76+
ret <2 x double> %v
77+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
4+
5+
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
6+
7+
define <vscale x 2 x float> @vfpext_nxv2f16_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
8+
; CHECK-LABEL: vfpext_nxv2f16_nxv2f32:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
11+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8, v0.t
12+
; CHECK-NEXT: vmv1r.v v8, v9
13+
; CHECK-NEXT: ret
14+
%v = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %vl)
15+
ret <vscale x 2 x float> %v
16+
}
17+
18+
define <vscale x 2 x float> @vfpext_nxv2f16_nxv2f32_unmasked(<vscale x 2 x half> %a, i32 zeroext %vl) {
19+
; CHECK-LABEL: vfpext_nxv2f16_nxv2f32_unmasked:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
22+
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
23+
; CHECK-NEXT: vmv1r.v v8, v9
24+
; CHECK-NEXT: ret
25+
%v = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
26+
ret <vscale x 2 x float> %v
27+
}
28+
29+
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
30+
31+
define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
32+
; CHECK-LABEL: vfpext_nxv2f16_nxv2f64:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
35+
; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
36+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
37+
; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
38+
; CHECK-NEXT: ret
39+
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %vl)
40+
ret <vscale x 2 x double> %v
41+
}
42+
43+
define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64_unmasked(<vscale x 2 x half> %a, i32 zeroext %vl) {
44+
; CHECK-LABEL: vfpext_nxv2f16_nxv2f64_unmasked:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
47+
; CHECK-NEXT: vfwcvt.f.f.v v10, v8
48+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
49+
; CHECK-NEXT: vfwcvt.f.f.v v8, v10
50+
; CHECK-NEXT: ret
51+
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
52+
ret <vscale x 2 x double> %v
53+
}
54+
55+
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
56+
57+
define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
58+
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
61+
; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
62+
; CHECK-NEXT: vmv2r.v v8, v10
63+
; CHECK-NEXT: ret
64+
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
65+
ret <vscale x 2 x double> %v
66+
}
67+
68+
define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
69+
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64_unmasked:
70+
; CHECK: # %bb.0:
71+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
72+
; CHECK-NEXT: vfwcvt.f.f.v v10, v8
73+
; CHECK-NEXT: vmv2r.v v8, v10
74+
; CHECK-NEXT: ret
75+
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
76+
ret <vscale x 2 x double> %v
77+
}

0 commit comments

Comments
 (0)