Skip to content

Commit 64335db

Browse files
committed
WIP
1 parent 8a37e17 commit 64335db

6 files changed

+99
-78
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 67 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -18961,9 +18961,39 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
1896118961
return SDValue();
1896218962
}
1896318963

18964-
static bool
18965-
shouldUseSVEForScalarFPConversion(SDNode *N,
18966-
const AArch64Subtarget *Subtarget) {
18964+
/// Creates a scalar FP <-> INT conversion using an SVE (scalable) one, wrapped
18965+
/// with an insert and extract.
18966+
static SDValue createScalarSVEFPConversion(SelectionDAG &DAG, unsigned Opc,
18967+
SDLoc DL, SDValue SrcVal, EVT SrcTy,
18968+
EVT DestTy) {
18969+
EVT SrcVecTy;
18970+
EVT DestVecTy;
18971+
if (DestTy.bitsGT(SrcTy)) {
18972+
DestVecTy = getPackedSVEVectorVT(DestTy);
18973+
SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
18974+
} else {
18975+
SrcVecTy = getPackedSVEVectorVT(SrcTy);
18976+
DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
18977+
}
18978+
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18979+
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
18980+
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18981+
Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
18982+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
18983+
}
18984+
18985+
/// Tries to replace scalar FP <-> INT conversions with SVE in streaming functions.
18986+
static SDValue
18987+
tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
18988+
TargetLowering::DAGCombinerInfo &DCI,
18989+
const AArch64Subtarget *Subtarget) {
18990+
// Uncomment to introduce extra fcvts.
18991+
// if (DCI.isBeforeLegalizeOps())
18992+
// return SDValue();
18993+
18994+
if (N->isStrictFPOpcode())
18995+
return SDValue();
18996+
1896718997
auto isSupportedType = [](EVT VT) {
1896818998
if (!VT.isSimple())
1896918999
return false;
@@ -18973,54 +19003,52 @@ shouldUseSVEForScalarFPConversion(SDNode *N,
1897319003
return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
1897419004
VT.getSimpleVT().SimpleTy);
1897519005
};
19006+
19007+
if (!isSupportedType(N->getValueType(0)) ||
19008+
!isSupportedType(N->getOperand(0).getValueType()))
19009+
return SDValue();
19010+
1897619011
// If we are in a streaming[-compatible] function, use SVE for scalar FP <->
18977-
// INT conversions as this can help avoid movs between GPRs and FPRs, which
19012+
// INT conversions as this can help avoid moves between GPRs and FPRs, which
1897819013
// could be quite expensive.
18979-
return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() &&
18980-
(Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
18981-
isSupportedType(N->getValueType(0)) &&
18982-
isSupportedType(N->getOperand(0).getValueType());
18983-
}
19014+
if (!Subtarget->isSVEorStreamingSVEAvailable() ||
19015+
(!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
19016+
return SDValue();
1898419017

18985-
/// Replaces a scalar FP <-> INT conversion with an SVE (scalable) one, wrapped
18986-
/// with an insert and extract.
18987-
static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
18988-
assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
19018+
SDLoc DL(N);
19019+
unsigned Opc = N->getOpcode();
1898919020
SDValue SrcVal = N->getOperand(0);
1899019021
EVT SrcTy = SrcVal.getValueType();
1899119022
EVT DestTy = N->getValueType(0);
18992-
EVT SrcVecTy;
18993-
EVT DestVecTy;
18994-
// Use a packed vector for the larger type.
18995-
// Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
18996-
// notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
18997-
// (unlike floats) nxv2i32 is an illegal unpacked type.
18998-
if (DestTy.bitsGT(SrcTy)) {
18999-
DestVecTy = getPackedSVEVectorVT(DestTy);
19000-
SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19001-
: DestVecTy.changeVectorElementType(SrcTy);
19002-
} else {
19003-
SrcVecTy = getPackedSVEVectorVT(SrcTy);
19004-
DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19005-
: SrcVecTy.changeVectorElementType(DestTy);
19023+
19024+
// Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19025+
// type (unlike the equivalent nxv2f32 for floating-point types).
19026+
// Note: the i64 extend/truncate used here may materialize extra instructions.
19027+
if (SrcTy == MVT::i32 && DestTy == MVT::f64) {
19028+
SDValue ExtSrc = DAG.getNode(Opc == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND
19029+
: ISD::ZERO_EXTEND,
19030+
DL, MVT::i64, SrcVal);
19031+
return createScalarSVEFPConversion(DAG, Opc, DL, ExtSrc, MVT::i64,
19032+
MVT::f64);
1900619033
}
19007-
SDLoc dl(N);
19008-
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
19009-
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
19010-
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19011-
Vec = DAG.getNode(N->getOpcode(), dl, DestVecTy, Vec);
19012-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DestTy, Vec, ZeroIdx);
19034+
if (SrcTy == MVT::f64 && DestTy == MVT::i32) {
19035+
SDValue ExtDest =
19036+
createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, MVT::f64, MVT::i64);
19037+
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ExtDest);
19038+
}
19039+
return createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, SrcTy, DestTy);
1901319040
}
1901419041

1901519042
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19043+
TargetLowering::DAGCombinerInfo &DCI,
1901619044
const AArch64Subtarget *Subtarget) {
1901719045
// First try to optimize away the conversion when it's conditionally from
1901819046
// a constant. Vectors only.
1901919047
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
1902019048
return Res;
1902119049

19022-
if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19023-
return replaceScalarFPConversionWithSVE(N, DAG);
19050+
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19051+
return Res;
1902419052

1902519053
EVT VT = N->getValueType(0);
1902619054
if (VT != MVT::f32 && VT != MVT::f64)
@@ -19060,8 +19088,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1906019088
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
1906119089
TargetLowering::DAGCombinerInfo &DCI,
1906219090
const AArch64Subtarget *Subtarget) {
19063-
if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19064-
return replaceScalarFPConversionWithSVE(N, DAG);
19091+
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19092+
return Res;
1906519093

1906619094
if (!Subtarget->isNeonAvailable())
1906719095
return SDValue();
@@ -26082,7 +26110,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2608226110
return performMulCombine(N, DAG, DCI, Subtarget);
2608326111
case ISD::SINT_TO_FP:
2608426112
case ISD::UINT_TO_FP:
26085-
return performIntToFpCombine(N, DAG, Subtarget);
26113+
return performIntToFpCombine(N, DAG, DCI, Subtarget);
2608626114
case ISD::FP_TO_SINT:
2608726115
case ISD::FP_TO_UINT:
2608826116
case ISD::FP_TO_SINT_SAT:
@@ -28384,21 +28412,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
2838428412
unsigned NewOp) const {
2838528413
EVT VT = Op.getValueType();
2838628414
SDLoc DL(Op);
28387-
SDValue Pg;
28388-
28389-
// FCVTZS_ZPmZ_DtoS and FCVTZU_ZPmZ_DtoS are special cases. These operations
28390-
// return nxv4i32 rather than the correct nxv2i32, as nxv2i32 is an illegal
28391-
// unpacked type. So, in this case, we take the predicate size from the
28392-
// operand.
28393-
SDValue LastOp{};
28394-
if ((NewOp == AArch64ISD::FCVTZU_MERGE_PASSTHRU ||
28395-
NewOp == AArch64ISD::FCVTZS_MERGE_PASSTHRU) &&
28396-
VT == MVT::nxv4i32 &&
28397-
(LastOp = Op->ops().back().get()).getValueType() == MVT::nxv2f64) {
28398-
Pg = getPredicateForVector(DAG, DL, LastOp.getValueType());
28399-
} else {
28400-
Pg = getPredicateForVector(DAG, DL, VT);
28401-
}
28415+
auto Pg = getPredicateForVector(DAG, DL, VT);
2840228416

2840328417
if (VT.isFixedLengthVector()) {
2840428418
assert(isTypeLegal(VT) && "Expected only legal fixed-width types");

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,8 +2328,8 @@ let Predicates = [HasSVEorSME] in {
23282328
defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
23292329
defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
23302330
defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
2331-
defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
2332-
defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
2331+
defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
2332+
defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
23332333
defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
23342334
defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
23352335
defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
@@ -2338,8 +2338,8 @@ let Predicates = [HasSVEorSME] in {
23382338
defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
23392339
defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
23402340
defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
2341-
defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, AArch64fcvtzs_mt, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
2342-
defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, AArch64fcvtzu_mt, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
2341+
defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
2342+
defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
23432343
defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
23442344
defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
23452345
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,9 @@ define i32 @f64_to_s32(double %x) {
8484
; CHECK: // %bb.0: // %entry
8585
; CHECK-NEXT: ptrue p0.d
8686
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
87-
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
88-
; CHECK-NEXT: fmov w0, s0
87+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
88+
; CHECK-NEXT: fmov x0, d0
89+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
8990
; CHECK-NEXT: ret
9091
;
9192
; NONEON-NOSVE-LABEL: f64_to_s32:
@@ -194,8 +195,9 @@ define i32 @f64_to_u32(double %x) {
194195
; CHECK: // %bb.0: // %entry
195196
; CHECK-NEXT: ptrue p0.d
196197
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
197-
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.d
198-
; CHECK-NEXT: fmov w0, s0
198+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
199+
; CHECK-NEXT: fmov x0, d0
200+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
199201
; CHECK-NEXT: ret
200202
;
201203
; NONEON-NOSVE-LABEL: f64_to_u32:

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,11 @@ entry:
4545
define double @s32_to_f64(i32 %x) {
4646
; CHECK-LABEL: s32_to_f64:
4747
; CHECK: // %bb.0: // %entry
48-
; CHECK-NEXT: fmov s0, w0
48+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
49+
; CHECK-NEXT: sxtw x8, w0
4950
; CHECK-NEXT: ptrue p0.d
50-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
51+
; CHECK-NEXT: fmov d0, x8
52+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
5153
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
5254
; CHECK-NEXT: ret
5355
;
@@ -100,9 +102,10 @@ entry:
100102
define double @u32_to_f64(i32 %x) {
101103
; CHECK-LABEL: u32_to_f64:
102104
; CHECK: // %bb.0: // %entry
103-
; CHECK-NEXT: fmov s0, w0
105+
; CHECK-NEXT: mov w8, w0
104106
; CHECK-NEXT: ptrue p0.d
105-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
107+
; CHECK-NEXT: fmov d0, x8
108+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
106109
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
107110
; CHECK-NEXT: ret
108111
;

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
11661166
; CHECK: // %bb.0:
11671167
; CHECK-NEXT: ptrue p0.d
11681168
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1169-
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
1169+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
11701170
; CHECK-NEXT: fmov w8, s0
11711171
; CHECK-NEXT: mov z0.h, w8
11721172
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -2867,7 +2867,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
28672867
; CHECK: // %bb.0:
28682868
; CHECK-NEXT: ptrue p0.d
28692869
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2870-
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
2870+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
28712871
; CHECK-NEXT: fmov w8, s0
28722872
; CHECK-NEXT: mov z0.h, w8
28732873
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,8 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
392392
; CHECK-NEXT: fmov w8, s0
393393
; CHECK-NEXT: ptrue p0.d
394394
; CHECK-NEXT: and w8, w8, #0xffff
395-
; CHECK-NEXT: fmov s0, w8
396-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
395+
; CHECK-NEXT: fmov d0, x8
396+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
397397
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
398398
; CHECK-NEXT: ret
399399
;
@@ -2836,10 +2836,10 @@ define float @scvtf_i16_f32(ptr %0) {
28362836
define double @scvtf_i16_f64(ptr %0) {
28372837
; CHECK-LABEL: scvtf_i16_f64:
28382838
; CHECK: // %bb.0:
2839-
; CHECK-NEXT: ldrsh w8, [x0]
2839+
; CHECK-NEXT: ldrsh x8, [x0]
28402840
; CHECK-NEXT: ptrue p0.d
2841-
; CHECK-NEXT: fmov s0, w8
2842-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
2841+
; CHECK-NEXT: fmov d0, x8
2842+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
28432843
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
28442844
; CHECK-NEXT: ret
28452845
;
@@ -2895,9 +2895,10 @@ define float @scvtf_i32_f32(ptr %0) {
28952895
define double @scvtf_i32_f64(ptr %0) {
28962896
; CHECK-LABEL: scvtf_i32_f64:
28972897
; CHECK: // %bb.0:
2898+
; CHECK-NEXT: ldrsw x8, [x0]
28982899
; CHECK-NEXT: ptrue p0.d
2899-
; CHECK-NEXT: ldr s0, [x0]
2900-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
2900+
; CHECK-NEXT: fmov d0, x8
2901+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
29012902
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
29022903
; CHECK-NEXT: ret
29032904
;
@@ -3015,8 +3016,8 @@ define double @ucvtf_i16_f64(ptr %0) {
30153016
; CHECK: // %bb.0:
30163017
; CHECK-NEXT: ldrh w8, [x0]
30173018
; CHECK-NEXT: ptrue p0.d
3018-
; CHECK-NEXT: fmov s0, w8
3019-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
3019+
; CHECK-NEXT: fmov d0, x8
3020+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
30203021
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
30213022
; CHECK-NEXT: ret
30223023
;
@@ -3072,9 +3073,10 @@ define float @ucvtf_i32_f32(ptr %0) {
30723073
define double @ucvtf_i32_f64(ptr %0) {
30733074
; CHECK-LABEL: ucvtf_i32_f64:
30743075
; CHECK: // %bb.0:
3076+
; CHECK-NEXT: ldr w8, [x0]
30753077
; CHECK-NEXT: ptrue p0.d
3076-
; CHECK-NEXT: ldr s0, [x0]
3077-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
3078+
; CHECK-NEXT: fmov d0, x8
3079+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
30783080
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
30793081
; CHECK-NEXT: ret
30803082
;

0 commit comments

Comments
 (0)