@@ -18961,36 +18961,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
18961
18961
return SDValue();
18962
18962
}
18963
18963
18964
- /// Creates a scalar FP <-> INT conversion with a scalable one, wrapped
18965
- /// with an insert and extract.
18966
- static SDValue createScalarSVEFPConversion(SelectionDAG &DAG, unsigned Opc,
18967
- SDLoc DL, SDValue SrcVal, EVT SrcTy,
18968
- EVT DestTy) {
18969
- EVT SrcVecTy;
18970
- EVT DestVecTy;
18971
- if (DestTy.bitsGT(SrcTy)) {
18972
- DestVecTy = getPackedSVEVectorVT(DestTy);
18973
- SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
18974
- } else {
18975
- SrcVecTy = getPackedSVEVectorVT(SrcTy);
18976
- DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
18977
- }
18978
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18979
- SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
18980
- DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18981
- Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
18982
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
18983
- }
18984
-
18985
18964
/// Tries to replace scalar FP <-> INT conversions with SVE in streaming functions.
18986
18965
static SDValue
18987
18966
tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
18988
- TargetLowering::DAGCombinerInfo &DCI,
18989
18967
const AArch64Subtarget *Subtarget) {
18990
- // Uncomment to introduce extra fcvts.
18991
- // if (DCI.isBeforeLegalizeOps())
18992
- // return SDValue();
18993
-
18994
18968
if (N->isStrictFPOpcode())
18995
18969
return SDValue();
18996
18970
@@ -19015,39 +18989,64 @@ tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
19015
18989
(!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
19016
18990
return SDValue();
19017
18991
19018
- SDLoc DL(N);
19019
18992
unsigned Opc = N->getOpcode();
18993
+ bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::FP_TO_SINT;
18994
+
19020
18995
SDValue SrcVal = N->getOperand(0);
19021
18996
EVT SrcTy = SrcVal.getValueType();
19022
18997
EVT DestTy = N->getValueType(0);
19023
18998
19024
- // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19025
- // type (unlike the equivalent nxv2f32 for floating-point types).
19026
- // May materialize extra instructions :(
19027
- if (SrcTy == MVT::i32 && DestTy == MVT::f64) {
19028
- SDValue ExtSrc = DAG.getNode(Opc == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND
19029
- : ISD::ZERO_EXTEND,
19030
- DL, MVT::i64, SrcVal);
19031
- return createScalarSVEFPConversion(DAG, Opc, DL, ExtSrc, MVT::i64,
19032
- MVT::f64);
18999
+ EVT SrcVecTy;
19000
+ EVT DestVecTy;
19001
+ if (DestTy.bitsGT(SrcTy)) {
19002
+ DestVecTy = getPackedSVEVectorVT(DestTy);
19003
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19004
+ : DestVecTy.changeVectorElementType(SrcTy);
19005
+ } else {
19006
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
19007
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19008
+ : SrcVecTy.changeVectorElementType(DestTy);
19033
19009
}
19034
- if (SrcTy == MVT::f64 && DestTy == MVT::i32) {
19035
- SDValue ExtDest =
19036
- createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, MVT::f64, MVT::i64);
19037
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ExtDest);
19010
+
19011
+ SDLoc DL(N);
19012
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19013
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
19014
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19015
+
19016
+ // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19017
+ // type (unlike the equivalent nxv2f32 for floating-point types). So,
19018
+ // unfortunately, the only way to lower to these variants is via the
19019
+ // intrinsics. Note: We could sign/zero extend to the i64 variant, but that
19020
+ // may result in extra extends or fmovs in the final assembly.
19021
+ bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
19022
+ bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
19023
+ if (IsI32ToF64 || isF64ToI32) {
19024
+ unsigned IntrinsicOpc;
19025
+ if (IsI32ToF64)
19026
+ IntrinsicOpc = IsSigned ? Intrinsic::aarch64_sve_scvtf_f64i32
19027
+ : Intrinsic::aarch64_sve_ucvtf_f64i32;
19028
+ else
19029
+ IntrinsicOpc = IsSigned ? Intrinsic::aarch64_sve_fcvtzs_i32f64
19030
+ : Intrinsic::aarch64_sve_fcvtzu_i32f64;
19031
+ SDValue PTrue = getPredicateForVector(DAG, DL, MVT::nxv2f64);
19032
+ Vec = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, DestVecTy,
19033
+ {DAG.getConstant(IntrinsicOpc, DL, MVT::i32),
19034
+ DAG.getUNDEF(DestTy), PTrue, Vec});
19035
+ } else {
19036
+ Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
19038
19037
}
19039
- return createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, SrcTy, DestTy);
19038
+
19039
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
19040
19040
}
19041
19041
19042
19042
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19043
- TargetLowering::DAGCombinerInfo &DCI,
19044
19043
const AArch64Subtarget *Subtarget) {
19045
19044
// First try to optimize away the conversion when it's conditionally from
19046
19045
// a constant. Vectors only.
19047
19046
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
19048
19047
return Res;
19049
19048
19050
- if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19049
+ if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19051
19050
return Res;
19052
19051
19053
19052
EVT VT = N->getValueType(0);
@@ -19088,7 +19087,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19088
19087
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
19089
19088
TargetLowering::DAGCombinerInfo &DCI,
19090
19089
const AArch64Subtarget *Subtarget) {
19091
- if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19090
+ if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19092
19091
return Res;
19093
19092
19094
19093
if (!Subtarget->isNeonAvailable())
@@ -26110,7 +26109,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
26110
26109
return performMulCombine(N, DAG, DCI, Subtarget);
26111
26110
case ISD::SINT_TO_FP:
26112
26111
case ISD::UINT_TO_FP:
26113
- return performIntToFpCombine(N, DAG, DCI, Subtarget);
26112
+ return performIntToFpCombine(N, DAG, Subtarget);
26114
26113
case ISD::FP_TO_SINT:
26115
26114
case ISD::FP_TO_UINT:
26116
26115
case ISD::FP_TO_SINT_SAT:
0 commit comments