@@ -18961,9 +18961,39 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
18961
18961
return SDValue();
18962
18962
}
18963
18963
18964
- static bool
18965
- shouldUseSVEForScalarFPConversion(SDNode *N,
18966
- const AArch64Subtarget *Subtarget) {
18964
+ /// Builds a scalar FP <-> INT conversion (\p Opc) as a scalable-vector one:
18965
+ /// \p SrcVal is inserted into lane 0, converted, and lane 0 is extracted.
18966
+ static SDValue createScalarSVEFPConversion(SelectionDAG &DAG, unsigned Opc,
18967
+ SDLoc DL, SDValue SrcVal, EVT SrcTy,
18968
+ EVT DestTy) {
18969
+ EVT SrcVecTy;
18970
+ EVT DestVecTy;
18971
+ if (DestTy.bitsGT(SrcTy)) { // Use a packed vector for the wider type.
18972
+ DestVecTy = getPackedSVEVectorVT(DestTy);
18973
+ SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
18974
+ } else {
18975
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
18976
+ DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
18977
+ }
18978
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); // Only lane 0 is used.
18979
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
18980
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18981
+ Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
18982
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
18983
+ }
18984
+
18985
+ /// Tries to use SVE for scalar FP <-> INT conversions in streaming functions.
18986
+ static SDValue
18987
+ tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
18988
+ TargetLowering::DAGCombinerInfo &DCI,
18989
+ const AArch64Subtarget *Subtarget) {
18990
+ // Uncomment to introduce extra fcvts.
18991
+ // if (DCI.isBeforeLegalizeOps())
18992
+ // return SDValue();
18993
+
18994
+ if (N->isStrictFPOpcode())
18995
+ return SDValue();
18996
+
18967
18997
auto isSupportedType = [](EVT VT) {
18968
18998
if (!VT.isSimple())
18969
18999
return false;
@@ -18973,54 +19003,52 @@ shouldUseSVEForScalarFPConversion(SDNode *N,
18973
19003
return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
18974
19004
VT.getSimpleVT().SimpleTy);
18975
19005
};
19006
+
19007
+ if (!isSupportedType(N->getValueType(0)) ||
19008
+ !isSupportedType(N->getOperand(0).getValueType()))
19009
+ return SDValue();
19010
+
18976
19011
// If we are in a streaming[-compatible] function, use SVE for scalar FP <->
18977
- // INT conversions as this can help avoid movs between GPRs and FPRs, which
19012
+ // INT conversions as this can help avoid moves between GPRs and FPRs, which
18978
19013
// could be quite expensive.
18979
- return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() &&
18980
- (Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
18981
- isSupportedType(N->getValueType(0)) &&
18982
- isSupportedType(N->getOperand(0).getValueType());
18983
- }
19014
+ if (!Subtarget->isSVEorStreamingSVEAvailable() ||
19015
+ (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
19016
+ return SDValue();
18984
19017
18985
- /// Replaces a scalar FP <-> INT conversion with an SVE (scalable) one, wrapped
18986
- /// with an insert and extract.
18987
- static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
18988
- assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
19018
+ SDLoc DL(N);
19019
+ unsigned Opc = N->getOpcode();
18989
19020
SDValue SrcVal = N->getOperand(0);
18990
19021
EVT SrcTy = SrcVal.getValueType();
18991
19022
EVT DestTy = N->getValueType(0);
18992
- EVT SrcVecTy;
18993
- EVT DestVecTy;
18994
- // Use a packed vector for the larger type.
18995
- // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
18996
- // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
18997
- // (unlike floats) nxv2i32 is an illegal unpacked type.
18998
- if (DestTy.bitsGT(SrcTy)) {
18999
- DestVecTy = getPackedSVEVectorVT(DestTy);
19000
- SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19001
- : DestVecTy.changeVectorElementType(SrcTy);
19002
- } else {
19003
- SrcVecTy = getPackedSVEVectorVT(SrcTy);
19004
- DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19005
- : SrcVecTy.changeVectorElementType(DestTy);
19023
+
19024
+ // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19025
+ // type (unlike the equivalent nxv2f32 for floating-point types).
19026
+ // Note: the extend/truncate below may materialize extra instructions.
19027
+ if (SrcTy == MVT::i32 && DestTy == MVT::f64) {
19028
+ SDValue ExtSrc = DAG.getNode(Opc == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND
19029
+ : ISD::ZERO_EXTEND,
19030
+ DL, MVT::i64, SrcVal);
19031
+ return createScalarSVEFPConversion(DAG, Opc, DL, ExtSrc, MVT::i64,
19032
+ MVT::f64);
19006
19033
}
19007
- SDLoc dl(N);
19008
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
19009
- SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
19010
- DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx );
19011
- Vec = DAG.getNode(N->getOpcode(), dl, DestVecTy, Vec);
19012
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DestTy, Vec, ZeroIdx );
19034
+ if (SrcTy == MVT::f64 && DestTy == MVT::i32) {
19035
+ SDValue ExtDest =
19036
+ createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, MVT::f64, MVT::i64);
19037
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ExtDest );
19038
+ }
19039
+ return createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, SrcTy, DestTy );
19013
19040
}
19014
19041
19015
19042
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19043
+ TargetLowering::DAGCombinerInfo &DCI,
19016
19044
const AArch64Subtarget *Subtarget) {
19017
19045
// First try to optimize away the conversion when it's conditionally from
19018
19046
// a constant. Vectors only.
19019
19047
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
19020
19048
return Res;
19021
19049
19022
- if (shouldUseSVEForScalarFPConversion(N , Subtarget))
19023
- return replaceScalarFPConversionWithSVE(N, DAG) ;
19050
+ if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI , Subtarget))
19051
+ return Res ;
19024
19052
19025
19053
EVT VT = N->getValueType(0);
19026
19054
if (VT != MVT::f32 && VT != MVT::f64)
@@ -19060,8 +19088,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19060
19088
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
19061
19089
TargetLowering::DAGCombinerInfo &DCI,
19062
19090
const AArch64Subtarget *Subtarget) {
19063
- if (shouldUseSVEForScalarFPConversion(N , Subtarget))
19064
- return replaceScalarFPConversionWithSVE(N, DAG) ;
19091
+ if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI , Subtarget))
19092
+ return Res ;
19065
19093
19066
19094
if (!Subtarget->isNeonAvailable())
19067
19095
return SDValue();
@@ -26082,7 +26110,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
26082
26110
return performMulCombine(N, DAG, DCI, Subtarget);
26083
26111
case ISD::SINT_TO_FP:
26084
26112
case ISD::UINT_TO_FP:
26085
- return performIntToFpCombine(N, DAG, Subtarget);
26113
+ return performIntToFpCombine(N, DAG, DCI, Subtarget);
26086
26114
case ISD::FP_TO_SINT:
26087
26115
case ISD::FP_TO_UINT:
26088
26116
case ISD::FP_TO_SINT_SAT:
@@ -28384,21 +28412,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
28384
28412
unsigned NewOp) const {
28385
28413
EVT VT = Op.getValueType();
28386
28414
SDLoc DL(Op);
28387
- SDValue Pg;
28388
-
28389
- // FCVTZS_ZPmZ_DtoS and FCVTZU_ZPmZ_DtoS are special cases. These operations
28390
- // return nxv4i32 rather than the correct nxv2i32, as nxv2i32 is an illegal
28391
- // unpacked type. So, in this case, we take the predicate size from the
28392
- // operand.
28393
- SDValue LastOp{};
28394
- if ((NewOp == AArch64ISD::FCVTZU_MERGE_PASSTHRU ||
28395
- NewOp == AArch64ISD::FCVTZS_MERGE_PASSTHRU) &&
28396
- VT == MVT::nxv4i32 &&
28397
- (LastOp = Op->ops().back().get()).getValueType() == MVT::nxv2f64) {
28398
- Pg = getPredicateForVector(DAG, DL, LastOp.getValueType());
28399
- } else {
28400
- Pg = getPredicateForVector(DAG, DL, VT);
28401
- }
28415
+ auto Pg = getPredicateForVector(DAG, DL, VT);
28402
28416
28403
28417
if (VT.isFixedLengthVector()) {
28404
28418
assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
0 commit comments