@@ -4550,10 +4550,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
4550
4550
EVT VT = Op.getValueType();
4551
4551
4552
4552
if (VT.isScalableVector()) {
4553
- unsigned Opc = Op.getOpcode();
4554
- bool IsSigned = Opc == ISD::FP_TO_SINT || Opc == ISD::STRICT_FP_TO_SINT;
4555
- unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
4556
- : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
4553
+ unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
4554
+ ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4555
+ : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4557
4556
return LowerToPredicatedOp(Op, DAG, Opcode);
4558
4557
}
4559
4558
@@ -4629,46 +4628,6 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
4629
4628
return Op;
4630
4629
}
4631
4630
4632
- static bool CanLowerToScalarSVEFPIntConversion(EVT VT) {
4633
- if (!VT.isSimple())
4634
- return false;
4635
- // There are SVE instructions that can convert to/from all pairs of these int
4636
- // and float types. Note: We don't bother with i8 or i16 as those are illegal
4637
- // types for scalars.
4638
- return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
4639
- VT.getSimpleVT().SimpleTy);
4640
- }
4641
-
4642
- /// Lowers a scalar FP conversion (to/from) int to SVE.
4643
- static SDValue LowerScalarFPConversionToSVE(SDValue Op, SelectionDAG &DAG) {
4644
- assert(!Op->isStrictFPOpcode() && "strict fp ops not supported");
4645
- SDValue SrcVal = Op.getOperand(0);
4646
- EVT SrcTy = SrcVal.getValueType();
4647
- EVT DestTy = Op.getValueType();
4648
- EVT SrcVecTy;
4649
- EVT DestVecTy;
4650
- // Use a packed vector for the larger type.
4651
- // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
4652
- // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
4653
- // (unlike floats) nxv2i32 is an illegal unpacked type.
4654
- if (DestTy.bitsGT(SrcTy)) {
4655
- DestVecTy = getPackedSVEVectorVT(DestTy);
4656
- SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
4657
- : DestVecTy.changeVectorElementType(SrcTy);
4658
- } else {
4659
- SrcVecTy = getPackedSVEVectorVT(SrcTy);
4660
- DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
4661
- : SrcVecTy.changeVectorElementType(DestTy);
4662
- }
4663
- SDLoc dl(Op);
4664
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
4665
- SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
4666
- DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
4667
- Vec = DAG.getNode(Op.getOpcode(), dl, DestVecTy, Vec);
4668
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
4669
- ZeroIdx);
4670
- }
4671
-
4672
4631
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
4673
4632
SelectionDAG &DAG) const {
4674
4633
bool IsStrict = Op->isStrictFPOpcode();
@@ -4677,12 +4636,6 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
4677
4636
if (SrcVal.getValueType().isVector())
4678
4637
return LowerVectorFP_TO_INT(Op, DAG);
4679
4638
4680
- if (!IsStrict && !Subtarget->isNeonAvailable() &&
4681
- Subtarget->isSVEorStreamingSVEAvailable() &&
4682
- CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4683
- CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4684
- return LowerScalarFPConversionToSVE(Op, DAG);
4685
-
4686
4639
// f16 conversions are promoted to f32 when full fp16 is not supported.
4687
4640
if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4688
4641
SrcVal.getValueType() == MVT::bf16) {
@@ -4986,12 +4939,6 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
4986
4939
bool IsStrict = Op->isStrictFPOpcode();
4987
4940
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
4988
4941
4989
- if (!IsStrict && !Subtarget->isNeonAvailable() &&
4990
- Subtarget->isSVEorStreamingSVEAvailable() &&
4991
- CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4992
- CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4993
- return LowerScalarFPConversionToSVE(Op, DAG);
4994
-
4995
4942
bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
4996
4943
Op->getOpcode() == ISD::SINT_TO_FP;
4997
4944
@@ -19014,13 +18961,67 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
19014
18961
return SDValue();
19015
18962
}
19016
18963
18964
+ static bool // True when the scalar FP <-> INT conversion node N should be rewritten to use SVE.
18965
+ shouldUseSVEForScalarFPConversion(SDNode *N,
18966
+ const AArch64Subtarget *Subtarget) {
18967
+ auto isSupportedType = [](EVT VT) {
18968
+ if (!VT.isSimple()) // getSimpleVT() below is only used on simple types.
18969
+ return false;
18970
+ // There are SVE instructions that can convert to/from all pairs of these
18971
+ // int and float types. Note: We don't bother with i8 or i16 as those are
18972
+ // illegal types for scalars.
18973
+ return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
18974
+ VT.getSimpleVT().SimpleTy);
18975
+ };
18976
+ // If we are in a streaming[-compatible] function, use SVE for scalar FP <->
18977
+ // INT conversions, as this can help avoid movs between GPRs and FPRs, which
18978
+ // could be quite expensive. All of the conditions below must hold.
18979
+ return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() && // Strict ops are rejected first: their source value is operand 1, not operand 0.
18980
+ (Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
18981
+ isSupportedType(N->getValueType(0)) && // Result type.
18982
+ isSupportedType(N->getOperand(0).getValueType()); // Source type.
18983
+ }
18984
+
18985
+ /// Replaces the scalar FP <-> INT conversion \p N with the same opcode applied to a scalable (SVE) vector, wrapped
18986
+ /// with an insert into and an extract from element 0. \p N must not be a strict FP opcode (asserted). Returns the extracted scalar of N's result type.
18987
+ static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
18988
+ assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
18989
+ SDValue SrcVal = N->getOperand(0);
18990
+ EVT SrcTy = SrcVal.getValueType();
18991
+ EVT DestTy = N->getValueType(0);
18992
+ EVT SrcVecTy;
18993
+ EVT DestVecTy;
18994
+ // Use a packed vector for the larger type.
18995
+ // Note: For conversions such as FCVTZS_ZPmZ_DtoS and UCVTF_ZPmZ_StoD that
18996
+ // notionally take or return an nxv2i32 type, we must instead use an nxv4i32, as
18997
+ // (unlike floats) nxv2i32 is an illegal unpacked type.
18998
+ if (DestTy.bitsGT(SrcTy)) { // Result is wider than the source: pack the result type.
18999
+ DestVecTy = getPackedSVEVectorVT(DestTy);
19000
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy) // i32 source: use its own packed type (see note above).
19001
+ : DestVecTy.changeVectorElementType(SrcTy);
19002
+ } else { // Source is at least as wide as the result: pack the source type.
19003
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
19004
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy) // i32 result: use its own packed type (see note above).
19005
+ : SrcVecTy.changeVectorElementType(DestTy);
19006
+ }
19007
+ SDLoc dl(N);
19008
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
19009
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy, // Place the scalar in element 0 of an otherwise undef vector.
19010
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19011
+ Vec = DAG.getNode(N->getOpcode(), dl, DestVecTy, Vec); // Same conversion opcode, now on the vector types.
19012
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DestTy, Vec, ZeroIdx); // Only element 0 carries the converted value.
19013
+ }
19014
+
19017
19015
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19018
19016
const AArch64Subtarget *Subtarget) {
19019
19017
// First try to optimize away the conversion when it's conditionally from
19020
19018
// a constant. Vectors only.
19021
19019
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
19022
19020
return Res;
19023
19021
19022
+ if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19023
+ return replaceScalarFPConversionWithSVE(N, DAG);
19024
+
19024
19025
EVT VT = N->getValueType(0);
19025
19026
if (VT != MVT::f32 && VT != MVT::f64)
19026
19027
return SDValue();
@@ -19059,6 +19060,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19059
19060
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
19060
19061
TargetLowering::DAGCombinerInfo &DCI,
19061
19062
const AArch64Subtarget *Subtarget) {
19063
+ if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19064
+ return replaceScalarFPConversionWithSVE(N, DAG);
19065
+
19062
19066
if (!Subtarget->isNeonAvailable())
19063
19067
return SDValue();
19064
19068
0 commit comments