@@ -4550,10 +4550,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
4550
4550
EVT VT = Op.getValueType();
4551
4551
4552
4552
if (VT.isScalableVector()) {
4553
- unsigned Opc = Op.getOpcode();
4554
- bool IsSigned = Opc == ISD::FP_TO_SINT || Opc == ISD::STRICT_FP_TO_SINT;
4555
- unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
4556
- : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
4553
+ unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
4554
+ ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4555
+ : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4557
4556
return LowerToPredicatedOp(Op, DAG, Opcode);
4558
4557
}
4559
4558
@@ -4629,46 +4628,6 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
4629
4628
return Op;
4630
4629
}
4631
4630
4632
- static bool CanLowerToScalarSVEFPIntConversion(EVT VT) {
4633
- if (!VT.isSimple())
4634
- return false;
4635
- // There are SVE instructions that can convert to/from all pairs of these int
4636
- // and float types. Note: We don't bother with i8 or i16 as those are illegal
4637
- // types for scalars.
4638
- return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
4639
- VT.getSimpleVT().SimpleTy);
4640
- }
4641
-
4642
- /// Lowers a scalar FP conversion (to/from) int to SVE.
4643
- static SDValue LowerScalarFPConversionToSVE(SDValue Op, SelectionDAG &DAG) {
4644
- assert(!Op->isStrictFPOpcode() && "strict fp ops not supported");
4645
- SDValue SrcVal = Op.getOperand(0);
4646
- EVT SrcTy = SrcVal.getValueType();
4647
- EVT DestTy = Op.getValueType();
4648
- EVT SrcVecTy;
4649
- EVT DestVecTy;
4650
- // Use a packed vector for the larger type.
4651
- // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
4652
- // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
4653
- // (unlike floats) nxv2i32 is an illegal unpacked type.
4654
- if (DestTy.bitsGT(SrcTy)) {
4655
- DestVecTy = getPackedSVEVectorVT(DestTy);
4656
- SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
4657
- : DestVecTy.changeVectorElementType(SrcTy);
4658
- } else {
4659
- SrcVecTy = getPackedSVEVectorVT(SrcTy);
4660
- DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
4661
- : SrcVecTy.changeVectorElementType(DestTy);
4662
- }
4663
- SDLoc dl(Op);
4664
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
4665
- SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
4666
- DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
4667
- Vec = DAG.getNode(Op.getOpcode(), dl, DestVecTy, Vec);
4668
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
4669
- ZeroIdx);
4670
- }
4671
-
4672
4631
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
4673
4632
SelectionDAG &DAG) const {
4674
4633
bool IsStrict = Op->isStrictFPOpcode();
@@ -4677,12 +4636,6 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
4677
4636
if (SrcVal.getValueType().isVector())
4678
4637
return LowerVectorFP_TO_INT(Op, DAG);
4679
4638
4680
- if (!IsStrict && !Subtarget->isNeonAvailable() &&
4681
- Subtarget->isSVEorStreamingSVEAvailable() &&
4682
- CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4683
- CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4684
- return LowerScalarFPConversionToSVE(Op, DAG);
4685
-
4686
4639
// f16 conversions are promoted to f32 when full fp16 is not supported.
4687
4640
if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4688
4641
SrcVal.getValueType() == MVT::bf16) {
@@ -4986,12 +4939,6 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
4986
4939
bool IsStrict = Op->isStrictFPOpcode();
4987
4940
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
4988
4941
4989
- if (!IsStrict && !Subtarget->isNeonAvailable() &&
4990
- Subtarget->isSVEorStreamingSVEAvailable() &&
4991
- CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4992
- CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4993
- return LowerScalarFPConversionToSVE(Op, DAG);
4994
-
4995
4942
bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
4996
4943
Op->getOpcode() == ISD::SINT_TO_FP;
4997
4944
@@ -18982,13 +18929,67 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
18982
18929
return SDValue();
18983
18930
}
18984
18931
18932
+ static bool
18933
+ shouldUseSVEForScalarFPConversion(SDNode *N,
18934
+ const AArch64Subtarget *Subtarget) { // Returns true when the scalar FP <-> INT conversion node N should be rewritten to use SVE.
18935
+ auto isSupportedType = [](EVT VT) { // Accepts only the simple scalar types listed in the return below.
18936
+ if (!VT.isSimple()) // getSimpleVT() below is only reached for simple VTs.
18937
+ return false;
18938
+ // There are SVE instructions that can convert to/from all pairs of these
18939
+ // int and float types. Note: We don't bother with i8 or i16 as those are
18940
+ // illegal types for scalars.
18941
+ return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
18942
+ VT.getSimpleVT().SimpleTy);
18943
+ };
18944
+ // If we are in a streaming[-compatible] function, use SVE for scalar FP <->
18945
+ // INT conversions as this can help avoid movs between GPRs and FPRs, which
18946
+ // could be quite expensive. Strict FP opcodes are never rewritten.
18947
+ return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() &&
18948
+ (Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
18949
+ isSupportedType(N->getValueType(0)) && // Result type of the conversion.
18950
+ isSupportedType(N->getOperand(0).getValueType()); // Source type; only evaluated for non-strict nodes because the strict check comes first.
18951
+ }
18952
+
18953
+ /// Replaces a scalar FP <-> INT conversion with an SVE (scalable) one, wrapped
18954
+ /// with an insert and extract. Strict FP opcodes are not supported (asserted).
18955
+ static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
18956
+ assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
18957
+ SDValue SrcVal = N->getOperand(0); // The scalar being converted.
18958
+ EVT SrcTy = SrcVal.getValueType();
18959
+ EVT DestTy = N->getValueType(0);
18960
+ EVT SrcVecTy;
18961
+ EVT DestVecTy;
18962
+ // Use a packed vector for the larger type.
18963
+ // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
18964
+ // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
18965
+ // (unlike floats) nxv2i32 is an illegal unpacked type.
18966
+ if (DestTy.bitsGT(SrcTy)) { // Destination is wider: it determines the packed vector type.
18967
+ DestVecTy = getPackedSVEVectorVT(DestTy);
18968
+ SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
18969
+ : DestVecTy.changeVectorElementType(SrcTy);
18970
+ } else { // Source is at least as wide: it determines the packed vector type.
18971
+ SrcVecTy = getPackedSVEVectorVT(SrcTy);
18972
+ DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
18973
+ : SrcVecTy.changeVectorElementType(DestTy);
18974
+ }
18975
+ SDLoc dl(N);
18976
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
18977
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy, // Insert the scalar at index 0 of an undef vector.
18978
+ DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18979
+ Vec = DAG.getNode(N->getOpcode(), dl, DestVecTy, Vec); // Same opcode as N, applied to the vector types chosen above.
18980
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DestTy, Vec, ZeroIdx); // Extract the converted scalar from index 0.
18981
+ }
18982
+
18985
18983
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
18986
18984
const AArch64Subtarget *Subtarget) {
18987
18985
// First try to optimize away the conversion when it's conditionally from
18988
18986
// a constant. Vectors only.
18989
18987
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
18990
18988
return Res;
18991
18989
18990
+ if (shouldUseSVEForScalarFPConversion(N, Subtarget))
18991
+ return replaceScalarFPConversionWithSVE(N, DAG);
18992
+
18992
18993
EVT VT = N->getValueType(0);
18993
18994
if (VT != MVT::f32 && VT != MVT::f64)
18994
18995
return SDValue();
@@ -19027,6 +19028,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19027
19028
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
19028
19029
TargetLowering::DAGCombinerInfo &DCI,
19029
19030
const AArch64Subtarget *Subtarget) {
19031
+ if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19032
+ return replaceScalarFPConversionWithSVE(N, DAG);
19033
+
19030
19034
if (!Subtarget->isNeonAvailable())
19031
19035
return SDValue();
19032
19036
0 commit comments