@@ -701,43 +701,45 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
701
701
}
702
702
703
703
auto LegalizeNarrowFP = [this](MVT ScalarVT) {
704
- for (auto Op : {ISD::SETCC,
705
- ISD::SELECT_CC,
706
- ISD::BR_CC,
707
- ISD::FADD,
708
- ISD::FSUB,
709
- ISD::FMUL,
710
- ISD::FDIV,
711
- ISD::FMA,
712
- ISD::FCEIL,
713
- ISD::FSQRT,
714
- ISD::FFLOOR,
715
- ISD::FNEARBYINT,
716
- ISD::FRINT,
717
- ISD::FROUND,
718
- ISD::FROUNDEVEN,
719
- ISD::FTRUNC,
720
- ISD::FMINNUM,
721
- ISD::FMAXNUM,
722
- ISD::FMINIMUM,
723
- ISD::FMAXIMUM,
724
- ISD::STRICT_FADD,
725
- ISD::STRICT_FSUB,
726
- ISD::STRICT_FMUL,
727
- ISD::STRICT_FDIV,
728
- ISD::STRICT_FMA,
729
- ISD::STRICT_FCEIL,
730
- ISD::STRICT_FFLOOR,
731
- ISD::STRICT_FSQRT,
732
- ISD::STRICT_FRINT,
733
- ISD::STRICT_FNEARBYINT,
734
- ISD::STRICT_FROUND,
735
- ISD::STRICT_FTRUNC,
736
- ISD::STRICT_FROUNDEVEN,
737
- ISD::STRICT_FMINNUM,
738
- ISD::STRICT_FMAXNUM,
739
- ISD::STRICT_FMINIMUM,
740
- ISD::STRICT_FMAXIMUM})
704
+ for (auto Op : {
705
+ ISD::SETCC,
706
+ ISD::SELECT_CC,
707
+ ISD::BR_CC,
708
+ ISD::FADD,
709
+ ISD::FSUB,
710
+ ISD::FMUL,
711
+ ISD::FDIV,
712
+ ISD::FMA,
713
+ ISD::FCEIL,
714
+ ISD::FSQRT,
715
+ ISD::FFLOOR,
716
+ ISD::FNEARBYINT,
717
+ ISD::FRINT,
718
+ ISD::FROUND,
719
+ ISD::FROUNDEVEN,
720
+ ISD::FTRUNC,
721
+ ISD::FMINNUM,
722
+ ISD::FMAXNUM,
723
+ ISD::FMINIMUM,
724
+ ISD::FMAXIMUM,
725
+ ISD::STRICT_FADD,
726
+ ISD::STRICT_FSUB,
727
+ ISD::STRICT_FMUL,
728
+ ISD::STRICT_FDIV,
729
+ ISD::STRICT_FMA,
730
+ ISD::STRICT_FCEIL,
731
+ ISD::STRICT_FFLOOR,
732
+ ISD::STRICT_FSQRT,
733
+ ISD::STRICT_FRINT,
734
+ ISD::STRICT_FNEARBYINT,
735
+ ISD::STRICT_FROUND,
736
+ ISD::STRICT_FTRUNC,
737
+ ISD::STRICT_FROUNDEVEN,
738
+ ISD::STRICT_FMINNUM,
739
+ ISD::STRICT_FMAXNUM,
740
+ ISD::STRICT_FMINIMUM,
741
+ ISD::STRICT_FMAXIMUM,
742
+ })
741
743
setOperationAction(Op, ScalarVT, Promote);
742
744
743
745
for (auto Op : {ISD::FNEG, ISD::FABS})
@@ -752,45 +754,45 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
752
754
753
755
// promote v4f16 to v4f32 when that is known to be safe.
754
756
auto V4Narrow = MVT::getVectorVT(ScalarVT, 4);
755
- setOperationPromotedToType(ISD::FADD, V4Narrow, MVT::v4f32);
756
- setOperationPromotedToType(ISD::FSUB, V4Narrow, MVT::v4f32);
757
- setOperationPromotedToType(ISD::FMUL, V4Narrow, MVT::v4f32);
758
- setOperationPromotedToType(ISD::FDIV, V4Narrow, MVT::v4f32);
759
-
760
- setOperationAction(ISD::FABS, V4Narrow, Legal);
761
- setOperationAction(ISD::FNEG, V4Narrow, Legal);
762
- setOperationAction(ISD::FROUND, V4Narrow, Expand);
763
- setOperationAction(ISD::FROUNDEVEN, V4Narrow, Expand);
757
+ setOperationPromotedToType(ISD::FADD, V4Narrow, MVT::v4f32);
758
+ setOperationPromotedToType(ISD::FSUB, V4Narrow, MVT::v4f32);
759
+ setOperationPromotedToType(ISD::FMUL, V4Narrow, MVT::v4f32);
760
+ setOperationPromotedToType(ISD::FDIV, V4Narrow, MVT::v4f32);
761
+ setOperationPromotedToType(ISD::FCEIL, V4Narrow, MVT::v4f32);
762
+ setOperationPromotedToType(ISD::FFLOOR, V4Narrow, MVT::v4f32);
763
+ setOperationPromotedToType(ISD::FROUND, V4Narrow, MVT::v4f32);
764
+ setOperationPromotedToType(ISD::FTRUNC, V4Narrow, MVT::v4f32);
765
+ setOperationPromotedToType(ISD::FROUNDEVEN, V4Narrow, MVT::v4f32);
766
+ setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
767
+ setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
768
+
769
+ setOperationAction(ISD::FABS, V4Narrow, Legal);
770
+ setOperationAction(ISD::FNEG, V4Narrow, Legal);
764
771
setOperationAction(ISD::FMA, V4Narrow, Expand);
765
772
setOperationAction(ISD::SETCC, V4Narrow, Custom);
766
773
setOperationAction(ISD::BR_CC, V4Narrow, Expand);
767
774
setOperationAction(ISD::SELECT, V4Narrow, Expand);
768
775
setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
769
- setOperationAction(ISD::FTRUNC, V4Narrow, Expand);
770
- setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
771
- setOperationAction(ISD::FFLOOR, V4Narrow, Expand);
772
- setOperationAction(ISD::FCEIL, V4Narrow, Expand);
773
- setOperationAction(ISD::FRINT, V4Narrow, Expand);
774
- setOperationAction(ISD::FNEARBYINT, V4Narrow, Expand);
776
+ setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
775
777
setOperationAction(ISD::FSQRT, V4Narrow, Expand);
776
778
777
779
auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
778
- setOperationAction(ISD::FABS, V8Narrow, Legal);
779
- setOperationAction(ISD::FADD, V8Narrow, Expand );
780
- setOperationAction(ISD::FCEIL, V8Narrow, Expand );
781
- setOperationAction(ISD::FCOPYSIGN, V8Narrow, Custom);
782
- setOperationAction(ISD::FDIV, V8Narrow, Expand );
783
- setOperationAction(ISD::FFLOOR, V8Narrow, Expand );
780
+ setOperationAction(ISD::FABS, V8Narrow, Legal);
781
+ setOperationAction(ISD::FADD, V8Narrow, Legal );
782
+ setOperationAction(ISD::FCEIL, V8Narrow, Legal );
783
+ setOperationAction(ISD::FCOPYSIGN, V8Narrow, Custom);
784
+ setOperationAction(ISD::FDIV, V8Narrow, Legal );
785
+ setOperationAction(ISD::FFLOOR, V8Narrow, Legal );
784
786
setOperationAction(ISD::FMA, V8Narrow, Expand);
785
- setOperationAction(ISD::FMUL, V8Narrow, Expand );
786
- setOperationAction(ISD::FNEARBYINT, V8Narrow, Expand );
787
- setOperationAction(ISD::FNEG, V8Narrow, Legal);
788
- setOperationAction(ISD::FROUND, V8Narrow, Expand );
789
- setOperationAction(ISD::FROUNDEVEN, V8Narrow, Expand );
790
- setOperationAction(ISD::FRINT, V8Narrow, Expand );
787
+ setOperationAction(ISD::FMUL, V8Narrow, Legal );
788
+ setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal );
789
+ setOperationAction(ISD::FNEG, V8Narrow, Legal);
790
+ setOperationAction(ISD::FROUND, V8Narrow, Legal );
791
+ setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal );
792
+ setOperationAction(ISD::FRINT, V8Narrow, Legal );
791
793
setOperationAction(ISD::FSQRT, V8Narrow, Expand);
792
- setOperationAction(ISD::FSUB, V8Narrow, Expand );
793
- setOperationAction(ISD::FTRUNC, V8Narrow, Expand );
794
+ setOperationAction(ISD::FSUB, V8Narrow, Legal );
795
+ setOperationAction(ISD::FTRUNC, V8Narrow, Legal );
794
796
setOperationAction(ISD::SETCC, V8Narrow, Expand);
795
797
setOperationAction(ISD::BR_CC, V8Narrow, Expand);
796
798
setOperationAction(ISD::SELECT, V8Narrow, Expand);
@@ -10593,13 +10595,19 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
10593
10595
VT == MVT::v4f32)) ||
10594
10596
(ST->hasSVE() &&
10595
10597
(VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
10596
- if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
10598
+ if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified) {
10597
10599
// For the reciprocal estimates, convergence is quadratic, so the number
10598
10600
// of digits is doubled after each iteration. In ARMv8, the accuracy of
10599
10601
// the initial estimate is 2^-8. Thus the number of extra steps to refine
10600
10602
// the result for float (23 mantissa bits) is 2 and for double (52
10601
10603
// mantissa bits) is 3.
10602
- ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
10604
+ constexpr unsigned AccurateBits = 8;
10605
+ unsigned DesiredBits =
10606
+ APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(VT));
10607
+ ExtraSteps = DesiredBits <= AccurateBits
10608
+ ? 0
10609
+ : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits);
10610
+ }
10603
10611
10604
10612
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
10605
10613
}
0 commit comments