@@ -2760,14 +2760,40 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
2760
2760
return DAG.getNode (ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
2761
2761
}
2762
2762
2763
- SDValue AMDGPUTargetLowering::lowerFEXPUnsafe (SDValue Op , const SDLoc &SL,
2763
+ SDValue AMDGPUTargetLowering::lowerFEXPUnsafe (SDValue X , const SDLoc &SL,
2764
2764
SelectionDAG &DAG,
2765
2765
SDNodeFlags Flags) const {
2766
- // exp2(M_LOG2E_F * f);
2767
- EVT VT = Op.getValueType ();
2768
- const SDValue K = DAG.getConstantFP (numbers::log2e, SL, VT);
2769
- SDValue Mul = DAG.getNode (ISD::FMUL, SL, VT, Op, K, Flags);
2770
- return DAG.getNode (VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul,
2766
+ EVT VT = X.getValueType ();
2767
+ const SDValue Log2E = DAG.getConstantFP (numbers::log2e, SL, VT);
2768
+
2769
+ if (VT != MVT::f32 || !needsDenormHandlingF32 (DAG, X, Flags)) {
2770
+ // exp2(M_LOG2E_F * f);
2771
+ SDValue Mul = DAG.getNode (ISD::FMUL, SL, VT, X, Log2E, Flags);
2772
+ return DAG.getNode (VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT,
2773
+ Mul, Flags);
2774
+ }
2775
+
2776
+ EVT SetCCVT = getSetCCResultType (DAG.getDataLayout (), *DAG.getContext (), VT);
2777
+
2778
+ SDValue Threshold = DAG.getConstantFP (-0x1.5d58a0p+6f, SL, VT);
2779
+ SDValue NeedsScaling = DAG.getSetCC (SL, SetCCVT, X, Threshold, ISD::SETOLT);
2780
+
2781
+ SDValue ScaleOffset = DAG.getConstantFP (0x1.0p+6f, SL, VT);
2782
+
2783
+ SDValue ScaledX = DAG.getNode (ISD::FADD, SL, VT, X, ScaleOffset, Flags);
2784
+
2785
+ SDValue AdjustedX =
2786
+ DAG.getNode (ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
2787
+
2788
+ SDValue ExpInput = DAG.getNode (ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
2789
+
2790
+ SDValue Exp2 = DAG.getNode (AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
2791
+
2792
+ SDValue ResultScaleFactor = DAG.getConstantFP (0x1.969d48p-93f, SL, VT);
2793
+ SDValue AdjustedResult =
2794
+ DAG.getNode (ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
2795
+
2796
+ return DAG.getNode (ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
2771
2797
Flags);
2772
2798
}
2773
2799
@@ -2800,7 +2826,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
2800
2826
2801
2827
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
2802
2828
// library behavior. Also, is known-not-daz source sufficient?
2803
- if (allowApproxFunc (DAG, Flags) && ! needsDenormHandlingF32 (DAG, X, Flags) ) {
2829
+ if (allowApproxFunc (DAG, Flags)) {
2804
2830
assert (!IsExp10 && " todo exp10 support" );
2805
2831
return lowerFEXPUnsafe (X, SL, DAG, Flags);
2806
2832
}
0 commit comments