@@ -336,8 +336,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
336
336
setOperationAction (ISD::FLOG2, MVT::f32 , Custom);
337
337
setOperationAction (ISD::FROUND, {MVT::f32 , MVT::f64 }, Custom);
338
338
339
- setOperationAction ({ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2}, MVT::f32 ,
340
- Custom);
339
+ setOperationAction (
340
+ {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32 ,
341
+ Custom);
341
342
342
343
setOperationAction (ISD::FNEARBYINT, {MVT::f16 , MVT::f32 , MVT::f64 }, Custom);
343
344
@@ -352,7 +353,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
352
353
setOperationAction ({ISD::FLOG2, ISD::FEXP2}, MVT::f16 , Custom);
353
354
}
354
355
355
- setOperationAction ({ISD::FLOG10, ISD::FLOG, ISD::FEXP}, MVT::f16 , Custom);
356
+ setOperationAction ({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16 ,
357
+ Custom);
356
358
357
359
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
358
360
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
@@ -457,14 +459,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
457
459
458
460
for (MVT VT : FloatVectorTypes) {
459
461
setOperationAction (
460
- {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
461
- ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
462
- ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
463
- ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
464
- ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
465
- ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG,
466
- ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE,
467
- ISD::SETCC, ISD::FCANONICALIZE, ISD::FROUNDEVEN},
462
+ {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,
463
+ ISD::FADD, ISD::FCEIL, ISD::FCOS,
464
+ ISD::FDIV, ISD::FEXP2, ISD::FEXP,
465
+ ISD::FEXP10, ISD::FLOG2, ISD::FREM,
466
+ ISD::FLOG, ISD::FLOG10, ISD::FPOW,
467
+ ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,
468
+ ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
469
+ ISD::FSQRT, ISD::FSIN, ISD::FSUB,
470
+ ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC,
471
+ ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC,
472
+ ISD::FCANONICALIZE, ISD::FROUNDEVEN},
468
473
VT, Expand);
469
474
}
470
475
@@ -1322,6 +1327,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
1322
1327
case ISD::FLOG10:
1323
1328
return LowerFLOGCommon (Op, DAG);
1324
1329
case ISD::FEXP:
1330
+ case ISD::FEXP10:
1325
1331
return lowerFEXP (Op, DAG);
1326
1332
case ISD::FEXP2:
1327
1333
return lowerFEXP2 (Op, DAG);
@@ -1367,6 +1373,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
1367
1373
Results.push_back (Lowered);
1368
1374
return ;
1369
1375
case ISD::FEXP:
1376
+ case ISD::FEXP10:
1370
1377
if (SDValue Lowered = lowerFEXP (SDValue (N, 0 ), DAG))
1371
1378
Results.push_back (Lowered);
1372
1379
return ;
@@ -2841,12 +2848,66 @@ SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
2841
2848
Flags);
2842
2849
}
2843
2850
2851
+ // / Emit approx-funcs appropriate lowering for exp10. inf/nan should still be
2852
+ // / handled correctly.
2853
+ SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe (SDValue X, const SDLoc &SL,
2854
+ SelectionDAG &DAG,
2855
+ SDNodeFlags Flags) const {
2856
+ const EVT VT = X.getValueType ();
2857
+ const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2;
2858
+
2859
+ if (VT != MVT::f32 || !needsDenormHandlingF32 (DAG, X, Flags)) {
2860
+ // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
2861
+ SDValue K0 = DAG.getConstantFP (0x1 .a92000p +1f , SL, VT);
2862
+ SDValue K1 = DAG.getConstantFP (0x1 .4f0978p-11f , SL, VT);
2863
+
2864
+ SDValue Mul0 = DAG.getNode (ISD::FMUL, SL, VT, X, K0, Flags);
2865
+ SDValue Exp2_0 = DAG.getNode (Exp2Op, SL, VT, Mul0, Flags);
2866
+ SDValue Mul1 = DAG.getNode (ISD::FMUL, SL, VT, X, K1, Flags);
2867
+ SDValue Exp2_1 = DAG.getNode (Exp2Op, SL, VT, Mul1, Flags);
2868
+ return DAG.getNode (ISD::FMUL, SL, VT, Exp2_0, Exp2_1);
2869
+ }
2870
+
2871
+ // bool s = x < -0x1.2f7030p+5f;
2872
+ // x += s ? 0x1.0p+5f : 0.0f;
2873
+ // exp10 = exp2(x * 0x1.a92000p+1f) *
2874
+ // exp2(x * 0x1.4f0978p-11f) *
2875
+ // (s ? 0x1.9f623ep-107f : 1.0f);
2876
+
2877
+ EVT SetCCVT = getSetCCResultType (DAG.getDataLayout (), *DAG.getContext (), VT);
2878
+
2879
+ SDValue Threshold = DAG.getConstantFP (-0x1 .2f7030p+5f , SL, VT);
2880
+ SDValue NeedsScaling = DAG.getSetCC (SL, SetCCVT, X, Threshold, ISD::SETOLT);
2881
+
2882
+ SDValue ScaleOffset = DAG.getConstantFP (0x1 .0p+5f , SL, VT);
2883
+ SDValue ScaledX = DAG.getNode (ISD::FADD, SL, VT, X, ScaleOffset, Flags);
2884
+ SDValue AdjustedX =
2885
+ DAG.getNode (ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
2886
+
2887
+ SDValue K0 = DAG.getConstantFP (0x1 .a92000p +1f , SL, VT);
2888
+ SDValue K1 = DAG.getConstantFP (0x1 .4f0978p-11f , SL, VT);
2889
+
2890
+ SDValue Mul0 = DAG.getNode (ISD::FMUL, SL, VT, AdjustedX, K0, Flags);
2891
+ SDValue Exp2_0 = DAG.getNode (Exp2Op, SL, VT, Mul0, Flags);
2892
+ SDValue Mul1 = DAG.getNode (ISD::FMUL, SL, VT, AdjustedX, K1, Flags);
2893
+ SDValue Exp2_1 = DAG.getNode (Exp2Op, SL, VT, Mul1, Flags);
2894
+
2895
+ SDValue MulExps = DAG.getNode (ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags);
2896
+
2897
+ SDValue ResultScaleFactor = DAG.getConstantFP (0x1 .9f623ep-107f , SL, VT);
2898
+ SDValue AdjustedResult =
2899
+ DAG.getNode (ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);
2900
+
2901
+ return DAG.getNode (ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
2902
+ Flags);
2903
+ }
2904
+
2844
2905
SDValue AMDGPUTargetLowering::lowerFEXP (SDValue Op, SelectionDAG &DAG) const {
2845
2906
EVT VT = Op.getValueType ();
2846
2907
SDLoc SL (Op);
2847
2908
SDValue X = Op.getOperand (0 );
2848
2909
SDNodeFlags Flags = Op->getFlags ();
2849
- const bool IsExp10 = false ; // TODO: For some reason exp10 is missing
2910
+ const bool IsExp10 = Op. getOpcode () == ISD::FEXP10;
2850
2911
2851
2912
if (VT.getScalarType () == MVT::f16 ) {
2852
2913
// v_exp_f16 (fmul x, log2e)
@@ -2871,8 +2932,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
2871
2932
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
2872
2933
// library behavior. Also, is known-not-daz source sufficient?
2873
2934
if (allowApproxFunc (DAG, Flags)) {
2874
- assert (! IsExp10 && " todo exp10 support " );
2875
- return lowerFEXPUnsafe (X, SL, DAG, Flags);
2935
+ return IsExp10 ? lowerFEXP10Unsafe (X, SL, DAG, Flags)
2936
+ : lowerFEXPUnsafe (X, SL, DAG, Flags);
2876
2937
}
2877
2938
2878
2939
// Algorithm:
0 commit comments