Skip to content

[flang] Add runtime support for Fortran intrinsic ERFC_SCALED #95040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ class FirOpBuilder;

namespace fir::runtime {

/// Generate call to ErfcScaled intrinsic runtime routine.
mlir::Value genErfcScaled(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value x);

/// Generate call to Exponent intrinsic runtime routine.
mlir::Value genExponent(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, mlir::Value x);
Expand Down
14 changes: 14 additions & 0 deletions flang/include/flang/Runtime/numeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling16_16)(
#endif
#endif

// ERFC_SCALED
CppTypeFor<TypeCategory::Real, 4> RTDECL(ErfcScaled4)(
CppTypeFor<TypeCategory::Real, 4>);
CppTypeFor<TypeCategory::Real, 8> RTDECL(ErfcScaled8)(
CppTypeFor<TypeCategory::Real, 8>);
#if LDBL_MANT_DIG == 64
CppTypeFor<TypeCategory::Real, 10> RTDECL(ErfcScaled10)(
CppTypeFor<TypeCategory::Real, 10>);
#endif
#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
CppTypeFor<TypeCategory::Real, 16> RTDECL(ErfcScaled16)(
CppTypeFor<TypeCategory::Real, 16>);
#endif

// EXPONENT is defined to return default INTEGER; support INTEGER(4 & 8)
CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent4_4)(
CppTypeFor<TypeCategory::Real, 4>);
Expand Down
11 changes: 11 additions & 0 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ static constexpr IntrinsicHandler handlers[]{
{"boundary", asBox, handleDynamicOptional},
{"dim", asValue}}},
/*isElemental=*/false},
{"erfc_scaled", &I::genErfcScaled},
{"etime",
&I::genEtime,
{{{"values", asBox}, {"time", asBox}}},
Expand Down Expand Up @@ -5814,6 +5815,16 @@ mlir::Value IntrinsicLibrary::genRRSpacing(mlir::Type resultType,
fir::runtime::genRRSpacing(builder, loc, fir::getBase(args[0])));
}

// ERFC_SCALED
mlir::Value IntrinsicLibrary::genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 1);

return builder.createConvert(
loc, resultType,
fir::runtime::genErfcScaled(builder, loc, fir::getBase(args[0])));
}

// SAME_TYPE_AS
fir::ExtendedValue
IntrinsicLibrary::genSameTypeAs(mlir::Type resultType,
Expand Down
46 changes: 46 additions & 0 deletions flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,28 @@ using namespace Fortran::runtime;
// may not have them in their runtime library. This can occur in the
// case of cross compilation, for example.

/// Placeholder for real*10 version of ErfcScaled Intrinsic
struct ForcedErfcScaled10 {
static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled10));
static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
return [](mlir::MLIRContext *ctx) {
auto ty = mlir::FloatType::getF80(ctx);
return mlir::FunctionType::get(ctx, {ty}, {ty});
};
}
};

/// Placeholder for real*16 version of ErfcScaled Intrinsic
struct ForcedErfcScaled16 {
static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled16));
static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
return [](mlir::MLIRContext *ctx) {
auto ty = mlir::FloatType::getF128(ctx);
return mlir::FunctionType::get(ctx, {ty}, {ty});
};
}
};

/// Placeholder for real*10 version of Exponent Intrinsic
struct ForcedExponent10_4 {
static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Exponent10_4));
Expand Down Expand Up @@ -444,6 +466,30 @@ mlir::Value fir::runtime::genRRSpacing(fir::FirOpBuilder &builder,
return builder.create<fir::CallOp>(loc, func, args).getResult(0);
}

/// Generate call to ErfcScaled intrinsic runtime routine.
mlir::Value fir::runtime::genErfcScaled(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value x) {
mlir::func::FuncOp func;
mlir::Type fltTy = x.getType();

if (fltTy.isF32())
func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled4)>(loc, builder);
else if (fltTy.isF64())
func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled8)>(loc, builder);
else if (fltTy.isF80())
func = fir::runtime::getRuntimeFunc<ForcedErfcScaled10>(loc, builder);
else if (fltTy.isF128())
func = fir::runtime::getRuntimeFunc<ForcedErfcScaled16>(loc, builder);
else
fir::intrinsicTypeTODO(builder, fltTy, loc, "ERFC_SCALED");

auto funcTy = func.getFunctionType();
llvm::SmallVector<mlir::Value> args = {
builder.createConvert(loc, funcTy.getInput(0), x)};

return builder.create<fir::CallOp>(loc, func, args).getResult(0);
}

/// Generate call to Scale intrinsic runtime routine.
mlir::Value fir::runtime::genScale(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value x,
Expand Down
104 changes: 104 additions & 0 deletions flang/runtime/numeric-templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,110 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
}
}

// ERFC_SCALED (16.9.71)
template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
// Coefficients for approximation to erfc in the first interval.
static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
1.28261652607737228e03, 2.84423683343917062e03};

// Coefficients for approximation to erfc in the second interval.
static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
2.15311535474403846e-8};
static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};

// Coefficients for approximation to erfc in the third interval.
static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
1.63153871373020978e-2};
static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};

constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
constexpr T xneg{-26.628e0};
constexpr T xhuge{6.71e7};
constexpr T thresh{0.46875e0};
constexpr T zero{0.0};
constexpr T one{1.0};
constexpr T four{4.0};
constexpr T sixteen{16.0};
constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
static_assert(xmax > xhuge, "xmax must be greater than xhuge");

T ysq;
T xnum;
T xden;
T del;
T result;

auto x{arg};
auto y{std::fabs(x)};

if (y <= thresh) {
// evaluate erf for |x| <= 0.46875
ysq = zero;
if (y > epsilonby2) {
ysq = y * y;
}
xnum = a[4] * ysq;
xden = ysq;
for (int i{0}; i < 3; i++) {
xnum = (xnum + a[i]) * ysq;
xden = (xden + b[i]) * ysq;
}
result = x * (xnum + a[3]) / (xden + b[3]);
result = one - result;
result = std::exp(ysq) * result;
return result;
} else if (y <= four) {
// evaluate erfc for 0.46875 < |x| <= 4.0
xnum = c[8] * y;
xden = y;
for (int i{0}; i < 7; ++i) {
xnum = (xnum + c[i]) * y;
xden = (xden + d[i]) * y;
}
result = (xnum + c[7]) / (xden + d[7]);
} else {
// evaluate erfc for |x| > 4.0
result = zero;
if (y >= xhuge) {
if (y < xmax) {
result = rsqrtpi / y;
}
} else {
ysq = one / (y * y);
xnum = p[5] * ysq;
xden = ysq;
for (int i{0}; i < 4; ++i) {
xnum = (xnum + p[i]) * ysq;
xden = (xden + q[i]) * ysq;
}
result = ysq * (xnum + p[4]) / (xden + q[4]);
result = (rsqrtpi - result) / y;
}
}
// fix up for negative argument, erf, etc.
if (x < zero) {
if (x < xneg) {
result = std::numeric_limits<T>::max();
} else {
ysq = trunc(x * sixteen) / sixteen;
del = (x - ysq) * (x + ysq);
y = std::exp((ysq * ysq)) * std::exp((del));
result = (y + y) - result;
}
}
return result;
}

} // namespace Fortran::runtime

#endif // FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_
21 changes: 21 additions & 0 deletions flang/runtime/numeric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,27 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling16_16)(
#endif
#endif

CppTypeFor<TypeCategory::Real, 4> RTDEF(ErfcScaled4)(
CppTypeFor<TypeCategory::Real, 4> x) {
return ErfcScaled(x);
}
CppTypeFor<TypeCategory::Real, 8> RTDEF(ErfcScaled8)(
CppTypeFor<TypeCategory::Real, 8> x) {
return ErfcScaled(x);
}
#if LDBL_MANT_DIG == 64
CppTypeFor<TypeCategory::Real, 10> RTDEF(ErfcScaled10)(
CppTypeFor<TypeCategory::Real, 10> x) {
return ErfcScaled(x);
}
#endif
#if LDBL_MANT_DIG == 113
CppTypeFor<TypeCategory::Real, 16> RTDEF(ErfcScaled16)(
CppTypeFor<TypeCategory::Real, 16> x) {
return ErfcScaled(x);
}
#endif

CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent4_4)(
CppTypeFor<TypeCategory::Real, 4> x) {
return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
Expand Down
23 changes: 23 additions & 0 deletions flang/test/Lower/Intrinsics/erfc_scaled.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s

! CHECK-LABEL: func @_QPerfc_scaled4(
! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f32>{{.*}}) -> f32
function erfc_scaled4(x)
real(kind=4) :: erfc_scaled4
real(kind=4) :: x
erfc_scaled4 = erfc_scaled(x);
! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f32>
! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled4(%[[a1]]) {{.*}}: (f32) -> f32
end function erfc_scaled4


! CHECK-LABEL: func @_QPerfc_scaled8(
! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f64>{{.*}}) -> f64
function erfc_scaled8(x)
real(kind=8) :: erfc_scaled8
real(kind=8) :: x
erfc_scaled8 = erfc_scaled(x);
! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f64>
! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled8(%[[a1]]) {{.*}}: (f64) -> f64
end function erfc_scaled8
8 changes: 8 additions & 0 deletions flang/unittests/Runtime/Numeric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ TEST(Numeric, Floor) {
EXPECT_EQ(RTNAME(Floor4_1)(Real<4>{0}), 0);
}

TEST(Numeric, Erfc_scaled) {
EXPECT_NEAR(RTNAME(ErfcScaled4)(Real<4>{20.0}), 0.02817434874, 1.0e-8);
EXPECT_NEAR(RTNAME(ErfcScaled8)(Real<8>{20.0}), 0.02817434874, 1.0e-11);
#if LDBL_MANT_DIG == 64
EXPECT_NEAR(RTNAME(ErfcScaled10)(Real<10>{20.0}), 0.02817434874, 1.0e-8);
#endif
}

TEST(Numeric, Exponent) {
EXPECT_EQ(RTNAME(Exponent4_4)(Real<4>{0}), 0);
EXPECT_EQ(RTNAME(Exponent4_8)(Real<4>{1.0}), 1);
Expand Down
Loading