Skip to content

Commit 38b9dd7

Browse files
authored
[flang] Fold ERFC_SCALED (#112287)
Move the ErfcScaled template function from the runtime into a new header file in flang/include/Common, then use it in constant folding to implement folding for the erfc_scaled() intrinsic function.
1 parent 35e8624 commit 38b9dd7

File tree

4 files changed

+127
-100
lines changed

4 files changed

+127
-100
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===-- include/flang/Common/erfc-scaled.h-----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_COMMON_ERFC_SCALED_H_
10+
#define FORTRAN_COMMON_ERFC_SCALED_H_
11+
12+
namespace Fortran::common {
13+
template <typename T> inline T ErfcScaled(T arg) {
14+
// Coefficients for approximation to erfc in the first interval.
15+
static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
16+
3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
17+
static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
18+
1.28261652607737228e03, 2.84423683343917062e03};
19+
20+
// Coefficients for approximation to erfc in the second interval.
21+
static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
22+
6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
23+
1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
24+
2.15311535474403846e-8};
25+
static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
26+
5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
27+
4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
28+
29+
// Coefficients for approximation to erfc in the third interval.
30+
static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
31+
1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
32+
1.63153871373020978e-2};
33+
static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
34+
5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
35+
36+
constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
37+
constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
38+
constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
39+
constexpr T xneg{-26.628e0};
40+
constexpr T xhuge{6.71e7};
41+
constexpr T thresh{0.46875e0};
42+
constexpr T zero{0.0};
43+
constexpr T one{1.0};
44+
constexpr T four{4.0};
45+
constexpr T sixteen{16.0};
46+
constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
47+
static_assert(xmax > xhuge, "xmax must be greater than xhuge");
48+
49+
T ysq;
50+
T xnum;
51+
T xden;
52+
T del;
53+
T result;
54+
55+
auto x{arg};
56+
auto y{std::fabs(x)};
57+
58+
if (y <= thresh) {
59+
// evaluate erf for |x| <= 0.46875
60+
ysq = zero;
61+
if (y > epsilonby2) {
62+
ysq = y * y;
63+
}
64+
xnum = a[4] * ysq;
65+
xden = ysq;
66+
for (int i{0}; i < 3; i++) {
67+
xnum = (xnum + a[i]) * ysq;
68+
xden = (xden + b[i]) * ysq;
69+
}
70+
result = x * (xnum + a[3]) / (xden + b[3]);
71+
result = one - result;
72+
result = std::exp(ysq) * result;
73+
return result;
74+
} else if (y <= four) {
75+
// evaluate erfc for 0.46875 < |x| <= 4.0
76+
xnum = c[8] * y;
77+
xden = y;
78+
for (int i{0}; i < 7; ++i) {
79+
xnum = (xnum + c[i]) * y;
80+
xden = (xden + d[i]) * y;
81+
}
82+
result = (xnum + c[7]) / (xden + d[7]);
83+
} else {
84+
// evaluate erfc for |x| > 4.0
85+
result = zero;
86+
if (y >= xhuge) {
87+
if (y < xmax) {
88+
result = rsqrtpi / y;
89+
}
90+
} else {
91+
ysq = one / (y * y);
92+
xnum = p[5] * ysq;
93+
xden = ysq;
94+
for (int i{0}; i < 4; ++i) {
95+
xnum = (xnum + p[i]) * ysq;
96+
xden = (xden + q[i]) * ysq;
97+
}
98+
result = ysq * (xnum + p[4]) / (xden + q[4]);
99+
result = (rsqrtpi - result) / y;
100+
}
101+
}
102+
// fix up for negative argument, erf, etc.
103+
if (x < zero) {
104+
if (x < xneg) {
105+
result = std::numeric_limits<T>::max();
106+
} else {
107+
ysq = trunc(x * sixteen) / sixteen;
108+
del = (x - ysq) * (x + ysq);
109+
y = std::exp((ysq * ysq)) * std::exp((del));
110+
result = (y + y) - result;
111+
}
112+
}
113+
return result;
114+
}
115+
} // namespace Fortran::common
116+
#endif // FORTRAN_COMMON_ERFC_SCALED_H_

flang/lib/Evaluate/intrinsics-library.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "flang/Evaluate/intrinsics-library.h"
1515
#include "fold-implementation.h"
1616
#include "host.h"
17+
#include "flang/Common/erfc-scaled.h"
1718
#include "flang/Common/static-multimap-view.h"
1819
#include "flang/Evaluate/expression.h"
1920
#include <cfloat>
@@ -231,6 +232,7 @@ struct HostRuntimeLibrary<HostT, LibraryVersion::Libm> {
231232
FolderFactory<F, F{std::cosh}>::Create("cosh"),
232233
FolderFactory<F, F{std::erf}>::Create("erf"),
233234
FolderFactory<F, F{std::erfc}>::Create("erfc"),
235+
FolderFactory<F, F{common::ErfcScaled}>::Create("erfc_scaled"),
234236
FolderFactory<F, F{std::exp}>::Create("exp"),
235237
FolderFactory<F, F{std::tgamma}>::Create("gamma"),
236238
FolderFactory<F, F{std::log}>::Create("log"),

flang/runtime/numeric-templates.h

Lines changed: 2 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "terminator.h"
2222
#include "tools.h"
2323
#include "flang/Common/api-attrs.h"
24+
#include "flang/Common/erfc-scaled.h"
2425
#include "flang/Common/float128.h"
2526
#include <cstdint>
2627
#include <limits>
@@ -362,106 +363,7 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
362363

363364
// ERFC_SCALED (16.9.71)
364365
template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
365-
// Coefficients for approximation to erfc in the first interval.
366-
static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
367-
3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
368-
static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
369-
1.28261652607737228e03, 2.84423683343917062e03};
370-
371-
// Coefficients for approximation to erfc in the second interval.
372-
static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
373-
6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
374-
1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
375-
2.15311535474403846e-8};
376-
static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
377-
5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
378-
4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
379-
380-
// Coefficients for approximation to erfc in the third interval.
381-
static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
382-
1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
383-
1.63153871373020978e-2};
384-
static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
385-
5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
386-
387-
constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
388-
constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
389-
constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
390-
constexpr T xneg{-26.628e0};
391-
constexpr T xhuge{6.71e7};
392-
constexpr T thresh{0.46875e0};
393-
constexpr T zero{0.0};
394-
constexpr T one{1.0};
395-
constexpr T four{4.0};
396-
constexpr T sixteen{16.0};
397-
constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
398-
static_assert(xmax > xhuge, "xmax must be greater than xhuge");
399-
400-
T ysq;
401-
T xnum;
402-
T xden;
403-
T del;
404-
T result;
405-
406-
auto x{arg};
407-
auto y{std::fabs(x)};
408-
409-
if (y <= thresh) {
410-
// evaluate erf for |x| <= 0.46875
411-
ysq = zero;
412-
if (y > epsilonby2) {
413-
ysq = y * y;
414-
}
415-
xnum = a[4] * ysq;
416-
xden = ysq;
417-
for (int i{0}; i < 3; i++) {
418-
xnum = (xnum + a[i]) * ysq;
419-
xden = (xden + b[i]) * ysq;
420-
}
421-
result = x * (xnum + a[3]) / (xden + b[3]);
422-
result = one - result;
423-
result = std::exp(ysq) * result;
424-
return result;
425-
} else if (y <= four) {
426-
// evaluate erfc for 0.46875 < |x| <= 4.0
427-
xnum = c[8] * y;
428-
xden = y;
429-
for (int i{0}; i < 7; ++i) {
430-
xnum = (xnum + c[i]) * y;
431-
xden = (xden + d[i]) * y;
432-
}
433-
result = (xnum + c[7]) / (xden + d[7]);
434-
} else {
435-
// evaluate erfc for |x| > 4.0
436-
result = zero;
437-
if (y >= xhuge) {
438-
if (y < xmax) {
439-
result = rsqrtpi / y;
440-
}
441-
} else {
442-
ysq = one / (y * y);
443-
xnum = p[5] * ysq;
444-
xden = ysq;
445-
for (int i{0}; i < 4; ++i) {
446-
xnum = (xnum + p[i]) * ysq;
447-
xden = (xden + q[i]) * ysq;
448-
}
449-
result = ysq * (xnum + p[4]) / (xden + q[4]);
450-
result = (rsqrtpi - result) / y;
451-
}
452-
}
453-
// fix up for negative argument, erf, etc.
454-
if (x < zero) {
455-
if (x < xneg) {
456-
result = std::numeric_limits<T>::max();
457-
} else {
458-
ysq = trunc(x * sixteen) / sixteen;
459-
del = (x - ysq) * (x + ysq);
460-
y = std::exp((ysq * ysq)) * std::exp((del));
461-
result = (y + y) - result;
462-
}
463-
}
464-
return result;
366+
return common::ErfcScaled(arg);
465367
}
466368

467369
} // namespace Fortran::runtime
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
! RUN: %python %S/test_folding.py %s %flang_fc1
2+
module m
3+
real(4), parameter :: x20_4 = erfc_scaled(20._4)
4+
logical, parameter :: t20_4 = x20_4 == 0.02817435003817081451416015625_4
5+
real(8), parameter :: x20_8 = erfc_scaled(20._8)
6+
logical, parameter :: t20_8 = x20_8 == 0.0281743487410513193669459042212110944092273712158203125_8
7+
end

0 commit comments

Comments
 (0)