Skip to content

Commit a6a56a3

Browse files
authored
[libclc] erfc: fix fp32 implementation in FTZ mode (#132390)
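On some targets, the current fp32 code leads to slight accuracy issues. While the maths behind it is correct, it does not take into account the accumulation of errors coming from other operations that are not correctly rounded (such as the exp function). To avoid this, the -0.5625 term is no longer folded into the argument of exp: exp(-0.5625) is precomputed as a constant (0x1.23ba94p-1f) and applied as a separate multiplication, so the code now evaluates exp(-z * z) * exp(...) and then scales the result by that constant. Fixes #124939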
1 parent f10dc76 commit a6a56a3

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

libclc/generic/lib/math/erf.cl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#define erx 8.4506291151e-01f /* 0x3f58560b */
2525

26-
// Coefficients for approximation to erf on [00.84375]
26+
// Coefficients for approximation to erf on [0, 0.84375]
2727

2828
#define efx 1.2837916613e-01f /* 0x3e0375d4 */
2929
#define efx8 1.0270333290e+00f /* 0x3f8375d4 */
@@ -39,7 +39,7 @@
3939
#define qq4 1.3249473704e-04f /* 0x390aee49 */
4040
#define qq5 -3.9602282413e-06f /* 0xb684e21a */
4141

42-
// Coefficients for approximation to erf in [0.843751.25]
42+
// Coefficients for approximation to erf in [0.84375, 1.25]
4343

4444
#define pa0 -2.3621185683e-03f /* 0xbb1acdc6 */
4545
#define pa1 4.1485610604e-01f /* 0x3ed46805 */
@@ -55,7 +55,7 @@
5555
#define qa5 1.3637083583e-02f /* 0x3c5f6e13 */
5656
#define qa6 1.1984500103e-02f /* 0x3c445aa3 */
5757

58-
// Coefficients for approximation to erfc in [1.251/0.35]
58+
// Coefficients for approximation to erfc in [1.25, 1/0.35]
5959

6060
#define ra0 -9.8649440333e-03f /* 0xbc21a093 */
6161
#define ra1 -6.9385856390e-01f /* 0xbf31a0b7 */
@@ -74,7 +74,7 @@
7474
#define sa7 6.5702495575e+00f /* 0x40d23f7c */
7575
#define sa8 -6.0424413532e-02f /* 0xbd777f97 */
7676

77-
// Coefficients for approximation to erfc in [1/.3528]
77+
// Coefficients for approximation to erfc in [1/0.35, 28]
7878

7979
#define rb0 -9.8649431020e-03f /* 0xbc21a092 */
8080
#define rb1 -7.9928326607e-01f /* 0xbf4c9dd4 */
@@ -130,7 +130,8 @@ _CLC_OVERLOAD _CLC_DEF float erf(float x) {
130130

131131
// |x| < 6
132132
float z = as_float(ix & 0xfffff000);
133-
float r = exp(mad(-z, z, -0.5625f)) * exp(mad(z-absx, z+absx, q));
133+
float r = exp(-z * z) * exp(mad(z - absx, z + absx, q));
134+
r *= 0x1.23ba94p-1f; // exp(-0.5625)
134135
r = 1.0f - MATH_DIVIDE(r, absx);
135136
ret = absx < 6.0f ? r : ret;
136137

libclc/generic/lib/math/erfc.cl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#define erx_f 8.4506291151e-01f /* 0x3f58560b */
2525

26-
// Coefficients for approximation to erf on [00.84375]
26+
// Coefficients for approximation to erf on [0, 0.84375]
2727

2828
#define efx 1.2837916613e-01f /* 0x3e0375d4 */
2929
#define efx8 1.0270333290e+00f /* 0x3f8375d4 */
@@ -39,7 +39,7 @@
3939
#define qq4 1.3249473704e-04f /* 0x390aee49 */
4040
#define qq5 -3.9602282413e-06f /* 0xb684e21a */
4141

42-
// Coefficients for approximation to erf in [0.843751.25]
42+
// Coefficients for approximation to erf in [0.84375, 1.25]
4343

4444
#define pa0 -2.3621185683e-03f /* 0xbb1acdc6 */
4545
#define pa1 4.1485610604e-01f /* 0x3ed46805 */
@@ -55,7 +55,7 @@
5555
#define qa5 1.3637083583e-02f /* 0x3c5f6e13 */
5656
#define qa6 1.1984500103e-02f /* 0x3c445aa3 */
5757

58-
// Coefficients for approximation to erfc in [1.251/0.35]
58+
// Coefficients for approximation to erfc in [1.25, 1/0.35]
5959

6060
#define ra0 -9.8649440333e-03f /* 0xbc21a093 */
6161
#define ra1 -6.9385856390e-01f /* 0xbf31a0b7 */
@@ -74,7 +74,7 @@
7474
#define sa7 6.5702495575e+00f /* 0x40d23f7c */
7575
#define sa8 -6.0424413532e-02f /* 0xbd777f97 */
7676

77-
// Coefficients for approximation to erfc in [1/.3528]
77+
// Coefficients for approximation to erfc in [1/0.35, 28]
7878

7979
#define rb0 -9.8649431020e-03f /* 0xbc21a092 */
8080
#define rb1 -7.9928326607e-01f /* 0xbf4c9dd4 */
@@ -131,7 +131,8 @@ _CLC_OVERLOAD _CLC_DEF float erfc(float x) {
131131
float ret = 0.0f;
132132

133133
float z = as_float(ix & 0xfffff000);
134-
float r = exp(mad(-z, z, -0.5625f)) * exp(mad(z - absx, z + absx, q));
134+
float r = exp(-z * z) * exp(mad(z - absx, z + absx, q));
135+
r *= 0x1.23ba94p-1f; // exp(-0.5625)
135136
r = MATH_DIVIDE(r, absx);
136137
t = 2.0f - r;
137138
r = x < 0.0f ? t : r;

0 commit comments

Comments
 (0)