Skip to content

Commit f14ff59

Browse files
authored
[libclc] Move exp, exp2 and expm1 to the CLC library (#133932)
These all share the use of a common helper function so are handled in one go. These builtins are also now vectorized.
1 parent 602d05f commit f14ff59

File tree

21 files changed

+545
-343
lines changed

21 files changed

+545
-343
lines changed

libclc/generic/lib/math/exp_helper.h renamed to libclc/clc/include/clc/math/clc_exp.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,15 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifdef cl_khr_fp64
9+
#ifndef __CLC_MATH_CLC_EXP_H__
10+
#define __CLC_MATH_CLC_EXP_H__
1011

11-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
12-
_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_exp
1314

14-
#endif
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_EXP_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_EXP2_H__
10+
#define __CLC_MATH_CLC_EXP2_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_exp2
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_EXP2_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_EXP_HELPER_H__
#define __CLC_MATH_CLC_EXP_HELPER_H__

// Guard name follows the __CLC_MATH_<NAME>_H__ pattern used by the sibling
// clc_exp.h / clc_exp2.h / clc_expm1.h headers.
//
// gentype.inc expands __CLC_BODY once per generic type to declare
// __clc_exp_helper. __DOUBLE_ONLY is defined around the expansion;
// NOTE(review): presumably this limits the expansion to the double-precision
// types - confirm against gentype.inc.
#define __DOUBLE_ONLY
#define __CLC_BODY <clc/math/clc_exp_helper.inc>

#include <clc/math/gentype.inc>

#undef __CLC_BODY
#undef __DOUBLE_ONLY

#endif // __CLC_MATH_CLC_EXP_HELPER_H__
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Declaration of the overloaded helper that the 64-bit __clc_exp and
// __clc_exp2 implementations call to finish their computation.
//   x     - the caller's original argument
//   x_min - the caller passes the lower cut-off for x here
//   x_max - the caller passes the upper cut-off for x here
//   r     - the reduced argument computed by the caller
//   n     - the integer the caller obtained while reducing x
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x,
10+
__CLC_GENTYPE x_min,
11+
__CLC_GENTYPE x_max,
12+
__CLC_GENTYPE r,
13+
__CLC_INTN n);
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_EXPM1_H__
10+
#define __CLC_MATH_CLC_EXPM1_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_expm1
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_EXPM1_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ math/clc_ceil.cl
3232
math/clc_copysign.cl
3333
math/clc_cospi.cl
3434
math/clc_ep_log.cl
35+
math/clc_exp.cl
3536
math/clc_exp10.cl
37+
math/clc_exp2.cl
38+
math/clc_expm1.cl
39+
math/clc_exp_helper.cl
3640
math/clc_fabs.cl
3741
math/clc_fma.cl
3842
math/clc_fmod.cl
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_exp_helper.h>
13+
#include <clc/math/clc_fma.h>
14+
#include <clc/math/clc_mad.h>
15+
#include <clc/math/math.h>
16+
#include <clc/relational/clc_isnan.h>
17+
18+
#define __CLC_BODY <clc_exp.inc>
19+
#include <clc/math/gentype.inc>
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// Single-precision e^x.
//
// Writes x = p*ln(2) + t with integer p, evaluates e^t from a polynomial in
// t^2, then scales by 2^p by adding p to the exponent field of the result.
// Inputs below ln(smallest_normal) return 0, inputs at or above
// ln(largest_normal) return +infinity, and NaN inputs are returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
  // ln(2) split into a head and a tail, plus 1/ln(2).
  const __CLC_GENTYPE ln2_head = 0x1.62e300p-1f;
  const __CLC_GENTYPE ln2_tail = 0x1.2fefa2p-17f;
  const __CLC_GENTYPE inv_ln2 = 0x1.715476p+0f;

  // p = x/ln(2), biased by +-0.5 (sign of x) before the integer conversion.
  __CLC_GENTYPE bias = x < 0.0f ? -0.5f : 0.5f;
  __CLC_INTN p = __CLC_CONVERT_INTN(__clc_mad(x, inv_ln2, bias));
  __CLC_GENTYPE p_f = __CLC_CONVERT_GENTYPE(p);

  // Remainder x - p*ln(2), kept as a hi/lo pair.
  // The original author notes that p_f * ln2_head is exact here.
  __CLC_GENTYPE hi = __clc_mad(p_f, -ln2_head, x);
  __CLC_GENTYPE lo = -p_f * ln2_tail;

  // Polynomial in t^2, evaluated by Horner's scheme, one step per line.
  __CLC_GENTYPE t = hi + lo;
  __CLC_GENTYPE t_sq = t * t;
  __CLC_GENTYPE poly = __clc_mad(t_sq, 0x1.637698p-25f, -0x1.bbd41cp-20f);
  poly = __clc_mad(t_sq, poly, 0x1.1566aap-14f);
  poly = __clc_mad(t_sq, poly, -0x1.6c16c2p-9f);
  poly = __clc_mad(t_sq, poly, 0x1.555556p-3f);
  __CLC_GENTYPE v = __clc_mad(t_sq, -poly, t);

  __CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);

  // Scale by 2^p: add p directly into the exponent bits (bit 23 upwards).
  __CLC_GENTYPE scaled = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23));

  // ln(largest_normal) = 88.72283905206835305366
  const __CLC_GENTYPE upper_limit = 0x1.62e430p+6f;
  // ln(smallest_normal) = -87.33654475055310898657
  const __CLC_GENTYPE lower_limit = -0x1.5d589ep+6f;
  // Bit pattern of +infinity.
  const __CLC_GENTYPE pos_inf = __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000);

  scaled = x < lower_limit ? 0.0f : scaled;
  scaled = x < upper_limit ? scaled : pos_inf;
  return __clc_isnan(x) ? x : scaled;
}
52+
53+
#elif __CLC_FPSIZE == 64
54+
55+
// Double-precision e^x.
//
// Computes n = x * 64/ln(2) as an integer, subtracts n*ln(2)/64 from x in two
// steps (head then tail) to obtain the reduced argument r, and hands x, the
// range limits, r and n to __clc_exp_helper for the final result.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
  const __CLC_GENTYPE x_lower = -0x1.74910d52d3051p+9;        // -1075*ln(2)
  const __CLC_GENTYPE x_upper = 0x1.62e42fefa39efp+9;         // 1024*ln(2)
  const __CLC_GENTYPE sixty_four_by_ln2 = 0x1.71547652b82fep+6; // 64/ln(2)
  const __CLC_GENTYPE ln2_by_64_head = 0x1.62e42fefa0000p-7;  // head ln(2)/64
  const __CLC_GENTYPE ln2_by_64_tail = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64

  __CLC_INTN n = __CLC_CONVERT_INTN(x * sixty_four_by_ln2);
  __CLC_GENTYPE n_f = __CLC_CONVERT_GENTYPE(n);

  // Remove the head contribution first, then the tail.
  __CLC_GENTYPE head_reduced = __clc_fma(-ln2_by_64_head, n_f, x);
  __CLC_GENTYPE r = __clc_fma(-ln2_by_64_tail, n_f, head_reduced);

  return __clc_exp_helper(x, x_lower, x_upper, r, n);
}
69+
70+
#elif __CLC_FPSIZE == 16
71+
72+
// 16-bit floating-point variant of __clc_exp (selected when
// __CLC_FPSIZE == 16): converts the argument with __CLC_CONVERT_FLOATN,
// calls __clc_exp on the converted value, and converts the result back to
// the generic type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
73+
return __CLC_CONVERT_GENTYPE(__clc_exp(__CLC_CONVERT_FLOATN(x)));
74+
}
75+
76+
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_exp_helper.h>
13+
#include <clc/math/clc_fma.h>
14+
#include <clc/math/clc_mad.h>
15+
#include <clc/math/clc_rint.h>
16+
#include <clc/math/math.h>
17+
#include <clc/relational/clc_isnan.h>
18+
19+
#define __CLC_BODY <clc_exp2.inc>
20+
#include <clc/math/gentype.inc>
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// Single-precision 2^x.
//
// Splits x into its nearest integer p and the remainder x - p, converts the
// remainder to a natural-log argument t = (x - p)*ln(2) (kept as a hi/lo
// pair), evaluates e^t from a polynomial in t^2, then scales by 2^p by adding
// p to the exponent field of the result.
// Inputs below -126 return 0, inputs at or above 128 return +infinity, and
// NaN inputs are returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
  // ln(2) split into a head and a tail.
  const __CLC_GENTYPE ln2HI = 0x1.62e300p-1f;
  const __CLC_GENTYPE ln2LO = 0x1.2fefa2p-17f;

  // Reduce x: integer part p and remainder x - p.
  __CLC_GENTYPE x_int = __clc_rint(x);
  __CLC_INTN p = __CLC_CONVERT_INTN(x_int);
  __CLC_GENTYPE x_frac = x - x_int;
  __CLC_GENTYPE hi = x_frac * ln2HI;
  __CLC_GENTYPE lo = x_frac * ln2LO;

  // Evaluate poly
  __CLC_GENTYPE t = hi + lo;
  __CLC_GENTYPE tt = t * t;
  __CLC_GENTYPE v = __clc_mad(
      tt,
      -__clc_mad(
          tt,
          __clc_mad(tt,
                    __clc_mad(tt,
                              __clc_mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
                              0x1.1566aap-14f),
                    -0x1.6c16c2p-9f),
          0x1.555556p-3f),
      t);

  __CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);

  // Scale by 2^p: add p directly into the exponent bits (bit 23 upwards).
  // __CLC_AS_GENTYPE is used (rather than __CLC_AS_FLOATN) to match the
  // declared type of r and the sibling __clc_exp implementation.
  __CLC_GENTYPE r = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23));

  const __CLC_GENTYPE ulim = 128.0f;
  const __CLC_GENTYPE llim = -126.0f;

  r = x < llim ? 0.0f : r;
  // 0x7f800000 is the bit pattern of +infinity.
  r = x < ulim ? r : __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000);
  return __clc_isnan(x) ? x : r;
}
49+
50+
#elif __CLC_FPSIZE == 64
51+
52+
// Double-precision 2^x.
//
// Computes n = 64*x as an integer, forms the remainder x - n/64, multiplies
// it by ln(2) to get the reduced argument r, and hands x, the range limits
// (-1074 and 1024), r and n to __clc_exp_helper for the final result.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
  const __CLC_GENTYPE ln2 = 0x1.62e42fefa39efp-1; // ln(2)
  const __CLC_GENTYPE one_by_64 = 1.0 / 64.0;

  __CLC_INTN n = __CLC_CONVERT_INTN(x * 64.0);

  // Remainder of x after removing n/64, then rescaled by ln(2).
  __CLC_GENTYPE remainder = __clc_fma(-one_by_64, __CLC_CONVERT_GENTYPE(n), x);
  __CLC_GENTYPE r = ln2 * remainder;

  return __clc_exp_helper(x, -1074.0, 1024.0, r, n);
}
61+
62+
#elif __CLC_FPSIZE == 16
63+
64+
// 16-bit floating-point variant of __clc_exp2 (selected when
// __CLC_FPSIZE == 16): converts the argument with __CLC_CONVERT_FLOATN,
// calls __clc_exp2 on the converted value, and converts the result back to
// the generic type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
65+
return __CLC_CONVERT_GENTYPE(__clc_exp2(__CLC_CONVERT_FLOATN(x)));
66+
}
67+
68+
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/internal/clc.h>
11+
#include <clc/math/clc_fma.h>
12+
#include <clc/math/clc_ldexp.h>
13+
#include <clc/math/math.h>
14+
#include <clc/math/tables.h>
15+
#include <clc/relational/clc_isnan.h>
16+
17+
#define __DOUBLE_ONLY
18+
#define __CLC_BODY <clc_exp_helper.inc>
19+
20+
#include <clc/math/gentype.inc>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Shared final stage of the double-precision exp-family functions.
//
//   x      original argument (used only for the NaN and range checks)
//   x_min  inputs below this return 0.0
//   x_max  inputs above this return +infinity
//   r      reduced argument supplied by the caller
//   n      integer from the caller's reduction; its low six bits index the
//          2^(j/64) tables and the remaining bits give the power-of-two scale
//
// Returns the table value times (1 + polynomial(r)), scaled by 2^(n >> 6).
// NaN inputs are returned unchanged unless one of the range checks fires.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x,
                                                      __CLC_GENTYPE x_min,
                                                      __CLC_GENTYPE x_max,
                                                      __CLC_GENTYPE r,
                                                      __CLC_INTN n) {
  // Split n into the table index (low 6 bits) and the binary exponent.
  __CLC_INTN tbl_idx = n & 0x3f;
  __CLC_INTN exponent = n >> 6;

  // 6 term tail of Taylor expansion of e^r, evaluated by Horner's scheme one
  // step per line, then multiplied by r.
  __CLC_GENTYPE poly =
      __clc_fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7);
  poly = __clc_fma(r, poly, 0x1.5555555555555p-5);
  poly = __clc_fma(r, poly, 0x1.5555555555555p-3);
  poly = __clc_fma(r, poly, 0x1.0000000000000p-1);
  poly = __clc_fma(r, poly, 1.0);
  __CLC_GENTYPE taylor_tail = r * poly;

  // Combine with the head/tail table entries for index tbl_idx.
  __CLC_GENTYPE tbl_head = USE_TABLE(two_to_jby64_ep_tbl_head, tbl_idx);
  __CLC_GENTYPE tbl_tail = USE_TABLE(two_to_jby64_ep_tbl_tail, tbl_idx);
  __CLC_GENTYPE mant =
      __clc_fma(tbl_head + tbl_tail, taylor_tail, tbl_tail) + tbl_head;

  // Results with exponent below -1022, or exactly -1022 with mant < 1, take
  // the two-step scaling path below instead of __clc_ldexp.
  __CLC_INTN is_small = (exponent < -1022) ||
                        ((exponent == -1022) && __CLC_CONVERT_INTN(mant < 1.0));

  // Two-step scaling: multiply by 2^exp_a and then 2^exp_b, each built by
  // placing the biased exponent directly into bits 52 and up.
  __CLC_INTN exp_a = exponent >> 2;
  __CLC_INTN exp_b = exponent - exp_a;
  __CLC_GENTYPE two_step =
      mant * __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(exp_a) + 1023) << 52);
  two_step *= __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(exp_b) + 1023) << 52);

  // Regular scaling for everything else.
  __CLC_GENTYPE one_step = __clc_ldexp(mant, exponent);
  __CLC_GENTYPE result = __CLC_CONVERT_LONGN(is_small) ? two_step : one_step;

  // Special cases, applied in this order: NaN, overflow, underflow.
  result = __clc_isnan(x) ? x : result;
  result =
      x > x_max ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : result;
  result = x < x_min ? 0.0 : result;

  return result;
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_exp_helper.h>
13+
#include <clc/math/clc_fma.h>
14+
#include <clc/math/clc_mad.h>
15+
#include <clc/math/math.h>
16+
#include <clc/math/tables.h>
17+
#include <clc/relational/clc_isnan.h>
18+
19+
#define __CLC_BODY <clc_expm1.inc>
20+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)