Skip to content

Commit f186041

Browse files
authored
[libclc] Move sinh, cosh & tanh to the CLC library (#134063)
This commit also vectorizes the builtins.
1 parent d51525b commit f186041

File tree

16 files changed

+786
-611
lines changed

16 files changed

+786
-611
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the __clc_cosh declaration header.
#ifndef __CLC_MATH_CLC_COSH_H__
10+
#define __CLC_MATH_CLC_COSH_H__
11+
12+
// Configure the generic-type template: the body to expand is the unary
// declaration template and the function being declared is __clc_cosh.
// NOTE(review): presumably gentype.inc expands __CLC_BODY once per supported
// floating-point type and vector width - confirm against gentype.inc.
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_cosh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// Drop the template parameters so they do not leak into later includes.
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_COSH_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the __clc_sinh declaration header.
#ifndef __CLC_MATH_CLC_SINH_H__
10+
#define __CLC_MATH_CLC_SINH_H__
11+
12+
// Configure the generic-type template: the body to expand is the unary
// declaration template and the function being declared is __clc_sinh.
// NOTE(review): presumably gentype.inc expands __CLC_BODY once per supported
// floating-point type and vector width - confirm against gentype.inc.
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_sinh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// Drop the template parameters so they do not leak into later includes.
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_SINH_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the __clc_tanh declaration header.
#ifndef __CLC_MATH_CLC_TANH_H__
10+
#define __CLC_MATH_CLC_TANH_H__
11+
12+
// Configure the generic-type template: the body to expand is the unary
// declaration template and the function being declared is __clc_tanh.
// NOTE(review): presumably gentype.inc expands __CLC_BODY once per supported
// floating-point type and vector width - confirm against gentype.inc.
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_tanh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// Drop the template parameters so they do not leak into later includes.
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_TANH_H__

libclc/clc/include/clc/math/tables.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
TABLE_FUNCTION_DECL(float2, log2_tbl);
6363
TABLE_FUNCTION_DECL(float2, log10_tbl);
6464
TABLE_FUNCTION_DECL(uint4, pibits_tbl);
65-
TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
6665

6766
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
6867
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@@ -74,6 +73,8 @@ CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_head);
7473
CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_tail);
7574
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
7675
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
76+
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
77+
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
7778

7879
#ifdef cl_khr_fp64
7980

@@ -85,8 +86,10 @@ CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
8586
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
8687
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);
8788
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_tail);
88-
TABLE_FUNCTION_DECL(double2, sinh_tbl);
89-
TABLE_FUNCTION_DECL(double2, cosh_tbl);
89+
CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_head);
90+
CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_tail);
91+
CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_head);
92+
CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_tail);
9093
CLC_TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
9194
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_head);
9295
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_tail);

libclc/clc/lib/generic/SOURCES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ math/clc_atanpi.cl
3131
math/clc_cbrt.cl
3232
math/clc_ceil.cl
3333
math/clc_copysign.cl
34+
math/clc_cosh.cl
3435
math/clc_cospi.cl
3536
math/clc_ep_log.cl
3637
math/clc_exp.cl
@@ -76,10 +77,12 @@ math/clc_rootn.cl
7677
math/clc_round.cl
7778
math/clc_rsqrt.cl
7879
math/clc_sincos_helpers.cl
80+
math/clc_sinh.cl
7981
math/clc_sinpi.cl
8082
math/clc_sqrt.cl
8183
math/clc_sw_fma.cl
8284
math/clc_tables.cl
85+
math/clc_tanh.cl
8386
math/clc_tanpi.cl
8487
math/clc_tgamma.cl
8588
math/clc_trunc.cl
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_copysign.h>
13+
#include <clc/math/clc_exp.h>
14+
#include <clc/math/clc_fabs.h>
15+
#include <clc/math/clc_fma.h>
16+
#include <clc/math/clc_mad.h>
17+
#include <clc/math/math.h>
18+
#include <clc/math/tables.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/shared/clc_min.h>
22+
23+
// Instantiate the cosh implementation: gentype.inc is included with
// __CLC_BODY pointing at clc_cosh.inc, which selects an implementation by
// __CLC_FPSIZE (32, 64 or 16).
// NOTE(review): presumably gentype.inc includes __CLC_BODY once per supported
// type and vector width - confirm against gentype.inc.
#define __CLC_BODY <clc_cosh.inc>
24+
#include <clc/math/gentype.inc>
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// Single-precision hyperbolic cosine.
//
// cosh is an even function, so all work is done on y = |x| and the result
// carries no sign. After the special cases, the computation is split by the
// magnitude of y:
//
//   y is NaN:              result is NaN
//   y <  2^-14:            result is exactly 1.0
//   y >= max_cosh_arg:     result is +Inf (overflow)
//   y >= small_threshold:  exp(-y) is insignificant next to exp(y), so the
//                          result is exp(y)/2
//   otherwise:             split y = y0 + dy with y0 the integer part and use
//                            cosh(y) = cosh(y0)*cosh(dy) + sinh(y0)*sinh(dy)
//                          where sinh(y0) and cosh(y0) come from a table and
//                          sinh(dy), cosh(dy) from polynomials in dy.
//
// Every region is computed unconditionally and the final value is chosen
// with selects, so the same code serves scalar and vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
  // At or above this magnitude (about 89.4) the result is forced to +Inf.
  const __CLC_GENTYPE max_cosh_arg = 0x1.65a9fap+6f;
  // At or above this magnitude (about 8.32) the exp(y)/2 path is used.
  const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;

  __CLC_GENTYPE y = __clc_fabs(x);
  // Bit pattern of |x|, used for the NaN and tiny-argument tests below.
  __CLC_UINTN aux = __CLC_AS_UINTN(y);

  // Integer part y0 of y, used as the table index. Anything outside 0..36
  // (including whatever the conversion yields for NaN or huge inputs) is
  // mapped to index 0 to keep the table lookup in range; the value computed
  // from such an index is discarded by the selects at the end.
  __CLC_INTN ind = __CLC_CONVERT_INTN(y);
  ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;

  // Fractional remainder dy = y - y0 and its square.
  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
  __CLC_GENTYPE dy2 = dy * dy;

  // sinh(dy) ~= dy + dy^3 * P(dy^2), with P evaluated by Horner's scheme.
  __CLC_GENTYPE sdy = __clc_mad(dy2, 0.7746188980094184251527126e-12f,
                                0.160576793121939886190847e-9f);
  sdy = __clc_mad(dy2, sdy, 0.250521176994133472333666e-7f);
  sdy = __clc_mad(dy2, sdy, 0.275573191913636406057211e-5f);
  sdy = __clc_mad(dy2, sdy, 0.198412698413242405162014e-3f);
  sdy = __clc_mad(dy2, sdy, 0.833333333333329931873097e-2f);
  sdy = __clc_mad(dy2, sdy, 0.166666666666666667013899e0f);
  sdy = __clc_mad(sdy, dy * dy2, dy);

  // cosh(dy) ~= 1 + dy^2 * Q(dy^2), with Q evaluated by Horner's scheme.
  __CLC_GENTYPE cdy = __clc_mad(dy2, 0.1163921388172173692062032e-10f,
                                0.208744349831471353536305e-8f);
  cdy = __clc_mad(dy2, cdy, 0.275573350756016588011357e-6f);
  cdy = __clc_mad(dy2, cdy, 0.248015872460622433115785e-4f);
  cdy = __clc_mad(dy2, cdy, 0.138888888889814854814536e-2f);
  cdy = __clc_mad(dy2, cdy, 0.416666666666660876512776e-1f);
  cdy = __clc_mad(dy2, cdy, 0.500000000000000005911074e0f);
  cdy = __clc_mad(cdy, dy2, 1.0f);

  // Combine with the tabulated values. Matching the identity above, the
  // "head" entry multiplies sinh(dy) and the "tail" entry multiplies
  // cosh(dy), i.e. they act as sinh(y0) and cosh(y0) respectively.
  __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
  __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
  __CLC_GENTYPE z = __clc_mad(sinhcoshh, sdy, sinhcosht * cdy);

  // When exp(-y) is insignificant compared to exp(y), return exp(y)/2.
  // The halving is folded into the exponent by subtracting a constant close
  // to ln(2); the mad then applies a small relative correction to t.
  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
  __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
  z = y >= small_threshold ? zsmall : z;

  // Corner cases, applied last so they override the paths above.
  // Overflow: +Inf.
  z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z;
  // |x| bit pattern above that of +Inf means x is NaN.
  z = aux > PINFBITPATT_SP32 ? __CLC_GENTYPE_NAN : z;
  // |x| < 2^-14 (0x38800000 is the bit pattern of 2^-14): return 1.
  z = aux < 0x38800000 ? 1.0f : z;

  return z;
}
90+
91+
#elif __CLC_FPSIZE == 64
92+
93+
// Double-precision hyperbolic cosine.
//
// cosh is an even function, so all work is done on y = |x| and the result
// carries no sign. After the special cases, the computation is split into
// regions as follows:
//
//   x is Inf or NaN:
//     result is |x| (+Inf for either infinity, NaN for NaN)
//
//   y >= max_cosh_arg:
//     cosh(x) = +Inf (overflow)
//
//   y >= small_threshold:
//     exp(-y) is insignificant compared to exp(y), so cosh(x) = exp(y)/2
//
//   y < 2^-28:
//     cosh(x) = 1.0
//
//   otherwise:
//     split y = y0 + dy with y0 the integer part and use
//       cosh(y) = cosh(y0)*cosh(dy) + sinh(y0)*sinh(dy)
//     where sinh(y0) and cosh(y0) are tabulated (head + tail pairs) and
//     sinh(dy), cosh(dy) are approximated by polynomials in dy.
//
// Every region is computed unconditionally and the final value is chosen
// with selects, so the same code serves scalar and vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
  // This is ln(2^1025) = 0x408633ce8fb9f87e
  const __CLC_GENTYPE max_cosh_arg = 7.10475860073943977113e+02;

  // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
  const __CLC_GENTYPE small_threshold = 0x1.2b708872320e2p+4;

  __CLC_GENTYPE y = __clc_fabs(x);

  // Integer part y0 of y, used as the table index and clamped to 0..36.
  // The lower clamp matters because converting NaN or a value that does not
  // fit in an int is not guaranteed to give a non-negative result; without
  // it such inputs could index the tables out of bounds. Values computed
  // from a clamped index are discarded by the selects below.
  __CLC_INTN ind = __clc_min(__CLC_CONVERT_INTN(y), 36);
  ind = ind < 0 ? 0 : ind;

  // Fractional remainder dy = y - y0 and its square.
  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
  __CLC_GENTYPE dy2 = dy * dy;

  // sdy = dy^3 * P(dy^2), so that sinh(dy) ~= dy + sdy (Horner's scheme).
  __CLC_GENTYPE sdy = __clc_fma(dy2, 0.7746188980094184251527126e-12,
                                0.160576793121939886190847e-9);
  sdy = __clc_fma(dy2, sdy, 0.250521176994133472333666e-7);
  sdy = __clc_fma(dy2, sdy, 0.275573191913636406057211e-5);
  sdy = __clc_fma(dy2, sdy, 0.198412698413242405162014e-3);
  sdy = __clc_fma(dy2, sdy, 0.833333333333329931873097e-2);
  sdy = __clc_fma(dy2, sdy, 0.166666666666666667013899e0);
  sdy = dy * dy2 * sdy;

  // cdy = dy^2 * Q(dy^2), so that cosh(dy) ~= 1 + cdy (Horner's scheme).
  __CLC_GENTYPE cdy = __clc_fma(dy2, 0.1163921388172173692062032e-10,
                                0.208744349831471353536305e-8);
  cdy = __clc_fma(dy2, cdy, 0.275573350756016588011357e-6);
  cdy = __clc_fma(dy2, cdy, 0.248015872460622433115785e-4);
  cdy = __clc_fma(dy2, cdy, 0.138888888889814854814536e-2);
  cdy = __clc_fma(dy2, cdy, 0.416666666666660876512776e-1);
  cdy = __clc_fma(dy2, cdy, 0.500000000000000005911074e0);
  cdy = dy2 * cdy;

  // At this point sinh(dy) is approximated by dy + sdy,
  // and cosh(dy) is approximated by 1 + cdy.
  // cl/ct and sl/st are the head/tail parts of the tabulated cosh(y0) and
  // sinh(y0).
  __CLC_GENTYPE cl = USE_TABLE(cosh_tbl_head, ind);
  __CLC_GENTYPE ct = USE_TABLE(cosh_tbl_tail, ind);
  __CLC_GENTYPE sl = USE_TABLE(sinh_tbl_head, ind);
  __CLC_GENTYPE st = USE_TABLE(sinh_tbl_tail, ind);

  // (cl + ct)*(1 + cdy) + (sl + st)*(dy + sdy), accumulated from the
  // tail-by-tail products outwards, with the cl term added last.
  __CLC_GENTYPE z =
      __clc_fma(
          sl, dy,
          __clc_fma(
              sl, sdy,
              __clc_fma(cl, cdy,
                        __clc_fma(st, dy, __clc_fma(st, sdy, ct * cdy)) +
                            ct))) +
      cl;

  // Tiny arguments: cosh(x) rounds to 1.
  z = y < 0x1.0p-28 ? 1.0 : z;

  // Large arguments: exp(y)/2. The halving is folded into the exponent by
  // subtracting a constant close to ln(2); the fma then applies a small
  // relative correction to t.
  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e42fefa3800p-1);
  t = __clc_fma(t, -0x1.ef35793c76641p-45, t);
  z = y >= small_threshold ? t : z;

  // Overflow: +Inf.
  z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;

  // Inf and NaN inputs return |x|.
  z = (__clc_isinf(x) || __clc_isnan(x)) ? y : z;

  return z;
}
192+
193+
#elif __CLC_FPSIZE == 16
194+
195+
// Half-precision hyperbolic cosine: converts the argument with
// __CLC_CONVERT_FLOATN, calls the __clc_cosh overload for that type, and
// converts the result back to the half-precision generic type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
196+
return __CLC_CONVERT_GENTYPE(__clc_cosh(__CLC_CONVERT_FLOATN(x)));
197+
}
198+
199+
#endif
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/internal/clc.h>
11+
#include <clc/math/clc_copysign.h>
12+
#include <clc/math/clc_exp.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_mad.h>
16+
#include <clc/math/math.h>
17+
#include <clc/math/tables.h>
18+
#include <clc/relational/clc_isinf.h>
19+
#include <clc/relational/clc_isnan.h>
20+
#include <clc/shared/clc_min.h>
21+
22+
// Instantiate the sinh implementation: gentype.inc is included with
// __CLC_BODY pointing at clc_sinh.inc.
// NOTE(review): presumably gentype.inc includes __CLC_BODY once per supported
// type and vector width - confirm against gentype.inc.
#define __CLC_BODY <clc_sinh.inc>
23+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)