Skip to content

Commit dd19e7e

Browse files
authored
[libclc] Move cbrt to the CLC library; vectorize (#133940)
1 parent 9356091 commit dd19e7e

File tree

8 files changed

+552
-803
lines changed

8 files changed

+552
-803
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_CBRT_H__
10+
#define __CLC_MATH_CLC_CBRT_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_cbrt
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_CBRT_H__

libclc/clc/include/clc/math/tables.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ TABLE_FUNCTION_DECL(float2, log2_tbl);
6363
TABLE_FUNCTION_DECL(float2, log10_tbl);
6464
TABLE_FUNCTION_DECL(uint4, pibits_tbl);
6565
TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
66-
TABLE_FUNCTION_DECL(float2, cbrt_tbl);
6766

6867
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
6968
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@@ -73,6 +72,8 @@ CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl);
7372
CLC_TABLE_FUNCTION_DECL(float, exp_tbl);
7473
CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_head);
7574
CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_tail);
75+
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
76+
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
7677

7778
#ifdef cl_khr_fp64
7879

@@ -86,9 +87,11 @@ CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);
8687
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_tail);
8788
TABLE_FUNCTION_DECL(double2, sinh_tbl);
8889
TABLE_FUNCTION_DECL(double2, cosh_tbl);
89-
TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
90-
TABLE_FUNCTION_DECL(double2, cbrt_dbl_tbl);
91-
TABLE_FUNCTION_DECL(double2, cbrt_rem_tbl);
90+
CLC_TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
91+
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_head);
92+
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_tail);
93+
CLC_TABLE_FUNCTION_DECL(double, cbrt_rem_tbl_head);
94+
CLC_TABLE_FUNCTION_DECL(double, cbrt_rem_tbl_tail);
9295
CLC_TABLE_FUNCTION_DECL(double, powlog_tbl_head);
9396
CLC_TABLE_FUNCTION_DECL(double, powlog_tbl_tail);
9497
CLC_TABLE_FUNCTION_DECL(double, log_f_inv_tbl_head);

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ math/clc_atan2.cl
2828
math/clc_atan2pi.cl
2929
math/clc_atanh.cl
3030
math/clc_atanpi.cl
31+
math/clc_cbrt.cl
3132
math/clc_ceil.cl
3233
math/clc_copysign.cl
3334
math/clc_cospi.cl
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_copysign.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_mad.h>
16+
#include <clc/math/math.h>
17+
#include <clc/math/tables.h>
18+
#include <clc/relational/clc_isinf.h>
19+
#include <clc/relational/clc_isnan.h>
20+
21+
#define __CLC_BODY <clc_cbrt.inc>
22+
#include <clc/math/gentype.inc>
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cbrt(__CLC_GENTYPE x) {
12+
__CLC_UINTN xi = __CLC_AS_UINTN(x);
13+
__CLC_UINTN axi = __CLC_AS_UINTN(__clc_fabs(x));
14+
__CLC_UINTN xsign = axi ^ xi;
15+
xi = axi;
16+
17+
__CLC_INTN m = __CLC_AS_INTN(xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
18+
19+
// Treat subnormals
20+
__CLC_UINTN xisub = __CLC_AS_UINTN(__CLC_AS_GENTYPE(xi | 0x3f800000) - 1.0f);
21+
__CLC_INTN msub = __CLC_AS_INTN(xisub >> EXPSHIFTBITS_SP32) - 253;
22+
__CLC_INTN c = m == -127;
23+
xi = c ? xisub : xi;
24+
m = c ? msub : m;
25+
26+
__CLC_INTN m3 = m / 3;
27+
__CLC_INTN rem = m - m3 * 3;
28+
__CLC_GENTYPE mf = __CLC_AS_GENTYPE((m3 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
29+
30+
__CLC_UINTN indx = (xi & 0x007f0000) + ((xi & 0x00008000) << 1);
31+
__CLC_GENTYPE f = __CLC_AS_GENTYPE((xi & MANTBITS_SP32) | 0x3f000000) -
32+
__CLC_AS_GENTYPE(indx | 0x3f000000);
33+
34+
indx >>= 16;
35+
__CLC_GENTYPE r = f * USE_TABLE(log_inv_tbl, __CLC_AS_INTN(indx));
36+
__CLC_GENTYPE poly = __clc_mad(__clc_mad(r, 0x1.f9add4p-5f, -0x1.c71c72p-4f),
37+
r * r, r * 0x1.555556p-2f);
38+
39+
// This could also be done with a 5-element table
40+
__CLC_GENTYPE remH = 0x1.428000p-1f;
41+
__CLC_GENTYPE remT = 0x1.45f31ap-14f;
42+
43+
remH = rem == -1 ? 0x1.964000p-1f : remH;
44+
remT = rem == -1 ? 0x1.fea53ep-13f : remT;
45+
46+
remH = rem == 0 ? 0x1.000000p+0f : remH;
47+
remT = rem == 0 ? 0x0.000000p+0f : remT;
48+
49+
remH = rem == 1 ? 0x1.428000p+0f : remH;
50+
remT = rem == 1 ? 0x1.45f31ap-13f : remT;
51+
52+
remH = rem == 2 ? 0x1.964000p+0f : remH;
53+
remT = rem == 2 ? 0x1.fea53ep-12f : remT;
54+
55+
__CLC_GENTYPE cbrtH = USE_TABLE(cbrt_tbl_head, __CLC_AS_INTN(indx));
56+
__CLC_GENTYPE cbrtT = USE_TABLE(cbrt_tbl_tail, __CLC_AS_INTN(indx));
57+
58+
__CLC_GENTYPE bH = cbrtH * remH;
59+
__CLC_GENTYPE bT =
60+
__clc_mad(cbrtH, remT, __clc_mad(cbrtT, remH, cbrtT * remT));
61+
62+
__CLC_GENTYPE z = __clc_mad(poly, bH, __clc_mad(poly, bT, bT)) + bH;
63+
z *= mf;
64+
z = __CLC_AS_GENTYPE(__CLC_AS_UINTN(z) | xsign);
65+
c = axi >= EXPBITS_SP32 || axi == 0;
66+
z = c ? x : z;
67+
return z;
68+
}
69+
70+
#elif __CLC_FPSIZE == 64
71+
72+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cbrt(__CLC_GENTYPE x) {
73+
__CLC_LONGN return_x = __clc_isinf(x) || __clc_isnan(x) || x == 0.0;
74+
__CLC_ULONGN ux = __CLC_AS_ULONGN(__clc_fabs(x));
75+
__CLC_INTN m = __CLC_CONVERT_INTN(ux >> EXPSHIFTBITS_DP64) - 1023;
76+
77+
// Treat subnormals
78+
__CLC_ULONGN uxs =
79+
__CLC_AS_ULONGN(__CLC_AS_GENTYPE(0x3ff0000000000000UL | ux) - 1.0);
80+
__CLC_INTN ms = m + __CLC_CONVERT_INTN(uxs >> EXPSHIFTBITS_DP64) - 1022;
81+
82+
__CLC_INTN c = m == -1023;
83+
ux = __CLC_CONVERT_LONGN(c) ? uxs : ux;
84+
m = c ? ms : m;
85+
86+
__CLC_INTN mby3 = m / 3;
87+
__CLC_INTN rem = m - 3 * mby3;
88+
89+
__CLC_GENTYPE mf = __CLC_AS_GENTYPE(__CLC_CONVERT_ULONGN(mby3 + 1023) << 52);
90+
91+
ux &= 0x000fffffffffffffUL;
92+
__CLC_GENTYPE Y = __CLC_AS_GENTYPE(0x3fe0000000000000UL | ux);
93+
94+
// nearest integer
95+
__CLC_INTN index = __CLC_CONVERT_INTN(ux >> 43);
96+
index = (0x100 | (index >> 1)) + (index & 1);
97+
__CLC_GENTYPE F = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-9;
98+
99+
__CLC_GENTYPE f = Y - F;
100+
__CLC_GENTYPE r = f * USE_TABLE(cbrt_inv_tbl, index - 256);
101+
102+
__CLC_GENTYPE z =
103+
r * __clc_fma(
104+
r,
105+
__clc_fma(r,
106+
__clc_fma(r,
107+
__clc_fma(r,
108+
__clc_fma(r, -0x1.8090d6221a247p-6,
109+
0x1.ee7113506ac13p-6),
110+
-0x1.511e8d2b3183bp-5),
111+
0x1.f9add3c0ca458p-5),
112+
-0x1.c71c71c71c71cp-4),
113+
0x1.5555555555555p-2);
114+
115+
__CLC_GENTYPE Rem_h = USE_TABLE(cbrt_rem_tbl_head, rem + 2);
116+
__CLC_GENTYPE Rem_t = USE_TABLE(cbrt_rem_tbl_tail, rem + 2);
117+
118+
__CLC_GENTYPE F_h = USE_TABLE(cbrt_dbl_tbl_head, index - 256);
119+
__CLC_GENTYPE F_t = USE_TABLE(cbrt_dbl_tbl_tail, index - 256);
120+
121+
__CLC_GENTYPE b_h = F_h * Rem_h;
122+
__CLC_GENTYPE b_t = __clc_fma(Rem_t, F_h, __clc_fma(F_t, Rem_h, F_t * Rem_t));
123+
124+
__CLC_GENTYPE ans = __clc_fma(z, b_h, __clc_fma(z, b_t, b_t)) + b_h;
125+
ans = __clc_copysign(ans * mf, x);
126+
return return_x ? x : ans;
127+
}
128+
129+
#elif __CLC_FPSIZE == 16
130+
131+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cbrt(__CLC_GENTYPE x) {
132+
return __CLC_CONVERT_GENTYPE(__clc_cbrt(__CLC_CONVERT_FLOATN(x)));
133+
}
134+
135+
#endif

0 commit comments

Comments
 (0)