Skip to content

Commit 3284559

Browse files
authored
[libclc] Move atan2/atan2pi to the CLC library (#133226)
As with other work in this area, these builtins are now vectorized. A further table has been split into two. There was discrepancy between comments above the table describing the values as "lead" and "tail" and variables taken from the table called "head" and "tail", so these have been unified as head/tail.
1 parent 6c21716 commit 3284559

File tree

12 files changed

+756
-674
lines changed

12 files changed

+756
-674
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ATAN2_H__
10+
#define __CLC_MATH_CLC_ATAN2_H__
11+
12+
#define __CLC_BODY <clc/shared/binary_decl.inc>
13+
#define __CLC_FUNCTION __clc_atan2
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_ATAN2_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ATAN2PI_H__
10+
#define __CLC_MATH_CLC_ATAN2PI_H__
11+
12+
#define __CLC_BODY <clc/shared/binary_decl.inc>
13+
#define __CLC_FUNCTION __clc_atan2pi
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_ATAN2PI_H__

libclc/clc/include/clc/math/tables.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl);
7979
TABLE_FUNCTION_DECL(double2, ln_tbl);
8080
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo);
8181
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi);
82-
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
82+
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
83+
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
8384
TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
8485
TABLE_FUNCTION_DECL(double2, sinh_tbl);
8586
TABLE_FUNCTION_DECL(double2, cosh_tbl);

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ math/clc_asin.cl
2424
math/clc_asinh.cl
2525
math/clc_asinpi.cl
2626
math/clc_atan.cl
27+
math/clc_atan2.cl
28+
math/clc_atan2pi.cl
2729
math/clc_atanh.cl
2830
math/clc_atanpi.cl
2931
math/clc_ceil.cl
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_copysign.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_ldexp.h>
16+
#include <clc/math/clc_mad.h>
17+
#include <clc/math/math.h>
18+
#include <clc/math/tables.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/relational/clc_select.h>
22+
#include <clc/shared/clc_max.h>
23+
#include <clc/shared/clc_min.h>
24+
25+
#define __CLC_BODY <clc_atan2.inc>
26+
#include <clc/math/gentype.inc>
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y,
12+
__CLC_GENTYPE x) {
13+
const __CLC_GENTYPE pi = 0x1.921fb6p+1f;
14+
const __CLC_GENTYPE piby2 = 0x1.921fb6p+0f;
15+
const __CLC_GENTYPE piby4 = 0x1.921fb6p-1f;
16+
const __CLC_GENTYPE threepiby4 = 0x1.2d97c8p+1f;
17+
18+
__CLC_GENTYPE ax = __clc_fabs(x);
19+
__CLC_GENTYPE ay = __clc_fabs(y);
20+
__CLC_GENTYPE v = __clc_min(ax, ay);
21+
__CLC_GENTYPE u = __clc_max(ax, ay);
22+
23+
// Scale since u could be large, as in "regular" divide
24+
__CLC_GENTYPE s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
25+
__CLC_GENTYPE vbyu = s * MATH_DIVIDE(v, s * u);
26+
27+
__CLC_GENTYPE vbyu2 = vbyu * vbyu;
28+
29+
#define USE_2_2_APPROXIMATION
30+
#if defined USE_2_2_APPROXIMATION
31+
__CLC_GENTYPE p =
32+
__clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f),
33+
-0x1.5554d0p-2f) *
34+
vbyu2 * vbyu;
35+
__CLC_GENTYPE q =
36+
__clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
37+
#else
38+
__CLC_GENTYPE p =
39+
__clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f),
40+
-0x1.55554ep-2f) *
41+
vbyu2 * vbyu;
42+
__CLC_GENTYPE q = __clc_mad(
43+
vbyu2,
44+
__clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f),
45+
0x1.76b4b8p+0f),
46+
1.0f);
47+
#endif
48+
49+
// Octant 0 result
50+
__CLC_GENTYPE a = __clc_mad(p, MATH_RECIP(q), vbyu);
51+
52+
// Fix up 3 other octants
53+
__CLC_GENTYPE at = piby2 - a;
54+
a = ay > ax ? at : a;
55+
at = pi - a;
56+
a = x < 0.0F ? at : a;
57+
58+
// y == 0 => 0 for x >= 0, pi for x < 0
59+
at = __CLC_AS_INTN(x) < 0 ? pi : 0.0f;
60+
a = y == 0.0f ? at : a;
61+
62+
// x and y are +- Inf
63+
at = x > 0.0f ? piby4 : threepiby4;
64+
a = __clc_select(a, at, __clc_isinf(x) && __clc_isinf(y));
65+
66+
// x or y is NaN
67+
a = __clc_select(a, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isnan(y));
68+
69+
// Fixup sign and return
70+
return __clc_copysign(a, y);
71+
}
72+
73+
#elif __CLC_FPSIZE == 64
74+
75+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y,
76+
__CLC_GENTYPE x) {
77+
const __CLC_GENTYPE pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */
78+
const __CLC_GENTYPE piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
79+
const __CLC_GENTYPE piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
80+
// 0x4002d97c7f3321d2
81+
const __CLC_GENTYPE three_piby4 = 2.3561944901923449e+00;
82+
const __CLC_GENTYPE pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
83+
const __CLC_GENTYPE pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
84+
// 0x3ff921fb54442d18
85+
const __CLC_GENTYPE piby2_head = 1.5707963267948965e+00;
86+
// 0x3c91a62633145c07
87+
const __CLC_GENTYPE piby2_tail = 6.1232339957367660e-17;
88+
89+
__CLC_GENTYPE x2 = x;
90+
// Important to capture -0.0 in xneg and yneg, so comparison done as integer
91+
__CLC_LONGN xneg = __CLC_AS_LONGN(x) < 0;
92+
__CLC_INTN xexp =
93+
__CLC_CONVERT_INTN(__CLC_AS_ULONGN(x) >> EXPSHIFTBITS_DP64) & 0x7ff;
94+
95+
__CLC_GENTYPE y2 = y;
96+
__CLC_LONGN yneg = __CLC_AS_LONGN(y) < 0;
97+
__CLC_INTN yexp =
98+
__CLC_CONVERT_INTN(__CLC_AS_ULONGN(y) >> EXPSHIFTBITS_DP64) & 0x7ff;
99+
100+
__CLC_LONGN cond2 = __CLC_CONVERT_LONGN(xexp < 1021 && yexp < 1021);
101+
__CLC_LONGN diffexp = __CLC_CONVERT_LONGN(yexp - xexp);
102+
103+
// Scale up both x and y if they are both below 1/4
104+
__CLC_GENTYPE x1 = __clc_ldexp(x, 1024);
105+
__CLC_INTN xexp1 =
106+
__CLC_CONVERT_INTN(__CLC_AS_ULONGN(x1) >> EXPSHIFTBITS_DP64) & 0x7ff;
107+
__CLC_GENTYPE y1 = __clc_ldexp(y, 1024);
108+
__CLC_INTN yexp1 =
109+
__CLC_CONVERT_INTN(__CLC_AS_ULONGN(y1) >> EXPSHIFTBITS_DP64) & 0x7ff;
110+
__CLC_LONGN diffexp1 = __CLC_CONVERT_LONGN(yexp1 - xexp1);
111+
112+
diffexp = __clc_select(diffexp, diffexp1, cond2);
113+
x = cond2 ? x1 : x;
114+
y = cond2 ? y1 : y;
115+
116+
// General case: take absolute values of arguments
117+
__CLC_GENTYPE u = __clc_fabs(x);
118+
__CLC_GENTYPE v = __clc_fabs(y);
119+
120+
// Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
121+
__CLC_LONGN swap_vu = u < v;
122+
__CLC_GENTYPE uu = u;
123+
u = swap_vu ? v : u;
124+
v = swap_vu ? uu : v;
125+
126+
__CLC_GENTYPE vbyu = v / u;
127+
__CLC_GENTYPE q1, q2;
128+
129+
// General values of v/u. Use a look-up table and series expansion.
130+
131+
{
132+
__CLC_GENTYPE val = vbyu > 0.0625 ? vbyu : 0.063;
133+
__CLC_INTN index = __CLC_CONVERT_INTN(__clc_fma(256.0, val, 0.5));
134+
q1 = USE_TABLE(atan_jby256_tbl_head, index - 16);
135+
q2 = USE_TABLE(atan_jby256_tbl_tail, index - 16);
136+
__CLC_GENTYPE c = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-8;
137+
138+
// We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
139+
// u_exponent could be EMAX so we have to do it in 2 steps
140+
__CLC_INTN m =
141+
-(__CLC_CONVERT_INTN(__CLC_AS_ULONGN(u) >> EXPSHIFTBITS_DP64) -
142+
EXPBIAS_DP64);
143+
__CLC_GENTYPE um = __clc_ldexp(u, m);
144+
__CLC_GENTYPE vm = __clc_ldexp(v, m);
145+
146+
// 26 leading bits of u
147+
__CLC_GENTYPE u1 =
148+
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL);
149+
__CLC_GENTYPE u2 = um - u1;
150+
151+
__CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)),
152+
__clc_fma(c, vm, um));
153+
154+
// Polynomial approximation to atan(r)
155+
__CLC_GENTYPE s = r * r;
156+
q2 = q2 + __clc_fma((s * __clc_fma(-s, 0.19999918038989143496,
157+
0.33333333333224095522)),
158+
-r, r);
159+
}
160+
161+
__CLC_GENTYPE q3, q4;
162+
{
163+
q3 = 0.0;
164+
q4 = vbyu;
165+
}
166+
167+
__CLC_GENTYPE q5, q6;
168+
{
169+
__CLC_GENTYPE u1 =
170+
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL);
171+
__CLC_GENTYPE u2 = u - u1;
172+
__CLC_GENTYPE vu1 =
173+
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL);
174+
__CLC_GENTYPE vu2 = vbyu - vu1;
175+
176+
q5 = 0.0;
177+
__CLC_GENTYPE s = vbyu * vbyu;
178+
q6 = vbyu +
179+
__clc_fma(
180+
-vbyu * s,
181+
__clc_fma(
182+
-s,
183+
__clc_fma(-s,
184+
__clc_fma(-s,
185+
__clc_fma(-s, 0.90029810285449784439E-01,
186+
0.11110736283514525407),
187+
0.14285713561807169030),
188+
0.19999999999393223405),
189+
0.33333333333333170500),
190+
MATH_DIVIDE(__clc_fma(-u, vu2,
191+
__clc_fma(-u2, vu1, __clc_fma(-u1, vu1, v))),
192+
u));
193+
}
194+
195+
q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
196+
q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
197+
198+
q1 = vbyu > 0.0625 ? q1 : q3;
199+
q2 = vbyu > 0.0625 ? q2 : q4;
200+
201+
// Tidy-up according to which quadrant the arguments lie in
202+
__CLC_GENTYPE res1, res2, res3, res4;
203+
q1 = swap_vu ? piby2_head - q1 : q1;
204+
q2 = swap_vu ? piby2_tail - q2 : q2;
205+
q1 = xneg ? pi_head - q1 : q1;
206+
q2 = xneg ? pi_tail - q2 : q2;
207+
q1 = q1 + q2;
208+
res4 = yneg ? -q1 : q1;
209+
210+
res1 = yneg ? -three_piby4 : three_piby4;
211+
res2 = yneg ? -piby4 : piby4;
212+
res3 = xneg ? res1 : res2;
213+
214+
res3 = __clc_select(res4, res3,
215+
__CLC_CONVERT_LONGN(__clc_isinf(x2) && __clc_isinf(y2)));
216+
res1 = yneg ? -pi : pi;
217+
218+
// abs(x)/abs(y) > 2^56 and x < 0
219+
res3 = (diffexp < -56 && xneg) ? res1 : res3;
220+
221+
res4 = MATH_DIVIDE(y, x);
222+
// x positive and dominant over y by a factor of 2^28
223+
res3 = diffexp < -28 && xneg == 0 ? res4 : res3;
224+
225+
// abs(y)/abs(x) > 2^56
226+
res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2
227+
res3 = diffexp > 56 ? res4 : res3;
228+
229+
res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y
230+
res4 = xneg ? res1 : y2;
231+
232+
// Zero y gives +-0 for positive x and +-pi for negative x
233+
res3 = y2 == 0.0 ? res4 : res3;
234+
res3 = __clc_isnan(y2) ? y2 : res3;
235+
res3 = __clc_isnan(x2) ? x2 : res3;
236+
237+
return res3;
238+
}
239+
240+
#elif __CLC_FPSIZE == 16
241+
242+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE x,
243+
__CLC_GENTYPE y) {
244+
return __CLC_CONVERT_GENTYPE(
245+
__clc_atan2(__CLC_CONVERT_FLOATN(x), __CLC_CONVERT_FLOATN(y)));
246+
}
247+
248+
#endif
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_copysign.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_ldexp.h>
16+
#include <clc/math/clc_mad.h>
17+
#include <clc/math/math.h>
18+
#include <clc/math/tables.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/relational/clc_select.h>
22+
#include <clc/shared/clc_max.h>
23+
#include <clc/shared/clc_min.h>
24+
25+
#define __CLC_BODY <clc_atan2pi.inc>
26+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)