Skip to content

Commit 4f107cd

Browse files
authored
[libclc] Move sin, cos & sincos to CLC library (llvm#139527)
This commit moves the remaining FP64 sin and cos helper functions to the CLC library. As a consequence, it formally moves all sin, cos and sincos builtins to the CLC library. Previously, the FP16 and FP32 versions were nominally in the CLC library but were still implemented in the OpenCL layer while waiting for the FP64 ones. The FP64 builtins are now vectorized, as the FP16 and FP32 ones already were. One helper table had to be changed. It was previously a table of bytes that each work-item loaded as uint4. Since this doesn't vectorize well, the table was split so that each work-item loads two ulongNs. While this might not be as efficient on some devices, one mitigating factor is that we were previously loading 48 bytes per work-item in total, but only using 40 of them. With this commit we only load the bytes we need.
1 parent 2ec13c5 commit 4f107cd

30 files changed

+611
-516
lines changed

libclc/clc/include/clc/math/clc_cos.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_COS_H__
10+
#define __CLC_MATH_CLC_COS_H__
11+
12+
// Declares __clc_cos by including gentype.inc with __CLC_BODY set to the
// unary declaration template and __CLC_FUNCTION set to the function name.
// NOTE(review): gentype.inc presumably expands __CLC_BODY once per supported
// floating-point gentype -- confirm against gentype.inc.
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_cos
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// __CLC_FUNCTION is undefined again so it does not leak into later includes.
#undef __CLC_FUNCTION
18+
19+
#endif // __CLC_MATH_CLC_COS_H__

libclc/clc/include/clc/math/clc_sin.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_SIN_H__
10+
#define __CLC_MATH_CLC_SIN_H__
11+
12+
// Declares __clc_sin by including gentype.inc with __CLC_BODY set to the
// unary declaration template and __CLC_FUNCTION set to the function name.
// NOTE(review): gentype.inc presumably expands __CLC_BODY once per supported
// floating-point gentype -- confirm against gentype.inc.
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_sin
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// __CLC_FUNCTION is undefined again so it does not leak into later includes.
#undef __CLC_FUNCTION
18+
19+
#endif // __CLC_MATH_CLC_SIN_H__
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_SINCOS_H__
10+
#define __CLC_MATH_CLC_SINCOS_H__
11+
12+
// Declares __clc_sincos by including gentype.inc with __CLC_BODY set to the
// "unary with pointer" declaration template and __CLC_FUNCTION set to the
// function name.
// NOTE(review): the template presumably declares one overload per pointer
// address space -- confirm against unary_decl_with_ptr.inc.
#define __CLC_BODY <clc/math/unary_decl_with_ptr.inc>
13+
#define __CLC_FUNCTION __clc_sincos
14+
15+
#include <clc/math/gentype.inc>
16+
17+
// __CLC_FUNCTION is undefined again so it does not leak into later includes.
#undef __CLC_FUNCTION
18+
19+
#endif // __CLC_MATH_CLC_SINCOS_H__

libclc/clc/include/clc/math/clc_sincos_helpers.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,11 @@
1616

1717
#undef __FLOAT_ONLY
1818

19+
#define __DOUBLE_ONLY
20+
#define __CLC_BODY <clc/math/clc_sincos_helpers_fp64.inc>
21+
22+
#include <clc/math/gentype.inc>
23+
24+
#undef __DOUBLE_ONLY
25+
1926
#endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Declaration of the FP64 "medium" argument-reduction helper. It takes x by
// value and reports its results through the three private-address-space
// pointers r, rr and regn. The FP64 __clc_sin/__clc_cos bodies select its
// results when |x| < 0x1.0p+47.
// NOTE(review): r/rr are presumably the head and tail of the reduced argument
// and regn the quadrant index -- confirm against the definition.
_CLC_DECL _CLC_OVERLOAD void
10+
__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
11+
private __CLC_DOUBLEN *rr,
12+
private __CLC_INTN *regn);
13+
14+
// Declaration of the FP64 "large" argument-reduction helper. It has the same
// signature as __clc_remainder_piby2_medium: x by value, results written
// through the private pointers r, rr and regn. The FP64 __clc_sin/__clc_cos
// bodies select its results when |x| is not below 0x1.0p+47.
// NOTE(review): r/rr are presumably the head and tail of the reduced argument
// and regn the quadrant index -- confirm against the definition.
_CLC_DECL _CLC_OVERLOAD void
15+
__clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
16+
private __CLC_DOUBLEN *rr,
17+
private __CLC_INTN *regn);

libclc/clc/include/clc/math/tables.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@
6161

6262
TABLE_FUNCTION_DECL(float2, log2_tbl);
6363
TABLE_FUNCTION_DECL(float2, log10_tbl);
64-
TABLE_FUNCTION_DECL(uint4, pibits_tbl);
6564

6665
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
6766
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@@ -75,6 +74,7 @@ CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
7574
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
7675
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
7776
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
77+
CLC_TABLE_FUNCTION_DECL(ulong, pibits_tbl);
7878

7979
#ifdef cl_khr_fp64
8080

libclc/clc/lib/generic/SOURCES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ math/clc_atanpi.cl
3232
math/clc_cbrt.cl
3333
math/clc_ceil.cl
3434
math/clc_copysign.cl
35+
math/clc_cos.cl
3536
math/clc_cosh.cl
3637
math/clc_cospi.cl
3738
math/clc_ep_log.cl
@@ -86,6 +87,8 @@ math/clc_rint.cl
8687
math/clc_rootn.cl
8788
math/clc_round.cl
8889
math/clc_rsqrt.cl
90+
math/clc_sin.cl
91+
math/clc_sincos.cl
8992
math/clc_sincos_helpers.cl
9093
math/clc_sinh.cl
9194
math/clc_sinpi.cl
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/math/clc_fabs.h>
13+
#include <clc/math/clc_sincos_helpers.h>
14+
#include <clc/math/clc_sincos_piby4.h>
15+
#include <clc/math/math.h>
16+
#include <clc/relational/clc_isinf.h>
17+
#include <clc/relational/clc_isnan.h>
18+
#include <clc/relational/clc_select.h>
19+
20+
#define __CLC_BODY <clc_cos.inc>
21+
#include <clc/math/gentype.inc>
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// FP32 overload of __clc_cos (compiled when __CLC_FPSIZE == 32).
// Works on |x|: __clc_argReductionS fills r0/r1 and returns regn, the sine and
// cosine kernels are evaluated on (r0, r1), and regn then picks which kernel
// result is used and whether its sign is flipped.
_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_cos(__CLC_FLOATN x) {
12+
__CLC_FLOATN absx = __clc_fabs(x);
13+
14+
__CLC_FLOATN r0, r1;
15+
// NOTE(review): regn is presumably the quadrant index of the reduced
// argument -- confirm against __clc_argReductionS.
__CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
16+
17+
// The sine kernel result is negated up front; it is the value used for odd
// regn below.
__CLC_FLOATN ss = -__clc_sinf_piby4(r0, r1);
18+
__CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
19+
20+
// Odd regn selects the negated sine, even regn selects the cosine.
__CLC_FLOATN c = (regn & 1) != 0 ? ss : cc;
21+
// Toggle bit 31 (the float sign bit) of c where regn > 1.
c = __CLC_AS_FLOATN(__CLC_AS_INTN(c) ^ ((regn > 1) << 31));
22+
23+
// NaN or infinite inputs produce __CLC_GENTYPE_NAN (assuming __clc_select
// follows OpenCL select(a, b, c) semantics -- confirm).
c = __clc_select(c, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
24+
25+
return c;
26+
}
27+
28+
#elif __CLC_FPSIZE == 16
29+
30+
// FP16 overload of __clc_cos (compiled when __CLC_FPSIZE == 16).
// Converts x with __CLC_CONVERT_FLOATN, evaluates __clc_cos on the converted
// value, and converts the result back to the gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
31+
return __CLC_CONVERT_GENTYPE(__clc_cos(__CLC_CONVERT_FLOATN(x)));
32+
}
33+
34+
#elif __CLC_FPSIZE == 64
35+
36+
// FP64 overload of __clc_cos (compiled when __CLC_FPSIZE == 64).
// Reduces |x| with both the "medium" and the "large" reduction helpers,
// keeps the result matching the magnitude of |x|, evaluates sine and cosine
// of the reduced argument, and uses regn to pick the value and its sign.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
37+
__CLC_GENTYPE absx = __clc_fabs(x);
38+
39+
// |x| below 2^47 takes the medium reduction result, everything else the
// large one.
__CLC_BIT_INTN is_medium = absx < 0x1.0p+47;
40+
41+
__CLC_INTN regn_m, regn_l;
42+
__CLC_GENTYPE r_m, r_l, rr_m, rr_l;
43+
44+
// Both reductions are always evaluated (no branch on is_medium); the
// selection happens afterwards with ternaries.
__clc_remainder_piby2_medium(absx, &r_m, &rr_m, &regn_m);
45+
__clc_remainder_piby2_large(absx, &r_l, &rr_l, &regn_l);
46+
47+
__CLC_GENTYPE r = is_medium ? r_m : r_l;
48+
__CLC_GENTYPE rr = is_medium ? rr_m : rr_l;
49+
// is_medium is converted to __CLC_INTN before selecting between the
// __CLC_INTN regn values.
// NOTE(review): presumably needed because the condition's element width
// must match the operands for vector types -- confirm.
__CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l;
50+
51+
__CLC_GENTYPE sinval, cosval;
52+
__clc_sincos_piby4(r, rr, &sinval, &cosval);
53+
// The sine is negated here; it is the value used for odd regn below.
sinval = -sinval;
54+
55+
// Odd regn selects the negated sine, even regn selects the cosine; the
// result is kept as raw bits so the sign can be toggled with XOR.
__CLC_LONGN c =
56+
__CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? sinval : cosval);
57+
// Toggle bit 63 (the double sign bit) where regn > 1.
c ^= __CLC_CONVERT_BIT_INTN(regn > 1) << 63;
58+
59+
// NaN or infinite inputs produce __CLC_GENTYPE_NAN.
return __clc_isnan(absx) | __clc_isinf(absx) ? __CLC_GENTYPE_NAN
60+
: __CLC_AS_GENTYPE(c);
61+
}
62+
63+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/internal/clc.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_sincos_helpers.h>
15+
#include <clc/math/clc_sincos_piby4.h>
16+
#include <clc/math/clc_trunc.h>
17+
#include <clc/math/math.h>
18+
#include <clc/math/tables.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/relational/clc_select.h>
22+
#include <clc/shared/clc_max.h>
23+
24+
#define __CLC_BODY <clc_sin.inc>
25+
#include <clc/math/gentype.inc>
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// FP32 overload of __clc_sin (compiled when __CLC_FPSIZE == 32).
// Works on |x|: __clc_argReductionS fills r0/r1 and returns regn, the sine and
// cosine kernels are evaluated on (r0, r1), and regn plus the sign of x decide
// which kernel result is used and what sign it gets.
_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_sin(__CLC_FLOATN x) {
12+
__CLC_FLOATN absx = __clc_fabs(x);
13+
14+
__CLC_FLOATN r0, r1;
15+
// NOTE(review): regn is presumably the quadrant index of the reduced
// argument -- confirm against __clc_argReductionS.
__CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
16+
17+
__CLC_FLOATN ss = __clc_sinf_piby4(r0, r1);
18+
__CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
19+
20+
// Odd regn selects the cosine kernel, even regn selects the sine kernel.
__CLC_FLOATN s = (regn & 1) != 0 ? cc : ss;
21+
// Two sign corrections are XORed into the raw bits: bit 31 where regn > 1,
// and the bits in which x and |x| differ (the sign of x, assuming __clc_fabs
// only clears the sign bit).
s = __CLC_AS_FLOATN(__CLC_AS_INTN(s) ^ ((regn > 1) << 31) ^
22+
(__CLC_AS_INTN(x) ^ __CLC_AS_INTN(absx)));
23+
24+
// NaN or infinite inputs produce __CLC_GENTYPE_NAN (assuming __clc_select
// follows OpenCL select(a, b, c) semantics -- confirm).
s = __clc_select(s, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
25+
26+
// Subnormals
// Inputs that compare equal to zero are returned unchanged, which also keeps
// the sign of a zero input.
27+
s = x == 0.0f ? x : s;
28+
29+
return s;
30+
}
31+
32+
#elif __CLC_FPSIZE == 16
33+
34+
// FP16 overload of __clc_sin (compiled when __CLC_FPSIZE == 16).
// Converts x with __CLC_CONVERT_FLOATN, evaluates __clc_sin on the converted
// value, and converts the result back to the gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) {
35+
return __CLC_CONVERT_GENTYPE(__clc_sin(__CLC_CONVERT_FLOATN(x)));
36+
}
37+
38+
#elif __CLC_FPSIZE == 64
39+
40+
// FP64 overload of __clc_sin (compiled when __CLC_FPSIZE == 64).
// Reduces |x| with both the "medium" and the "large" reduction helpers,
// keeps the result matching the magnitude of |x|, evaluates sine and cosine
// of the reduced argument, and uses regn and the sign of x to pick the value
// and its sign.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) {
41+
__CLC_GENTYPE absx = __clc_fabs(x);
42+
43+
// |x| below 2^47 takes the medium reduction result, everything else the
// large one.
__CLC_BIT_INTN is_medium = absx < 0x1.0p+47;
44+
45+
__CLC_INTN regn_m, regn_l;
46+
__CLC_GENTYPE r_m, r_l, rr_m, rr_l;
47+
48+
// Both reductions are always evaluated (no branch on is_medium); the
// selection happens afterwards with ternaries.
__clc_remainder_piby2_medium(absx, &r_m, &rr_m, &regn_m);
49+
__clc_remainder_piby2_large(absx, &r_l, &rr_l, &regn_l);
50+
51+
__CLC_GENTYPE r = is_medium ? r_m : r_l;
52+
__CLC_GENTYPE rr = is_medium ? rr_m : rr_l;
53+
// is_medium is converted to __CLC_INTN before selecting between the
// __CLC_INTN regn values.
// NOTE(review): presumably needed because the condition's element width
// must match the operands for vector types -- confirm.
__CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l;
54+
55+
__CLC_GENTYPE sinval, cosval;
56+
__clc_sincos_piby4(r, rr, &sinval, &cosval);
57+
58+
// Odd regn selects the cosine, even regn selects the sine; the result is
// kept as raw bits so the sign can be toggled with XOR.
__CLC_LONGN s =
59+
__CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? cosval : sinval);
60+
61+
// Toggle bit 63 (the double sign bit) where regn > 1, and again where x is
// negative.
// NOTE(review): `x < 0.0` is false for x == -0.0, so a negative zero input
// does not get its sign reapplied here, whereas the FP32 path returns x
// itself when x == 0 -- confirm whether sin(-0.0) is meant to yield +0.0.
s ^= (__CLC_CONVERT_BIT_INTN(regn > 1) << 63) ^
62+
(__CLC_CONVERT_BIT_INTN(x < 0.0) << 63);
63+
64+
// NaN or infinite inputs produce __CLC_GENTYPE_NAN.
return __clc_isinf(x) | __clc_isnan(x) ? __CLC_GENTYPE_NAN
65+
: __CLC_AS_GENTYPE(s);
66+
}
67+
68+
#endif
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/internal/clc.h>
10+
#include <clc/math/clc_cos.h>
11+
#include <clc/math/clc_sin.h>
12+
13+
#define __CLC_BODY <clc_sincos.inc>
14+
#include <clc/math/gentype.inc>

libclc/generic/lib/math/sincos.inc renamed to libclc/clc/lib/generic/math/clc_sincos.inc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
10-
_CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
11-
*cosval = cos(x); \
12-
return sin(x); \
9+
#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
10+
_CLC_OVERLOAD _CLC_DEF TYPE __clc_sincos(TYPE x, ADDRSPACE TYPE *cosval) { \
11+
*cosval = __clc_cos(x); \
12+
return __clc_sin(x); \
1313
}
1414

1515
__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)

libclc/clc/lib/generic/math/clc_sincos_helpers.cl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,27 @@
3131
#define __CLC_BODY <clc_sincos_helpers.inc>
3232

3333
#include <clc/math/gentype.inc>
34+
35+
#undef __FLOAT_ONLY
36+
37+
#ifdef cl_khr_fp64
38+
39+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
40+
41+
#include <clc/math/clc_fract.h>
42+
#include <clc/math/tables.h>
43+
#include <clc/shared/clc_max.h>
44+
45+
// bytealign(src0, src1, src2): forms a 64-bit value with src0 in the upper 32
// bits and src1 OR-ed into the lower bits, shifts it right by (src2 & 3)
// bytes, and converts the result to the uint gentype. Only the low two bits
// of src2 are used, so the shift is 0, 8, 16 or 24 bits.
// NOTE(review): the operands go through __CLC_CONVERT_LONGN (signed); this
// presumably relies on src0/src1 being unsigned 32-bit values -- confirm at
// the call sites.
#define bytealign(src0, src1, src2)                                            \
46+
(__CLC_CONVERT_UINTN(                                                        \
47+
((__CLC_CONVERT_LONGN((src0)) << 32) | __CLC_CONVERT_LONGN((src1))) >>   \
48+
(((src2) & 3) * 8)))
49+
50+
#define __DOUBLE_ONLY
51+
#define __CLC_BODY <clc_sincos_helpers_fp64.inc>
52+
53+
#include <clc/math/gentype.inc>
54+
55+
#undef __DOUBLE_ONLY
56+
57+
#endif

0 commit comments

Comments
 (0)