Skip to content

Commit 95c683f

Browse files
authored
[libclc] Move logb/ilogb to CLC library; optimize (#128028)
This commit moves the logb and ilogb builtins to the CLC library. It simultaneously optimizes them both for vector types and for half types. Vector types were being scalarized in some cases. Half types were previously promoting to float, whereas this commit provides them a native implementation. Everything passes the OpenCL-CTS. I had to intuit some magic numbers used by these implementations in order to generate the half variants. I gave them clearer definitions derived from what I believe are their actual component numbers, but named them 'magic' to convey that they weren't derived from first principles.
1 parent 0e8f0b5 commit 95c683f

File tree

13 files changed

+259
-99
lines changed

13 files changed

+259
-99
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ILOGB_H__
10+
#define __CLC_MATH_CLC_ILOGB_H__
11+
// Declare the __clc_ilogb overloads. The body template declares
// `__CLC_INTN __CLC_FUNCTION(__CLC_GENTYPE x)`, i.e. an integer result for a
// floating-point argument; gentype.inc presumably instantiates that body once
// per supported scalar/vector floating-point type (TODO confirm).
12+
#define __CLC_FUNCTION __clc_ilogb
13+
#define __CLC_BODY <clc/math/unary_decl_with_int_return.inc>
14+
#include <clc/math/gentype.inc>
15+
// Drop the helper macro so it does not leak into files including this header.
16+
#undef __CLC_FUNCTION
17+
18+
#endif // __CLC_MATH_CLC_ILOGB_H__
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_LOGB_H__
10+
#define __CLC_MATH_CLC_LOGB_H__
11+
// Declare the __clc_logb overloads through the shared unary declaration
// template. NOTE(review): the template is not shown here; it presumably
// declares `__CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x)`, matching the
// signature used by the definitions in clc_logb.inc.
12+
#define __CLC_FUNCTION __clc_logb
13+
#define __CLC_BODY <clc/shared/unary_decl.inc>
14+
#include <clc/math/gentype.inc>
15+
// Drop the helper macro so it does not leak into files including this header.
16+
#undef __CLC_FUNCTION
17+
18+
#endif // __CLC_MATH_CLC_LOGB_H__

libclc/clc/include/clc/math/math.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
5252
#define INDEFBITPATT_SP32 0xffc00000
5353
#define PINFBITPATT_SP32 0x7f800000
5454
#define NINFBITPATT_SP32 0xff800000
55+
#define NUMEXPBITS_SP32 8
5556
#define EXPBIAS_SP32 127
5657
#define EXPSHIFTBITS_SP32 23
5758
#define BIASEDEMIN_SP32 1
@@ -62,6 +63,8 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
6263
#define MANTLENGTH_SP32 24
6364
#define BASEDIGITS_SP32 7
6465

66+
// Constant term of the subnormal path in __clc_logb/__clc_ilogb; evaluates to
// 1 + 8 - 127 = -118. For a subnormal whose highest set mantissa bit is bit p,
// a 32-bit leading-zero count gives clz = 31 - p and the unbiased exponent is
// p - (EXPSHIFTBITS_SP32 + EXPBIAS_SP32 - 1) = p - 149, which equals
// LOG_MAGIC_NUM_SP32 - clz.
#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
67+
6568
_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
6669
int ix = __clc_as_int(x);
6770
if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
@@ -86,6 +89,7 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
8689
#define INDEFBITPATT_DP64 0xfff8000000000000L
8790
#define PINFBITPATT_DP64 0x7ff0000000000000L
8891
#define NINFBITPATT_DP64 0xfff0000000000000L
92+
#define NUMEXPBITS_DP64 11
8993
#define EXPBIAS_DP64 1023
9094
#define EXPSHIFTBITS_DP64 52
9195
#define BIASEDEMIN_DP64 1
@@ -96,8 +100,26 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
96100
#define MANTLENGTH_DP64 53
97101
#define BASEDIGITS_DP64 15
98102

103+
// Constant term of the subnormal path in __clc_logb/__clc_ilogb; evaluates to
// 1 + 11 - 1023 = -1011. For a subnormal whose highest set mantissa bit is
// bit p, a 64-bit leading-zero count gives clz = 63 - p and the unbiased
// exponent is p - (EXPSHIFTBITS_DP64 + EXPBIAS_DP64 - 1) = p - 1074, which
// equals LOG_MAGIC_NUM_DP64 - clz.
#define LOG_MAGIC_NUM_DP64 (1 + NUMEXPBITS_DP64 - EXPBIAS_DP64)
104+
99105
#endif // cl_khr_fp64
100106

107+
#ifdef cl_khr_fp16
108+
// Bit-layout constants for the 16-bit half type. As encoded by the masks
// below, bit 15 is the sign, bits 14..10 hold the biased exponent and bits
// 9..0 hold the mantissa.
109+
#define SIGNBIT_FP16 0x8000
110+
#define EXSIGNBIT_FP16 0x7fff
111+
#define EXPBITS_FP16 0x7c00
112+
#define MANTBITS_FP16 0x03ff
// Infinity patterns: exponent field all ones, mantissa zero, sign clear/set.
113+
#define PINFBITPATT_FP16 0x7c00
114+
#define NINFBITPATT_FP16 0xfc00
115+
#define NUMEXPBITS_FP16 5
116+
#define EXPBIAS_FP16 15
117+
#define EXPSHIFTBITS_FP16 10
118+
// Constant term of the subnormal path in __clc_logb/__clc_ilogb; evaluates to
// 1 + 5 - 15 = -9. With a 16-bit leading-zero count, a subnormal whose highest
// set mantissa bit is bit p has clz = 15 - p and unbiased exponent p - 24,
// which equals LOG_MAGIC_NUM_FP16 - clz.
119+
#define LOG_MAGIC_NUM_FP16 (1 + NUMEXPBITS_FP16 - EXPBIAS_FP16)
120+
121+
#endif // cl_khr_fp16
122+
101123
#define ALIGNED(x) __attribute__((aligned(x)))
102124

103125
#endif // __CLC_MATH_MATH_H__

libclc/generic/include/clc/math/ilogb.inc renamed to libclc/clc/include/clc/math/unary_decl_with_int_return.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
_CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x);
9+
_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_GENTYPE x);
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/utils.h>
10+
// Default mapping from a public builtin name to its CLC implementation: the
// name is prefixed with `__clc_` (e.g. ilogb -> __clc_ilogb), presumably by
// token pasting inside __CLC_CONCAT. An includer may define __CLC_FUNCTION
// beforehand to supply a different mapping.
11+
#ifndef __CLC_FUNCTION
12+
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
13+
#endif
14+
// Body template: defines the builtin named by FUNCTION for the current
// gentype as a thin forwarder to the mapped implementation, returning
// __CLC_INTN. The includer must define FUNCTION before including this file.
15+
_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_GENTYPE a) {
16+
return __CLC_FUNCTION(FUNCTION)(a);
17+
}

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@ math/clc_half_sin.cl
6969
math/clc_half_sqrt.cl
7070
math/clc_half_tan.cl
7171
math/clc_hypot.cl
72+
math/clc_ilogb.cl
7273
math/clc_ldexp.cl
7374
math/clc_lgamma.cl
7475
math/clc_lgamma_r.cl
7576
math/clc_log.cl
7677
math/clc_log10.cl
7778
math/clc_log1p.cl
7879
math/clc_log2.cl
80+
math/clc_logb.cl
7981
math/clc_mad.cl
8082
math/clc_maxmag.cl
8183
math/clc_minmag.cl
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/integer/clc_clz.h>
13+
#include <clc/internal/clc.h>
14+
#include <clc/math/math.h>
15+
16+
#define __CLC_BODY <clc_ilogb.inc>
17+
#include <clc/math/gentype.inc>
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// 32-bit float variant. Returns, per component, the unbiased binary exponent
// of x as an integer. Zero yields FP_ILOGB0; inputs whose masked bit pattern
// is above EXPBITS_SP32 yield FP_ILOGBNAN and inputs equal to it yield
// 0x7fffffff (NOTE(review): EXPBITS_SP32 is presumably the exponent-field
// mask, making these the NaN and infinity cases -- cf. EXPBITS_FP16).
// Every case is a `?:` select rather than a branch, so the same body serves
// scalar and vector gentypes; later selects override earlier ones, so their
// order matters.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
12+
__CLC_UINTN ux = __CLC_AS_UINTN(x);
13+
// ax: bit pattern masked with EXSIGNBIT_SP32 (presumably clears the sign bit,
// by analogy with EXSIGNBIT_FP16 == 0x7fff).
__CLC_UINTN ax = ux & EXSIGNBIT_SP32;
14+
// rs: result for subnormal inputs, derived from the position of the highest
// set mantissa bit (LOG_MAGIC_NUM_SP32 evaluates to -118; assumes __clc_clz
// counts leading zero bits over the full 32-bit width).
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_SP32 -
15+
__CLC_AS_INTN(__clc_clz(ux & MANTBITS_SP32));
16+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_INTN r = __CLC_AS_INTN(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
17+
// 0x00800000 is 1 << EXPSHIFTBITS_SP32: below it the exponent field is zero.
r = ax < 0x00800000U ? rs : r;
18+
r = ax == 0 ? FP_ILOGB0 : r;
19+
20+
// The two tests below could be merged into:
21+
//
22+
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
23+
//
24+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
25+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
26+
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
27+
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
28+
return r;
29+
}
30+
31+
#endif
32+
33+
#if __CLC_FPSIZE == 64
34+
35+
// 64-bit double variant. Returns, per component, the unbiased binary exponent
// of x as an integer. Zero yields FP_ILOGB0, bit patterns above
// 0x7ff0000000000000 (NaN) yield FP_ILOGBNAN and that pattern itself
// (infinity, cf. PINFBITPATT_DP64) yields 0x7fffffff. The work is done on
// 64-bit integers; values and comparison results are converted to __CLC_INTN
// so that the select conditions match the int-typed result. Later selects
// override earlier ones, so their order matters.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
36+
__CLC_ULONGN ux = __CLC_AS_ULONGN(x);
37+
// ax: bit pattern of x with the sign bit cleared.
__CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
38+
// rs: result for subnormal inputs, derived from the position of the highest
// set mantissa bit (LOG_MAGIC_NUM_DP64 evaluates to -1011; assumes __clc_clz
// counts leading zero bits over the full 64-bit width).
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_DP64 -
39+
__CLC_CONVERT_INTN(__clc_clz(ax & MANTBITS_DP64));
40+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_INTN r = __CLC_CONVERT_INTN(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
41+
// 0x0010000000000000 is 1 << EXPSHIFTBITS_DP64: below it the exponent field
// is zero.
r = __CLC_CONVERT_INTN(ax < 0x0010000000000000UL) ? rs : r;
42+
r = __CLC_CONVERT_INTN(ax == 0UL) ? (__CLC_INTN)FP_ILOGB0 : r;
43+
44+
// The two tests below could be merged into:
45+
//
46+
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
47+
//
48+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
49+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
50+
r = __CLC_CONVERT_INTN(ax > 0x7ff0000000000000UL) ? FP_ILOGBNAN : r;
51+
r = __CLC_CONVERT_INTN(ax == 0x7ff0000000000000UL) ? 0x7fffffff : r;
52+
return r;
53+
}
54+
55+
#endif
56+
57+
#if __CLC_FPSIZE == 16
58+
59+
// 16-bit half variant, computed directly on the 16-bit pattern. Returns, per
// component, the unbiased binary exponent of x as an integer. Zero yields
// FP_ILOGB0, patterns above EXPBITS_FP16 (NaN) yield FP_ILOGBNAN and
// EXPBITS_FP16 itself (infinity, same value as PINFBITPATT_FP16) yields
// 0x7fffffff. 16-bit values and comparison results are converted to
// __CLC_INTN so the select conditions match the int-typed result. Later
// selects override earlier ones, so their order matters.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
60+
__CLC_USHORTN ux = __CLC_AS_USHORTN(x);
61+
// ax: bit pattern of x with the sign bit cleared (EXSIGNBIT_FP16 is 0x7fff).
__CLC_USHORTN ax = ux & (__CLC_USHORTN)EXSIGNBIT_FP16;
62+
__CLC_USHORTN mantx = ux & (__CLC_USHORTN)MANTBITS_FP16;
63+
// rs: result for subnormal inputs, derived from the position of the highest
// set mantissa bit (LOG_MAGIC_NUM_FP16 evaluates to -9; assumes __clc_clz
// counts leading zero bits over the full 16-bit width).
__CLC_INTN rs =
64+
(__CLC_INTN)LOG_MAGIC_NUM_FP16 - __CLC_CONVERT_INTN(__clc_clz(mantx));
65+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_INTN r =
66+
__CLC_CONVERT_INTN(ax >> (__CLC_USHORTN)EXPSHIFTBITS_FP16) - EXPBIAS_FP16;
67+
// 0x0400 is 1 << EXPSHIFTBITS_FP16: below it the exponent field is zero.
r = __CLC_CONVERT_INTN(ax < (__CLC_USHORTN)0x0400U) ? rs : r;
68+
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)0) ? (__CLC_INTN)FP_ILOGB0 : r;
69+
70+
// The two tests below could be merged into:
71+
//
72+
// r = ax >= EXPBITS_FP16 ? 0x7fffffff : r
73+
//
74+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
75+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
76+
r = __CLC_CONVERT_INTN(ax > (__CLC_USHORTN)EXPBITS_FP16) ? FP_ILOGBNAN : r;
77+
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)EXPBITS_FP16) ? 0x7fffffff : r;
78+
return r;
79+
}
80+
81+
#endif
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/integer/clc_clz.h>
13+
#include <clc/internal/clc.h>
14+
#include <clc/math/math.h>
15+
16+
#define __CLC_BODY <clc_logb.inc>
17+
#include <clc/math/gentype.inc>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// 32-bit float variant. Returns, per component, the unbiased binary exponent
// of x as a floating-point value. Inputs at or above the +infinity bit pattern
// (infinity and NaN) return the masked pattern `ax` reinterpreted as a float;
// zero returns negative infinity. Every case is a `?:` select so the same body
// serves scalar and vector gentypes; later selects override earlier ones.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
12+
// ax: bit pattern masked with EXSIGNBIT_SP32 (presumably clears the sign bit,
// by analogy with EXSIGNBIT_FP16 == 0x7fff).
__CLC_INTN ax = __CLC_AS_INTN(x) & EXSIGNBIT_SP32;
13+
// s: result for subnormal inputs. Only meaningful when the exponent field of
// ax is zero, so the leading-zero count locates the highest mantissa bit
// (LOG_MAGIC_NUM_SP32 evaluates to -118).
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_SP32 - __clc_clz(ax));
14+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_GENTYPE r =
15+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32);
16+
r = ax >= PINFBITPATT_SP32 ? __CLC_AS_GENTYPE(ax) : r;
17+
// 0x00800000 is 1 << EXPSHIFTBITS_SP32: below it the exponent field is zero.
r = ax < 0x00800000 ? s : r;
18+
// Zero also took the subnormal select above; replace that value with -inf.
r = ax == 0 ? __CLC_AS_GENTYPE((__CLC_INTN)NINFBITPATT_SP32) : r;
19+
return r;
20+
}
21+
22+
#endif
23+
24+
#if __CLC_FPSIZE == 64
25+
26+
// 64-bit double variant. Returns, per component, the unbiased binary exponent
// of x as a floating-point value. Inputs at or above the +infinity bit pattern
// (infinity and NaN) return the masked pattern `ax` reinterpreted as a double;
// zero returns negative infinity. Every case is a `?:` select so the same body
// serves scalar and vector gentypes; later selects override earlier ones.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
27+
// ax: bit pattern masked with EXSIGNBIT_DP64 (presumably clears the sign bit
// -- TODO confirm, the constant is not defined in the visible hunks).
__CLC_LONGN ax = __CLC_AS_LONGN(x) & EXSIGNBIT_DP64;
28+
// s: result for subnormal inputs. Only meaningful when the exponent field of
// ax is zero, so the leading-zero count locates the highest mantissa bit
// (LOG_MAGIC_NUM_DP64 evaluates to -1011).
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_DP64 - __clc_clz(ax));
29+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_GENTYPE r =
30+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
31+
r = ax >= PINFBITPATT_DP64 ? __CLC_AS_GENTYPE(ax) : r;
32+
// 0x0010000000000000 is 1 << EXPSHIFTBITS_DP64: below it the exponent field
// is zero.
r = ax < 0x0010000000000000L ? s : r;
33+
// Zero also took the subnormal select above; replace that value with -inf.
r = ax == 0L ? __CLC_AS_GENTYPE((__CLC_LONGN)NINFBITPATT_DP64) : r;
34+
return r;
35+
}
36+
37+
#endif
38+
39+
#if __CLC_FPSIZE == 16
40+
41+
// 16-bit half variant, computed directly on the 16-bit pattern. Returns, per
// component, the unbiased binary exponent of x as a floating-point value.
// Inputs at or above the +infinity bit pattern (infinity and NaN) return the
// sign-cleared pattern `ax` reinterpreted as a half; zero returns negative
// infinity. Constants are cast to __CLC_SHORTN so the arithmetic and the
// select conditions stay in the 16-bit integer type. Later selects override
// earlier ones.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
42+
// ax: bit pattern of x with the sign bit cleared (EXSIGNBIT_FP16 is 0x7fff).
__CLC_SHORTN ax = __CLC_AS_SHORTN(x) & (__CLC_SHORTN)EXSIGNBIT_FP16;
43+
// s: result for subnormal inputs. Only meaningful when the exponent field of
// ax is zero, so the leading-zero count locates the highest mantissa bit
// (LOG_MAGIC_NUM_FP16 evaluates to -9).
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE((__CLC_SHORTN)LOG_MAGIC_NUM_FP16 -
44+
(__CLC_SHORTN)__clc_clz(ax));
45+
// r: result for normal inputs -- biased exponent field minus the bias.
__CLC_GENTYPE r = __CLC_CONVERT_GENTYPE(
46+
(ax >> (__CLC_SHORTN)EXPSHIFTBITS_FP16) - (__CLC_SHORTN)EXPBIAS_FP16);
47+
r = ax >= (__CLC_SHORTN)PINFBITPATT_FP16 ? __CLC_AS_GENTYPE(ax) : r;
48+
// 0x0400 is 1 << EXPSHIFTBITS_FP16: below it the exponent field is zero.
r = ax < (__CLC_SHORTN)0x0400 ? s : r;
49+
// Zero also took the subnormal select above; replace that value with -inf.
r = ax == (__CLC_SHORTN)0 ? __CLC_AS_GENTYPE((__CLC_SHORTN)NINFBITPATT_FP16)
50+
: r;
51+
return r;
52+
}
53+
54+
#endif

libclc/generic/include/clc/math/ilogb.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#define __CLC_BODY <clc/math/ilogb.inc>
9+
#define __CLC_FUNCTION ilogb
10+
#define __CLC_BODY <clc/math/unary_decl_with_int_return.inc>
1011

1112
#include <clc/math/gentype.inc>
13+
14+
#undef __CLC_FUNCTION

libclc/generic/lib/math/ilogb.cl

Lines changed: 4 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,64 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10-
#include <clc/clcmacro.h>
11-
#include <clc/math/math.h>
10+
#include <clc/math/clc_ilogb.h>
1211

13-
// Previous scalar float implementation (removed side of this diff). Returns
// the unbiased binary exponent of x; zero yields FP_ILOGB0, and the final two
// selects handle bit patterns above / equal to EXPBITS_SP32.
_CLC_OVERLOAD _CLC_DEF int ilogb(float x) {
14-
uint ux = as_uint(x);
15-
uint ax = ux & EXSIGNBIT_SP32;
16-
// Subnormal result; -118 is the hard-coded form of 1 + 8 - 127.
int rs = -118 - (int) clz(ux & MANTBITS_SP32);
17-
int r = (int) (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
18-
r = ax < 0x00800000U ? rs : r;
19-
r = ax == 0 ? FP_ILOGB0 : r;
20-
21-
// We could merge those 2 tests and have:
22-
//
23-
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
24-
//
25-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
26-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
27-
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
28-
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
29-
return r;
30-
}
31-
32-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, float);
33-
34-
#ifdef cl_khr_fp64
35-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
36-
37-
// Previous scalar double implementation (removed side of this diff). Returns
// the unbiased binary exponent of x; zero yields FP_ILOGB0, NaN patterns
// (above 0x7ff0000000000000) yield FP_ILOGBNAN and infinity yields 0x7fffffff.
_CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
38-
ulong ux = as_ulong(x);
39-
ulong ax = ux & ~SIGNBIT_DP64;
40-
int r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
41-
// Subnormal result; -1011 is the hard-coded form of 1 + 11 - 1023.
int rs = -1011 - (int) clz(ax & MANTBITS_DP64);
42-
r = ax < 0x0010000000000000UL ? rs : r;
43-
r = ax == 0UL ? FP_ILOGB0 : r;
44-
45-
// We could merge those 2 tests and have:
46-
//
47-
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
48-
//
49-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
50-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
51-
r = ax > 0x7ff0000000000000UL ? FP_ILOGBNAN : r;
52-
r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r;
53-
return r;
54-
}
55-
56-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
57-
58-
#endif // cl_khr_fp64
59-
60-
#ifdef cl_khr_fp16
61-
62-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
63-
64-
// Previous scalar half implementation (removed side of this diff): widens the
// argument to float and reuses the float overload instead of working on the
// 16-bit pattern directly.
_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
65-
return ilogb((float)x);
66-
}
67-
68-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
69-
70-
#endif
12+
#define FUNCTION ilogb
13+
#define __CLC_BODY <clc/math/unary_def_with_int_return.inc>
14+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)