Skip to content

Commit 78922ca

Browse files
committed
[libclc] Move logb/ilogb to CLC library; optimize
This commit moves the logb and ilogb builtins to the CLC library. It also optimizes both builtins for vector types and for half types. Half types were previously promoted to float, whereas this commit gives them a native implementation. Everything passes the OpenCL-CTS. I had to intuit some magic numbers used by these implementations in order to generate the half variants. I gave them clearer definitions derived from what I believe are their actual component numbers, but named them 'magic' to convey that they weren't derived from first principles.
1 parent 87978ea commit 78922ca

File tree

12 files changed

+224
-97
lines changed

12 files changed

+224
-97
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef __CLC_MATH_CLC_ILOGB_H__
2+
#define __CLC_MATH_CLC_ILOGB_H__
3+
4+
#define __CLC_BODY <clc/math/clc_ilogb.inc>
5+
#include <clc/math/gentype.inc>
6+
7+
#undef __CLC_BODY
8+
9+
#endif // __CLC_MATH_CLC_ILOGB_H__
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Declaration of __clc_ilogb for the current __CLC_GENTYPE: takes one
// floating-point value x and returns an integer result of type __CLC_INTN.
// NOTE(review): __CLC_GENTYPE / __CLC_INTN are supplied by the including
// gentype machinery, so this line is presumably expanded once per supported
// scalar/vector type - confirm against <clc/math/gentype.inc>.
_CLC_OVERLOAD _CLC_DECL __CLC_INTN __clc_ilogb(__CLC_GENTYPE x);
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef __CLC_MATH_CLC_LOGB_H__
2+
#define __CLC_MATH_CLC_LOGB_H__
3+
4+
#define __CLC_FUNCTION __clc_logb
5+
#define __CLC_BODY <clc/shared/unary_decl.inc>
6+
#include <clc/math/gentype.inc>
7+
8+
#undef __CLC_BODY
9+
#undef __CLC_FUNCTION
10+
11+
#endif // __CLC_MATH_CLC_LOGB_H__

libclc/clc/include/clc/math/math.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
5252
#define INDEFBITPATT_SP32 0xffc00000
5353
#define PINFBITPATT_SP32 0x7f800000
5454
#define NINFBITPATT_SP32 0xff800000
55+
#define NUMEXPBITS_SP32 8
5556
#define EXPBIAS_SP32 127
5657
#define EXPSHIFTBITS_SP32 23
5758
#define BIASEDEMIN_SP32 1
@@ -62,6 +63,8 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
6263
#define MANTLENGTH_SP32 24
6364
#define BASEDIGITS_SP32 7
6465

66+
// Base constant for the zero-exponent-field (subnormal) path of the 32-bit
// ilogb/logb code, which computes LOG_MAGIC_NUM_SP32 - clz(bits).
// Evaluates to 1 + 8 - 127 = -118.
// NOTE(review): appears to equal (bit width - 1) plus the exponent of the
// lowest mantissa bit, i.e. 31 + (-149) - confirm.
#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
67+
6568
_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
6669
int ix = __clc_as_int(x);
6770
if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
@@ -86,6 +89,7 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
8689
#define INDEFBITPATT_DP64 0xfff8000000000000L
8790
#define PINFBITPATT_DP64 0x7ff0000000000000L
8891
#define NINFBITPATT_DP64 0xfff0000000000000L
92+
#define NUMEXPBITS_DP64 11
8993
#define EXPBIAS_DP64 1023
9094
#define EXPSHIFTBITS_DP64 52
9195
#define BIASEDEMIN_DP64 1
@@ -96,8 +100,26 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
96100
#define MANTLENGTH_DP64 53
97101
#define BASEDIGITS_DP64 15
98102

103+
// Base constant for the zero-exponent-field (subnormal) path of the 64-bit
// ilogb/logb code, which computes LOG_MAGIC_NUM_DP64 - clz(bits).
// Evaluates to 1 + 11 - 1023 = -1011.
// NOTE(review): appears to equal (bit width - 1) plus the exponent of the
// lowest mantissa bit, i.e. 63 + (-1074) - confirm.
#define LOG_MAGIC_NUM_DP64 (1 + NUMEXPBITS_DP64 - EXPBIAS_DP64)
104+
99105
#endif // cl_khr_fp64
100106

107+
// Bit-layout constants for 16-bit (half) floating-point values. They are only
// defined when cl_khr_fp16 is defined.
#ifdef cl_khr_fp16
108+
109+
// Bit 15 only (the sign bit, going by the name).
#define SIGNBIT_FP16 0x8000
110+
// Every bit except bit 15; ANDed with the raw bits to obtain the magnitude.
#define EXSIGNBIT_FP16 0x7fff
111+
// Bits 10-14: the exponent field.
#define EXPBITS_FP16 0x7c00
112+
// Bits 0-9: the mantissa field.
#define MANTBITS_FP16 0x03ff
113+
// Bit pattern of +infinity (same value as EXPBITS_FP16).
#define PINFBITPATT_FP16 0x7c00
114+
// Bit pattern of -infinity (PINFBITPATT_FP16 with bit 15 set).
#define NINFBITPATT_FP16 0xfc00
115+
// Width of the exponent field in bits.
#define NUMEXPBITS_FP16 5
116+
// Value subtracted from the raw exponent field to get the unbiased exponent.
#define EXPBIAS_FP16 15
117+
// Right-shift that moves the exponent field down to bit 0.
#define EXPSHIFTBITS_FP16 10
118+
119+
// Base constant for the zero-exponent-field (subnormal) path of the 16-bit
// ilogb/logb code, which computes LOG_MAGIC_NUM_FP16 - clz(bits).
// Evaluates to 1 + 5 - 15 = -9.
// NOTE(review): appears to equal (bit width - 1) plus the exponent of the
// lowest mantissa bit, i.e. 15 + (-24) - confirm.
#define LOG_MAGIC_NUM_FP16 (1 + NUMEXPBITS_FP16 - EXPBIAS_FP16)
120+
121+
#endif // cl_khr_fp16
122+
101123
#define ALIGNED(x) __attribute__((aligned(x)))
102124

103125
#endif // __CLC_MATH_MATH_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ math/clc_fmod.cl
5151
math/clc_fract.cl
5252
math/clc_frexp.cl
5353
math/clc_hypot.cl
54+
math/clc_ilogb.cl
5455
math/clc_ldexp.cl
5556
math/clc_lgamma.cl
5657
math/clc_lgamma_r.cl
5758
math/clc_log.cl
5859
math/clc_log10.cl
5960
math/clc_log1p.cl
6061
math/clc_log2.cl
62+
math/clc_logb.cl
6163
math/clc_mad.cl
6264
math/clc_maxmag.cl
6365
math/clc_minmag.cl
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/integer/clc_clz.h>
13+
#include <clc/internal/clc.h>
14+
#include <clc/math/math.h>
15+
16+
#define __CLC_BODY <clc_ilogb.inc>
17+
#include <clc/math/gentype.inc>
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// 32-bit __clc_ilogb: returns an integer exponent for x, computed purely from
// the bit pattern of x. The result is built as a chain of ?: selects in which
// each later select overrides the earlier ones:
//   default              -> (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32
//   ax <  0x00800000     -> rs (no bits set at or above bit 23)
//   ax == 0              -> FP_ILOGB0
//   ax >  EXPBITS_SP32   -> FP_ILOGBNAN
//   ax == EXPBITS_SP32   -> 0x7fffffff
// NOTE(review): ?: is presumably used instead of if/else so that the same
// body also works when __CLC_GENTYPE is a vector type - confirm.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
12+
// Raw bits of x.
__CLC_UINTN ux = __CLC_AS_UINTN(x);
13+
// EXSIGNBIT_SP32 is defined outside this excerpt; presumably it clears the
// sign bit so that ax is the magnitude of x - TODO confirm.
__CLC_UINTN ax = ux & EXSIGNBIT_SP32;
14+
// Candidate result for inputs with a zero exponent field: the magic base
// (-118) minus the number of leading zeros of the masked mantissa bits.
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_SP32 -
15+
__CLC_AS_INTN(__clc_clz(ux & MANTBITS_SP32));
16+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_INTN r = __CLC_AS_INTN(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
17+
// 0x00800000 is 1 << 23, i.e. the lowest pattern with a nonzero exponent
// field (EXPSHIFTBITS_SP32 is 23).
r = ax < 0x00800000U ? rs : r;
18+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = ax == 0 ? FP_ILOGB0 : r;
19+
20+
// The 2 tests below could be merged into:
21+
//
22+
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
23+
//
24+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
25+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
26+
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
27+
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
28+
return r;
29+
}
30+
31+
#endif
32+
33+
#if __CLC_FPSIZE == 64
34+
35+
// 64-bit __clc_ilogb: returns an integer exponent for x, computed purely from
// the bit pattern of x. The result is built as a chain of ?: selects in which
// each later select overrides the earlier ones:
//   default                      -> (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64
//   ax <  0x0010000000000000     -> rs (no bits set at or above bit 52)
//   ax == 0                      -> FP_ILOGB0
//   ax >  0x7ff0000000000000     -> FP_ILOGBNAN
//   ax == 0x7ff0000000000000     -> 0x7fffffff
// The comparisons are made on 64-bit values while the result is int-typed;
// NOTE(review): the __CLC_CONVERT_INTN around each comparison presumably
// narrows the comparison result to match the int operands of ?: - confirm.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
36+
// Raw bits of x.
__CLC_ULONGN ux = __CLC_AS_ULONGN(x);
37+
// Magnitude of x: raw bits with the SIGNBIT_DP64 bit(s) cleared.
__CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
38+
// Candidate result for inputs with a zero exponent field: the magic base
// (-1011) minus the number of leading zeros of the masked mantissa bits.
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_DP64 -
39+
__CLC_CONVERT_INTN(__clc_clz(ax & MANTBITS_DP64));
40+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_INTN r = __CLC_CONVERT_INTN(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
41+
// 0x0010000000000000 is 1 << 52, i.e. the lowest pattern with a nonzero
// exponent field (EXPSHIFTBITS_DP64 is 52).
r = __CLC_CONVERT_INTN(ax < 0x0010000000000000UL) ? rs : r;
42+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = __CLC_CONVERT_INTN(ax == 0UL) ? (__CLC_INTN)FP_ILOGB0 : r;
43+
44+
// The 2 tests below could be merged into:
45+
//
46+
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
47+
//
48+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
49+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
50+
// 0x7ff0000000000000 is the +infinity bit pattern (PINFBITPATT_DP64).
r = __CLC_CONVERT_INTN(ax > 0x7ff0000000000000UL) ? FP_ILOGBNAN : r;
51+
r = __CLC_CONVERT_INTN(ax == 0x7ff0000000000000UL) ? 0x7fffffff : r;
52+
return r;
53+
}
54+
55+
#endif
56+
57+
#if __CLC_FPSIZE == 16
58+
59+
// 16-bit (half) __clc_ilogb: returns an integer exponent for x, computed
// purely from the bit pattern of x. The result is built as a chain of ?:
// selects in which each later select overrides the earlier ones:
//   default              -> (ax >> EXPSHIFTBITS_FP16) - EXPBIAS_FP16
//   ax <  0x0400         -> rs (no bits set at or above bit 10)
//   ax == 0              -> FP_ILOGB0
//   ax >  EXPBITS_FP16   -> FP_ILOGBNAN
//   ax == EXPBITS_FP16   -> 0x7fffffff
// The comparisons are made on 16-bit values while the result is int-typed;
// NOTE(review): the __CLC_CONVERT_INTN around each comparison presumably
// widens the comparison result to match the int operands of ?: - confirm.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
60+
// Raw bits of x.
__CLC_USHORTN ux = __CLC_AS_USHORTN(x);
61+
// Magnitude of x: EXSIGNBIT_FP16 (0x7fff) keeps everything except bit 15.
__CLC_USHORTN ax = ux & (__CLC_USHORTN)EXSIGNBIT_FP16;
62+
// Mantissa field only: MANTBITS_FP16 (0x03ff) keeps bits 0-9.
__CLC_USHORTN mantx = ux & (__CLC_USHORTN)MANTBITS_FP16;
63+
// Candidate result for inputs with a zero exponent field: the magic base
// (-9) minus the number of leading zeros of the mantissa field.
__CLC_INTN rs =
64+
(__CLC_INTN)LOG_MAGIC_NUM_FP16 - __CLC_CONVERT_INTN(__clc_clz(mantx));
65+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_INTN r =
66+
__CLC_CONVERT_INTN(ax >> (__CLC_USHORTN)EXPSHIFTBITS_FP16) - EXPBIAS_FP16;
67+
// 0x0400 is 1 << 10, i.e. the lowest pattern with a nonzero exponent field
// (EXPSHIFTBITS_FP16 is 10).
r = __CLC_CONVERT_INTN(ax < (__CLC_USHORTN)0x0400U) ? rs : r;
68+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)0) ? (__CLC_INTN)FP_ILOGB0 : r;
69+
70+
// The 2 tests below could be merged into:
71+
//
72+
// r = ax >= EXPBITS_FP16 ? 0x7fffffff : r
73+
//
74+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
75+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
76+
// EXPBITS_FP16 (0x7c00) has the same value as the +infinity bit pattern.
r = __CLC_CONVERT_INTN(ax > (__CLC_USHORTN)EXPBITS_FP16) ? FP_ILOGBNAN : r;
77+
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)EXPBITS_FP16) ? 0x7fffffff : r;
78+
return r;
79+
}
80+
81+
#endif
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/clcmacro.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/integer/clc_clz.h>
13+
#include <clc/internal/clc.h>
14+
#include <clc/math/math.h>
15+
16+
#define __CLC_BODY <clc_logb.inc>
17+
#include <clc/math/gentype.inc>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// 32-bit __clc_logb: returns the exponent of x as a floating-point value,
// computed purely from the bit pattern of x. The result is built as a chain
// of ?: selects in which each later select overrides the earlier ones:
//   default                  -> (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32
//   ax >= PINFBITPATT_SP32   -> ax reinterpreted as floating point
//   ax <  0x00800000         -> s (no bits set at or above bit 23)
//   ax == 0                  -> the NINFBITPATT_SP32 (-infinity) bit pattern
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
12+
// EXSIGNBIT_SP32 is defined outside this excerpt; presumably it clears the
// sign bit so that ax is the magnitude of x - TODO confirm.
__CLC_INTN ax = __CLC_AS_INTN(x) & EXSIGNBIT_SP32;
13+
// Candidate result for inputs with a zero exponent field: the magic base
// (-118) minus the leading-zero count of ax. It is only selected when
// ax < 0x00800000, where ax has no bits above the low 23.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_SP32 - __clc_clz(ax));
14+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_GENTYPE r =
15+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32);
16+
// Patterns at or above +infinity (0x7f800000) are returned with the sign
// cleared rather than converted to an exponent.
r = ax >= PINFBITPATT_SP32 ? __CLC_AS_GENTYPE(ax) : r;
17+
r = ax < 0x00800000 ? s : r;
18+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = ax == 0 ? __CLC_AS_GENTYPE((__CLC_INTN)NINFBITPATT_SP32) : r;
19+
return r;
20+
}
21+
22+
#endif
23+
24+
#if __CLC_FPSIZE == 64
25+
26+
// 64-bit __clc_logb: returns the exponent of x as a floating-point value,
// computed purely from the bit pattern of x. The result is built as a chain
// of ?: selects in which each later select overrides the earlier ones:
//   default                    -> (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64
//   ax >= PINFBITPATT_DP64     -> ax reinterpreted as floating point
//   ax <  0x0010000000000000   -> s (no bits set at or above bit 52)
//   ax == 0                    -> the NINFBITPATT_DP64 (-infinity) bit pattern
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
27+
// EXSIGNBIT_DP64 is defined outside this excerpt; presumably it clears the
// sign bit so that ax is the magnitude of x - TODO confirm.
__CLC_LONGN ax = __CLC_AS_LONGN(x) & EXSIGNBIT_DP64;
28+
// Candidate result for inputs with a zero exponent field: the magic base
// (-1011) minus the leading-zero count of ax. It is only selected when
// ax < 0x0010000000000000, where ax has no bits above the low 52.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_DP64 - __clc_clz(ax));
29+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_GENTYPE r =
30+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
31+
// Patterns at or above +infinity (0x7ff0000000000000) are returned with the
// sign cleared rather than converted to an exponent.
r = ax >= PINFBITPATT_DP64 ? __CLC_AS_GENTYPE(ax) : r;
32+
r = ax < 0x0010000000000000L ? s : r;
33+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = ax == 0L ? __CLC_AS_GENTYPE((__CLC_LONGN)NINFBITPATT_DP64) : r;
34+
return r;
35+
}
36+
37+
#endif
38+
39+
#if __CLC_FPSIZE == 16
40+
41+
// 16-bit (half) __clc_logb: returns the exponent of x as a floating-point
// value, computed purely from the bit pattern of x. The result is built as a
// chain of ?: selects in which each later select overrides the earlier ones:
//   default                  -> (ax >> EXPSHIFTBITS_FP16) - EXPBIAS_FP16
//   ax >= PINFBITPATT_FP16   -> ax reinterpreted as floating point
//   ax <  0x0400             -> s (no bits set at or above bit 10)
//   ax == 0                  -> the NINFBITPATT_FP16 (-infinity) bit pattern
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
42+
// Magnitude of x: EXSIGNBIT_FP16 (0x7fff) keeps everything except bit 15.
__CLC_SHORTN ax = __CLC_AS_SHORTN(x) & (__CLC_SHORTN)EXSIGNBIT_FP16;
43+
// Candidate result for inputs with a zero exponent field: the magic base
// (-9) minus the leading-zero count of ax. It is only selected when
// ax < 0x0400, where ax has no bits above the low 10.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE((__CLC_SHORTN)LOG_MAGIC_NUM_FP16 -
44+
(__CLC_SHORTN)__clc_clz(ax));
45+
// Candidate result for all other finite inputs: exponent field minus bias.
__CLC_GENTYPE r = __CLC_CONVERT_GENTYPE(
46+
(ax >> (__CLC_SHORTN)EXPSHIFTBITS_FP16) - (__CLC_SHORTN)EXPBIAS_FP16);
47+
// Patterns at or above +infinity (0x7c00) are returned with the sign cleared
// rather than converted to an exponent.
r = ax >= (__CLC_SHORTN)PINFBITPATT_FP16 ? __CLC_AS_GENTYPE(ax) : r;
48+
r = ax < (__CLC_SHORTN)0x0400 ? s : r;
49+
// Zero also satisfies the test above, so it must be overridden afterwards.
r = ax == (__CLC_SHORTN)0 ? __CLC_AS_GENTYPE((__CLC_SHORTN)NINFBITPATT_FP16)
50+
: r;
51+
return r;
52+
}
53+
54+
#endif

libclc/generic/lib/math/ilogb.cl

Lines changed: 3 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,64 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10-
#include <clc/clcmacro.h>
11-
#include <clc/math/math.h>
10+
#include <clc/math/clc_ilogb.h>
1211

13-
_CLC_OVERLOAD _CLC_DEF int ilogb(float x) {
14-
uint ux = as_uint(x);
15-
uint ax = ux & EXSIGNBIT_SP32;
16-
int rs = -118 - (int) clz(ux & MANTBITS_SP32);
17-
int r = (int) (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
18-
r = ax < 0x00800000U ? rs : r;
19-
r = ax == 0 ? FP_ILOGB0 : r;
20-
21-
// We could merge those 2 tests and have:
22-
//
23-
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
24-
//
25-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
26-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
27-
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
28-
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
29-
return r;
30-
}
31-
32-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, float);
33-
34-
#ifdef cl_khr_fp64
35-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
36-
37-
_CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
38-
ulong ux = as_ulong(x);
39-
ulong ax = ux & ~SIGNBIT_DP64;
40-
int r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
41-
int rs = -1011 - (int) clz(ax & MANTBITS_DP64);
42-
r = ax < 0x0010000000000000UL ? rs : r;
43-
r = ax == 0UL ? FP_ILOGB0 : r;
44-
45-
// We could merge those 2 tests and have:
46-
//
47-
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
48-
//
49-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
50-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
51-
r = ax > 0x7ff0000000000000UL ? FP_ILOGBNAN : r;
52-
r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r;
53-
return r;
54-
}
55-
56-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
57-
58-
#endif // cl_khr_fp64
59-
60-
#ifdef cl_khr_fp16
61-
62-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
63-
64-
_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
65-
return ilogb((float)x);
66-
}
67-
68-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
69-
70-
#endif
12+
#define __CLC_BODY "ilogb.inc"
13+
#include <clc/math/gentype.inc>

libclc/generic/lib/math/ilogb.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Public ilogb builtin for the current __CLC_GENTYPE: a thin wrapper that
// forwards x unchanged to __clc_ilogb and returns its result.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN ilogb(__CLC_GENTYPE x) {
2+
return __clc_ilogb(x);
3+
}

libclc/generic/lib/math/logb.cl

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,41 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10-
#include <clc/clcmacro.h>
11-
#include <clc/math/math.h>
10+
#include <clc/math/clc_logb.h>
1211

13-
_CLC_OVERLOAD _CLC_DEF float logb(float x) {
14-
int ax = as_int(x) & EXSIGNBIT_SP32;
15-
float s = -118 - clz(ax);
16-
float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
17-
r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r;
18-
r = ax < 0x00800000 ? s : r;
19-
r = ax == 0 ? as_float(NINFBITPATT_SP32) : r;
20-
return r;
21-
}
22-
23-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float);
24-
25-
#ifdef cl_khr_fp64
26-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
27-
28-
_CLC_OVERLOAD _CLC_DEF double logb(double x) {
29-
long ax = as_long(x) & EXSIGNBIT_DP64;
30-
double s = -1011L - clz(ax);
31-
double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
32-
r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r;
33-
r = ax < 0x0010000000000000L ? s : r;
34-
r = ax == 0L ? as_double(NINFBITPATT_DP64) : r;
35-
return r;
36-
}
37-
38-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
39-
#endif
40-
41-
#ifdef cl_khr_fp16
42-
43-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
44-
45-
_CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
46-
47-
#endif
12+
#define FUNCTION logb
13+
#define __CLC_BODY <clc/shared/unary_def.inc>
14+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)