Skip to content

Commit 5acc4a8

Browse files
committed
[libclc] Move logb/ilogb to CLC library; optimize
This commit moves the logb and ilogb builtins to the CLC library. It simultaneously optimizes them both for vector types and for half types. Half types were previously promoting to float, whereas this commit provides them a native implementation. Everything passes the OpenCL-CTS. I had to intuit some magic numbers used by these implementations in order to generate the half variants. I gave them clearer definitions derived from what I believe are their actual component numbers, but named them 'magic' to convey that they weren't derived from first principles.
1 parent ae57854 commit 5acc4a8

File tree

12 files changed

+215
-120
lines changed

12 files changed

+215
-120
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef __CLC_MATH_CLC_ILOGB_H__
2+
#define __CLC_MATH_CLC_ILOGB_H__
3+
4+
#define __CLC_BODY <clc/math/clc_ilogb.inc>
5+
#include <clc/math/gentype.inc>
6+
7+
#undef __CLC_BODY
8+
9+
#endif // __CLC_MATH_CLC_ILOGB_H__
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Declares the __clc_ilogb overload for the current __CLC_GENTYPE; the result
// is the matching-width integer type __CLC_INTN. NOTE(review): presumably this
// is the clc/math/clc_ilogb.inc body that clc_ilogb.h instantiates through
// gentype.inc -- the page does not show this file's name.
_CLC_OVERLOAD _CLC_DECL __CLC_INTN __clc_ilogb(__CLC_GENTYPE x);
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef __CLC_MATH_CLC_LOGB_H__
2+
#define __CLC_MATH_CLC_LOGB_H__
3+
4+
#define __CLC_FUNCTION __clc_logb
5+
#define __CLC_BODY <clc/shared/unary_decl.inc>
6+
#include <clc/math/gentype.inc>
7+
8+
#undef __CLC_BODY
9+
#undef __CLC_FUNCTION
10+
11+
#endif // __CLC_MATH_CLC_LOGB_H__

libclc/clc/include/clc/math/math.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
6666
#define INDEFBITPATT_SP32 0xffc00000
6767
#define PINFBITPATT_SP32 0x7f800000
6868
#define NINFBITPATT_SP32 0xff800000
69+
#define NUMEXPBITS_SP32 8
6970
#define EXPBIAS_SP32 127
7071
#define EXPSHIFTBITS_SP32 23
7172
#define BIASEDEMIN_SP32 1
@@ -76,6 +77,8 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
7677
#define MANTLENGTH_SP32 24
7778
#define BASEDIGITS_SP32 7
7879

80+
// Offset for recovering the exponent of a subnormal float from a
// count-leading-zeros of its bit pattern:
//   exponent = LOG_MAGIC_NUM_SP32 - clz(bits)
// Evaluates to 1 + 8 - 127 = -118, the literal the previous logb/ilogb
// implementations hard-coded.
#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
81+
7982
_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
8083
int ix = __clc_as_int(x);
8184
if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
@@ -100,6 +103,7 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
100103
#define INDEFBITPATT_DP64 0xfff8000000000000L
101104
#define PINFBITPATT_DP64 0x7ff0000000000000L
102105
#define NINFBITPATT_DP64 0xfff0000000000000L
106+
#define NUMEXPBITS_DP64 11
103107
#define EXPBIAS_DP64 1023
104108
#define EXPSHIFTBITS_DP64 52
105109
#define BIASEDEMIN_DP64 1
@@ -110,8 +114,26 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
110114
#define MANTLENGTH_DP64 53
111115
#define BASEDIGITS_DP64 15
112116

117+
// Offset for recovering the exponent of a subnormal double from a
// count-leading-zeros of its bit pattern:
//   exponent = LOG_MAGIC_NUM_DP64 - clz(bits)
// Evaluates to 1 + 11 - 1023 = -1011, the literal the previous logb/ilogb
// implementations hard-coded.
#define LOG_MAGIC_NUM_DP64 (1 + NUMEXPBITS_DP64 - EXPBIAS_DP64)
118+
113119
#endif // cl_khr_fp64
114120

121+
// Bit-layout constants for 16-bit half values (1 sign bit, 5 exponent bits,
// 10 mantissa bits), only defined when cl_khr_fp16 is available.
#ifdef cl_khr_fp16
122+
123+
// Mask selecting only the sign bit.
#define SIGNBIT_FP16 0x8000
124+
// Mask selecting everything except the sign bit.
#define EXSIGNBIT_FP16 0x7fff
125+
// Mask selecting the exponent field.
#define EXPBITS_FP16 0x7c00
126+
// Mask selecting the mantissa field.
#define MANTBITS_FP16 0x03ff
127+
// Bit patterns of +infinity and -infinity (exponent all ones, mantissa zero).
#define PINFBITPATT_FP16 0x7c00
128+
#define NINFBITPATT_FP16 0xfc00
129+
// Width of the exponent field in bits.
#define NUMEXPBITS_FP16 5
130+
// Bias subtracted from the stored exponent field.
#define EXPBIAS_FP16 15
131+
// Right shift that moves the exponent field down to bit 0.
#define EXPSHIFTBITS_FP16 10
132+
133+
// Offset for recovering the exponent of a subnormal half from a
// count-leading-zeros of its 16-bit pattern:
//   exponent = LOG_MAGIC_NUM_FP16 - clz(bits)
// Evaluates to 1 + 5 - 15 = -9.
#define LOG_MAGIC_NUM_FP16 (1 + NUMEXPBITS_FP16 - EXPBIAS_FP16)
134+
135+
#endif // cl_khr_fp16
136+
115137
#define ALIGNED(x) __attribute__((aligned(x)))
116138

117139
#endif // __CLC_MATH_MATH_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ math/clc_copysign.cl
2222
math/clc_fabs.cl
2323
math/clc_floor.cl
2424
math/clc_frexp.cl
25+
math/clc_ilogb.cl
26+
math/clc_logb.cl
2527
math/clc_mad.cl
2628
math/clc_modf.cl
2729
math/clc_nextafter.cl
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Copyright (c) 2015 Advanced Micro Devices, Inc.
3+
* Copyright (c) 2016 Aaron Watry
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a copy
6+
* of this software and associated documentation files (the "Software"), to deal
7+
* in the Software without restriction, including without limitation the rights
8+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
* copies of the Software, and to permit persons to whom the Software is
10+
* furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in
13+
* all copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
* THE SOFTWARE.
22+
*/
23+
24+
#include <clc/clc_convert.h>
25+
#include <clc/clcmacro.h>
26+
#include <clc/float/definitions.h>
27+
#include <clc/integer/clc_clz.h>
28+
#include <clc/internal/clc.h>
29+
#include <clc/math/math.h>
30+
31+
#define __CLC_BODY <clc_ilogb.inc>
32+
#include <clc/math/gentype.inc>
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#if __CLC_FPSIZE == 32
2+
3+
// ilogb for 32-bit float gentypes: returns the unbiased binary exponent of x as
// an integer, computed purely from the bit pattern.
//
// The result is built as a chain of conditional selects rather than branches;
// each later select overrides the earlier ones, so their order matters:
//   normal    -> exponent field minus EXPBIAS_SP32
//   subnormal -> LOG_MAGIC_NUM_SP32 - clz(mantissa)
//   zero      -> FP_ILOGB0
//   NaN       -> FP_ILOGBNAN
//   infinity  -> 0x7fffffff
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
4+
__CLC_UINTN ux = __CLC_AS_UINTN(x);
5+
// Bit pattern of x with the sign masked off (EXSIGNBIT_SP32 is defined outside
// this view; presumably 0x7fffffff).
__CLC_UINTN ax = ux & EXSIGNBIT_SP32;
6+
// Subnormal candidate. With the exponent field masked away, clz counts the
// 1 + NUMEXPBITS_SP32 sign/exponent positions plus the leading zero mantissa
// bits, so the subtraction yields the exponent of the highest set mantissa bit
// (assuming __clc_clz counts over the full 32-bit element).
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_SP32 -
7+
__CLC_AS_INTN(__clc_clz(ux & MANTBITS_SP32));
8+
// Normal candidate: stored exponent field minus the bias.
__CLC_INTN r = __CLC_AS_INTN(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
9+
// 0x00800000 == 1 << EXPSHIFTBITS_SP32: below it the exponent field is zero,
// i.e. the value is subnormal (or zero, which the next select overrides).
r = ax < 0x00800000U ? rs : r;
10+
r = ax == 0 ? FP_ILOGB0 : r;
11+
12+
// We could merge the following 2 tests and have:
13+
//
14+
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
15+
//
16+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
17+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
18+
// Above the all-ones exponent pattern means a nonzero mantissa: NaN.
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
19+
// Exactly the all-ones exponent pattern: +/-infinity.
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
20+
return r;
21+
}
22+
23+
#endif
24+
25+
#if __CLC_FPSIZE == 64
26+
27+
// ilogb for 64-bit double gentypes: returns the unbiased binary exponent of x
// as a 32-bit-element integer, computed purely from the bit pattern.
//
// Same select chain as the other widths (later selects override earlier ones):
//   normal -> exponent field minus EXPBIAS_DP64, subnormal -> clz-based value,
//   zero -> FP_ILOGB0, NaN -> FP_ILOGBNAN, infinity -> 0x7fffffff.
// The comparisons are done on 64-bit elements, so each condition is narrowed
// with __CLC_CONVERT_INTN before selecting between the int-typed results.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
28+
__CLC_ULONGN ux = __CLC_AS_ULONGN(x);
29+
// Bit pattern of x with the sign bit cleared.
__CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
30+
// Subnormal candidate. With the exponent field masked away, clz counts the
// 1 + NUMEXPBITS_DP64 sign/exponent positions plus the leading zero mantissa
// bits, so the subtraction yields the exponent of the highest set mantissa bit
// (assuming __clc_clz counts over the full 64-bit element).
__CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_DP64 -
31+
__CLC_CONVERT_INTN(__clc_clz(ax & MANTBITS_DP64));
32+
// Normal candidate: stored exponent field minus the bias.
__CLC_INTN r = __CLC_CONVERT_INTN(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
33+
// 0x0010000000000000 == 1 << EXPSHIFTBITS_DP64: below it the exponent field
// is zero, i.e. subnormal (or zero, which the next select overrides).
r = __CLC_CONVERT_INTN(ax < 0x0010000000000000UL) ? rs : r;
34+
r = __CLC_CONVERT_INTN(ax == 0UL) ? (__CLC_INTN)FP_ILOGB0 : r;
35+
36+
// We could merge the following 2 tests and have:
37+
//
38+
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
39+
//
40+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
41+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
42+
// 0x7ff0000000000000 is the +infinity pattern (PINFBITPATT_DP64); anything
// above it has a nonzero mantissa: NaN.
r = __CLC_CONVERT_INTN(ax > 0x7ff0000000000000UL) ? FP_ILOGBNAN : r;
43+
// Exactly the infinity pattern: +/-infinity.
r = __CLC_CONVERT_INTN(ax == 0x7ff0000000000000UL) ? 0x7fffffff : r;
44+
return r;
45+
}
46+
47+
#endif
48+
49+
#if __CLC_FPSIZE == 16
50+
51+
// ilogb for 16-bit half gentypes: returns the unbiased binary exponent of x as
// a 32-bit-element integer, computed directly on the 16-bit pattern (no
// promotion to float).
//
// Same select chain as the other widths (later selects override earlier ones):
//   normal -> exponent field minus EXPBIAS_FP16, subnormal -> clz-based value,
//   zero -> FP_ILOGB0, NaN -> FP_ILOGBNAN, infinity -> 0x7fffffff.
// The comparisons are done on 16-bit elements, so each condition is widened
// with __CLC_CONVERT_INTN before selecting between the int-typed results.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
52+
__CLC_USHORTN ux = __CLC_AS_USHORTN(x);
53+
// Bit pattern of x with the sign bit cleared (EXSIGNBIT_FP16 == 0x7fff).
__CLC_USHORTN ax = ux & (__CLC_USHORTN)EXSIGNBIT_FP16;
54+
// Mantissa field only (MANTBITS_FP16 == 0x03ff).
__CLC_USHORTN mantx = ux & (__CLC_USHORTN)MANTBITS_FP16;
55+
// Subnormal candidate. clz of the mantissa-only word counts the
// 1 + NUMEXPBITS_FP16 sign/exponent positions plus the leading zero mantissa
// bits, so the subtraction yields the exponent of the highest set mantissa bit
// (assuming __clc_clz counts over the full 16-bit element).
__CLC_INTN rs =
56+
(__CLC_INTN)LOG_MAGIC_NUM_FP16 - __CLC_CONVERT_INTN(__clc_clz(mantx));
57+
// Normal candidate: stored exponent field minus the bias.
__CLC_INTN r =
58+
__CLC_CONVERT_INTN(ax >> (__CLC_USHORTN)EXPSHIFTBITS_FP16) - EXPBIAS_FP16;
59+
// 0x0400 == 1 << EXPSHIFTBITS_FP16: below it the exponent field is zero,
// i.e. subnormal (or zero, which the next select overrides).
r = __CLC_CONVERT_INTN(ax < (__CLC_USHORTN)0x0400U) ? rs : r;
60+
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)0) ? (__CLC_INTN)FP_ILOGB0 : r;
61+
62+
// We could merge the following 2 tests and have:
63+
//
64+
// r = ax >= EXPBITS_FP16 ? 0x7fffffff : r
65+
//
66+
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
67+
// FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
68+
// Above the all-ones exponent pattern (0x7c00) means a nonzero mantissa: NaN.
r = __CLC_CONVERT_INTN(ax > (__CLC_USHORTN)EXPBITS_FP16) ? FP_ILOGBNAN : r;
69+
// Exactly the all-ones exponent pattern: +/-infinity.
r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)EXPBITS_FP16) ? 0x7fffffff : r;
70+
return r;
71+
}
72+
73+
#endif
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include <clc/clc_convert.h>
2+
#include <clc/clcmacro.h>
3+
#include <clc/float/definitions.h>
4+
#include <clc/integer/clc_clz.h>
5+
#include <clc/internal/clc.h>
6+
#include <clc/math/math.h>
7+
8+
#define __CLC_BODY <clc_logb.inc>
9+
#include <clc/math/gentype.inc>
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#if __CLC_FPSIZE == 32
2+
3+
// logb for 32-bit float gentypes: returns the unbiased binary exponent of x as
// a floating-point value, computed from the bit pattern.
//
// The result is built as a chain of conditional selects; later selects
// override earlier ones, so their order matters:
//   normal       -> exponent field minus EXPBIAS_SP32
//   inf or NaN   -> the input with its sign bit cleared
//   subnormal    -> LOG_MAGIC_NUM_SP32 - clz(bits)
//   zero         -> -infinity
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
4+
// Bit pattern of x with the sign masked off (EXSIGNBIT_SP32 is defined outside
// this view; presumably 0x7fffffff), kept as a signed integer.
__CLC_INTN ax = __CLC_AS_INTN(x) & EXSIGNBIT_SP32;
5+
// Subnormal candidate: for a subnormal the exponent field is zero, so clz of
// the whole sign-cleared word locates the highest set mantissa bit.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_SP32 - __clc_clz(ax));
6+
// Normal candidate: stored exponent field minus the bias.
__CLC_GENTYPE r =
7+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32);
8+
// At or above the +infinity pattern (0x7f800000) the input is inf or NaN;
// return the sign-cleared bits reinterpreted as a float.
r = ax >= PINFBITPATT_SP32 ? __CLC_AS_GENTYPE(ax) : r;
9+
// 0x00800000 == 1 << EXPSHIFTBITS_SP32: exponent field is zero (subnormal, or
// zero which the next select overrides).
r = ax < 0x00800000 ? s : r;
10+
// Zero input yields the -infinity bit pattern (NINFBITPATT_SP32).
r = ax == 0 ? __CLC_AS_GENTYPE((__CLC_INTN)NINFBITPATT_SP32) : r;
11+
return r;
12+
}
13+
14+
#endif
15+
16+
#if __CLC_FPSIZE == 64
17+
18+
// logb for 64-bit double gentypes: returns the unbiased binary exponent of x
// as a floating-point value, computed from the bit pattern.
//
// Same select chain as the other widths (later selects override earlier ones):
//   normal -> exponent field minus EXPBIAS_DP64, inf/NaN -> sign-cleared
//   input, subnormal -> LOG_MAGIC_NUM_DP64 - clz(bits), zero -> -infinity.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
19+
// Bit pattern of x with the sign masked off (EXSIGNBIT_DP64 is defined outside
// this view), kept as a signed 64-bit integer.
__CLC_LONGN ax = __CLC_AS_LONGN(x) & EXSIGNBIT_DP64;
20+
// Subnormal candidate: for a subnormal the exponent field is zero, so clz of
// the whole sign-cleared word locates the highest set mantissa bit.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_DP64 - __clc_clz(ax));
21+
// Normal candidate: stored exponent field minus the bias.
__CLC_GENTYPE r =
22+
__CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
23+
// At or above the +infinity pattern the input is inf or NaN; return the
// sign-cleared bits reinterpreted as a double.
r = ax >= PINFBITPATT_DP64 ? __CLC_AS_GENTYPE(ax) : r;
24+
// 0x0010000000000000 == 1 << EXPSHIFTBITS_DP64: exponent field is zero
// (subnormal, or zero which the next select overrides).
r = ax < 0x0010000000000000L ? s : r;
25+
// Zero input yields the -infinity bit pattern (NINFBITPATT_DP64).
r = ax == 0L ? __CLC_AS_GENTYPE((__CLC_LONGN)NINFBITPATT_DP64) : r;
26+
return r;
27+
}
28+
29+
#endif
30+
31+
#if __CLC_FPSIZE == 16
32+
33+
// logb for 16-bit half gentypes: returns the unbiased binary exponent of x as
// a half value, computed directly on the 16-bit pattern (no promotion to
// float).
//
// Same select chain as the other widths (later selects override earlier ones):
//   normal -> exponent field minus EXPBIAS_FP16, inf/NaN -> sign-cleared
//   input, subnormal -> LOG_MAGIC_NUM_FP16 - clz(bits), zero -> -infinity.
// Constants are cast to __CLC_SHORTN so all integer arithmetic stays in the
// 16-bit element type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
34+
// Bit pattern of x with the sign bit cleared (EXSIGNBIT_FP16 == 0x7fff), so
// the signed value is never negative.
__CLC_SHORTN ax = __CLC_AS_SHORTN(x) & (__CLC_SHORTN)EXSIGNBIT_FP16;
35+
// Subnormal candidate: for a subnormal the exponent field is zero, so clz of
// the whole sign-cleared word locates the highest set mantissa bit.
__CLC_GENTYPE s = __CLC_CONVERT_GENTYPE((__CLC_SHORTN)LOG_MAGIC_NUM_FP16 -
36+
(__CLC_SHORTN)__clc_clz(ax));
37+
// Normal candidate: stored exponent field minus the bias.
__CLC_GENTYPE r = __CLC_CONVERT_GENTYPE(
38+
(ax >> (__CLC_SHORTN)EXPSHIFTBITS_FP16) - (__CLC_SHORTN)EXPBIAS_FP16);
39+
// At or above the +infinity pattern (0x7c00) the input is inf or NaN; return
// the sign-cleared bits reinterpreted as a half.
r = ax >= (__CLC_SHORTN)PINFBITPATT_FP16 ? __CLC_AS_GENTYPE(ax) : r;
40+
// 0x0400 == 1 << EXPSHIFTBITS_FP16: exponent field is zero (subnormal, or
// zero which the next select overrides).
r = ax < (__CLC_SHORTN)0x0400 ? s : r;
41+
// Zero input yields the -infinity bit pattern (NINFBITPATT_FP16 == 0xfc00).
r = ax == (__CLC_SHORTN)0 ? __CLC_AS_GENTYPE((__CLC_SHORTN)NINFBITPATT_FP16)
42+
: r;
43+
return r;
44+
}
45+
46+
#endif

libclc/generic/lib/math/ilogb.cl

Lines changed: 3 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,5 @@
1-
/*
2-
* Copyright (c) 2015 Advanced Micro Devices, Inc.
3-
* Copyright (c) 2016 Aaron Watry
4-
*
5-
* Permission is hereby granted, free of charge, to any person obtaining a copy
6-
* of this software and associated documentation files (the "Software"), to deal
7-
* in the Software without restriction, including without limitation the rights
8-
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9-
* copies of the Software, and to permit persons to whom the Software is
10-
* furnished to do so, subject to the following conditions:
11-
*
12-
* The above copyright notice and this permission notice shall be included in
13-
* all copies or substantial portions of the Software.
14-
*
15-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16-
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17-
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18-
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19-
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20-
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21-
* THE SOFTWARE.
22-
*/
23-
241
#include <clc/clc.h>
25-
#include <clc/clcmacro.h>
26-
#include <clc/math/math.h>
27-
28-
_CLC_OVERLOAD _CLC_DEF int ilogb(float x) {
29-
uint ux = as_uint(x);
30-
uint ax = ux & EXSIGNBIT_SP32;
31-
int rs = -118 - (int) clz(ux & MANTBITS_SP32);
32-
int r = (int) (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
33-
r = ax < 0x00800000U ? rs : r;
34-
r = ax == 0 ? FP_ILOGB0 : r;
35-
36-
// We could merge those 2 tests and have:
37-
//
38-
// r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
39-
//
40-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
41-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
42-
r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
43-
r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
44-
return r;
45-
}
46-
47-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, float);
48-
49-
#ifdef cl_khr_fp64
50-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
51-
52-
_CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
53-
ulong ux = as_ulong(x);
54-
ulong ax = ux & ~SIGNBIT_DP64;
55-
int r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
56-
int rs = -1011 - (int) clz(ax & MANTBITS_DP64);
57-
r = ax < 0x0010000000000000UL ? rs : r;
58-
r = ax == 0UL ? FP_ILOGB0 : r;
59-
60-
// We could merge those 2 tests and have:
61-
//
62-
// r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
63-
//
64-
// since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
65-
// FP_ILOGBNAN can change without requiring changes to ilogb() code.
66-
r = ax > 0x7ff0000000000000UL ? FP_ILOGBNAN : r;
67-
r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r;
68-
return r;
69-
}
70-
71-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
72-
73-
#endif // cl_khr_fp64
74-
75-
#ifdef cl_khr_fp16
76-
77-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
78-
79-
_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
80-
return ilogb((float)x);
81-
}
82-
83-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
2+
#include <clc/math/clc_ilogb.h>
843

85-
#endif
4+
#define __CLC_BODY "ilogb.inc"
5+
#include <clc/math/gentype.inc>

libclc/generic/lib/math/ilogb.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Public ilogb builtin for the current __CLC_GENTYPE: a thin wrapper that
// forwards to the CLC-library implementation __clc_ilogb and returns its
// result unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_INTN ilogb(__CLC_GENTYPE x) {
2+
return __clc_ilogb(x);
3+
}

libclc/generic/lib/math/logb.cl

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,6 @@
11
#include <clc/clc.h>
2-
#include <clc/clcmacro.h>
3-
#include <clc/math/math.h>
2+
#include <clc/math/clc_logb.h>
43

5-
_CLC_OVERLOAD _CLC_DEF float logb(float x) {
6-
int ax = as_int(x) & EXSIGNBIT_SP32;
7-
float s = -118 - clz(ax);
8-
float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
9-
r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r;
10-
r = ax < 0x00800000 ? s : r;
11-
r = ax == 0 ? as_float(NINFBITPATT_SP32) : r;
12-
return r;
13-
}
14-
15-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float);
16-
17-
#ifdef cl_khr_fp64
18-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
19-
20-
_CLC_OVERLOAD _CLC_DEF double logb(double x) {
21-
long ax = as_long(x) & EXSIGNBIT_DP64;
22-
double s = -1011L - clz(ax);
23-
double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
24-
r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r;
25-
r = ax < 0x0010000000000000L ? s : r;
26-
r = ax == 0L ? as_double(NINFBITPATT_DP64) : r;
27-
return r;
28-
}
29-
30-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
31-
#endif
32-
33-
#ifdef cl_khr_fp16
34-
35-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
36-
37-
_CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
38-
39-
#endif
4+
#define FUNCTION logb
5+
#define __CLC_BODY <clc/shared/unary_def.inc>
6+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)