Skip to content

Commit 7e6a739

Browse files
authored
libclc: increase fp16 support (#98149)
Increase fp16 support so that clspv remains OpenCL-compliant following the OpenCL-CTS update that added more tests for math functions and conversions with half. Math functions are implemented by converting the operands to fp32 and using the fp32 implementation. This guarantees the accuracy that the CTS requires for half-precision floating point.
1 parent c0084c3 commit 7e6a739

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+403
-89
lines changed

libclc/clspv/lib/math/fma.cl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) {
269269
((uint)st_fma.mantissa.lo & 0x7fffff));
270270
}
271271
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float)
272+
273+
#ifdef cl_khr_fp16
274+
275+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
276+
// Half-precision fma overload, compiled only when cl_khr_fp16 is defined.
// The three operands are widened to float, combined with the float mad(),
// and the float result is converted back to half.
277+
_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) {
278+
return (half)mad((float)a, (float)b, (float)c);
279+
}
// Expands _CLC_TERNARY_VECTORIZE for the half fma; presumably this emits the
// vector-width overloads built on the scalar one above (macro body is not
// fully visible here -- confirm in clcmacro.h).
280+
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half)
281+
282+
#endif

libclc/generic/include/clc/convert.h

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,19 @@
2020
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
2121
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
2222

23-
#ifdef cl_khr_fp64
23+
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
24+
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
25+
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
26+
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \
27+
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
28+
#elif defined(cl_khr_fp64)
2429
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
2530
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
2631
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
32+
#elif defined(cl_khr_fp16)
33+
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
34+
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
35+
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
2736
#else
2837
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
2938
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
@@ -40,11 +49,19 @@
4049
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
4150
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
4251

43-
#ifdef cl_khr_fp64
52+
/*
 * _CLC_VECTOR_CONVERT_TO(SUFFIX) expands to the conversion declarations for
 * every supported source type with the given SUFFIX.
 *
 * _CLC_VECTOR_CONVERT_TO1 contributes the base set of source types; the
 * double and half source types are appended only when the matching extension
 * macro (cl_khr_fp64 / cl_khr_fp16) is defined.
 *
 * Exactly one branch defines the macro. The final fallback must live under
 * its own #else: without it the fallback would sit inside the cl_khr_fp16
 * branch, redefining the macro there (dropping the half conversions) and
 * leaving it undefined when neither extension is available.
 */
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
  _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
  _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \
  _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
#elif defined(cl_khr_fp64)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
  _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
  _CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
#elif defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
  _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
  _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
#else
/* Neither fp64 nor fp16 is available: only the base source types. */
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
  _CLC_VECTOR_CONVERT_TO1(SUFFIX)
#endif

libclc/generic/include/math/clc_ldexp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
77

88
#ifdef cl_khr_fp16
99
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
10-
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
10+
// Half overload of __clc_ldexp: takes a half and an int and returns half,
// so the return type matches the first argument like the double overload.
_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int);
1111
#endif

libclc/generic/lib/clcmacro.h

Lines changed: 94 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include <utils.h>
2+
13
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
24
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
35
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
@@ -86,64 +88,76 @@
8688
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
8789
}
8890

89-
#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
90-
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
91-
return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
92-
} \
93-
\
94-
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
95-
return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
96-
FUNCTION(x, y, z.z)); \
97-
} \
98-
\
99-
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
100-
return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
101-
} \
102-
\
103-
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
104-
return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
105-
} \
106-
\
107-
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
108-
return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
109-
} \
110-
\
91+
// _CLC_V_S_S_V_VECTORIZE: defines FUNCTION overloads for vector widths 2, 3,
// 4, 8 and 16 whose first two arguments are scalars (ARG1_TYPE, ARG2_TYPE)
// and whose third argument is a vector of ARG3_TYPE.
//   DECLSPEC  - declaration specifiers placed in front of every overload
//   RET_TYPE  - scalar element type of the result; the width is pasted on
//   FUNCTION  - name being overloaded; a scalar overload must already exist
// Each overload passes x and y through unchanged and applies FUNCTION to the
// pieces of z: per component for width 3, and to the .lo/.hi halves for the
// other widths (so width N is built on the width N/2 overload).
#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
92+
ARG2_TYPE, ARG3_TYPE) \
93+
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
94+
return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
95+
} \
96+
\
97+
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
98+
return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
99+
FUNCTION(x, y, z.z)); \
100+
} \
101+
\
102+
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
103+
return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
104+
} \
105+
\
106+
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
107+
return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
108+
} \
109+
\
110+
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
111+
return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
112+
}
111113

112-
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ADDR_SPACE, ARG2_TYPE) \
113-
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ADDR_SPACE ARG2_TYPE##2 *y) { \
114-
return (RET_TYPE##2)( \
115-
FUNCTION(x.x, (ARG2_TYPE*)y), \
116-
FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)) \
117-
); \
118-
} \
119-
\
120-
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ADDR_SPACE ARG2_TYPE##3 *y) { \
121-
return (RET_TYPE##3)( \
122-
FUNCTION(x.x, (ARG2_TYPE*)y), \
123-
FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)), \
124-
FUNCTION(x.z, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+2)) \
125-
); \
126-
} \
127-
\
128-
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ADDR_SPACE ARG2_TYPE##4 *y) { \
129-
return (RET_TYPE##4)( \
130-
FUNCTION(x.lo, (ARG2_TYPE##2*)y), \
131-
FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##2*)((ADDR_SPACE ARG2_TYPE*)y+2)) \
132-
); \
133-
} \
134-
\
135-
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ADDR_SPACE ARG2_TYPE##8 *y) { \
136-
return (RET_TYPE##8)( \
137-
FUNCTION(x.lo, (ARG2_TYPE##4*)y), \
138-
FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##4*)((ADDR_SPACE ARG2_TYPE*)y+4)) \
139-
); \
140-
} \
141-
\
142-
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ADDR_SPACE ARG2_TYPE##16 *y) { \
143-
return (RET_TYPE##16)( \
144-
FUNCTION(x.lo, (ARG2_TYPE##8*)y), \
145-
FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##8*)((ADDR_SPACE ARG2_TYPE*)y+8)) \
146-
); \
114+
// _CLC_V_V_VP_VECTORIZE: defines FUNCTION overloads for vector widths 2, 3,
// 4, 8 and 16 that take a vector x of ARG1_TYPE and a pointer y, qualified
// with ADDR_SPACE, to a vector of ARG2_TYPE.
//   DECLSPEC   - declaration specifiers placed in front of every overload
//   RET_TYPE   - scalar element type of the result
//   ADDR_SPACE - address-space qualifier applied to y and to every cast of it
// Widths 2 and 3 call the scalar FUNCTION once per component, passing y
// reinterpreted as a pointer to scalar ARG2_TYPE advanced by the component
// index. Widths 4, 8 and 16 split x into .lo/.hi and call the half-width
// overload, passing y reinterpreted as a pointer to the half-width vector.
// The offsets (+1, +2, +4, +8) are applied after casting y to a scalar
// pointer, so they count scalar elements rather than whole vectors.
// NOTE(review): __CLC_XCONCAT is not defined in this view; presumably it is
// provided by <utils.h> and pastes its two arguments after macro expansion,
// so that macro-valued type arguments work -- confirm.
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
115+
ADDR_SPACE, ARG2_TYPE) \
116+
DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
117+
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
118+
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
119+
return (__CLC_XCONCAT(RET_TYPE, 2))( \
120+
FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
121+
FUNCTION(x.y, \
122+
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \
123+
} \
124+
\
125+
DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
126+
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
127+
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
128+
return (__CLC_XCONCAT(RET_TYPE, 3))( \
129+
FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
130+
FUNCTION(x.y, \
131+
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \
132+
FUNCTION(x.z, \
133+
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
134+
} \
135+
\
136+
DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
137+
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
138+
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
139+
return (__CLC_XCONCAT(RET_TYPE, 4))( \
140+
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \
141+
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
142+
ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
143+
} \
144+
\
145+
DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
146+
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
147+
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
148+
return (__CLC_XCONCAT(RET_TYPE, 8))( \
149+
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \
150+
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
151+
ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \
152+
} \
153+
\
154+
DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
155+
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
156+
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
157+
return (__CLC_XCONCAT(RET_TYPE, 16))( \
158+
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \
159+
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
160+
ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \
147161
}
148162

149163
#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
@@ -161,3 +175,26 @@ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
161175
return BUILTIN(x); \
162176
} \
163177
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
178+
179+
#ifdef cl_khr_fp16
180+
181+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
182+
// _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION): defines the scalar half overload
// of FUNCTION by widening the argument to float, calling the float overload
// of FUNCTION, and converting the result back to half; then expands
// _CLC_UNARY_VECTORIZE for the half type.
183+
#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \
184+
_CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \
185+
return (half)FUNCTION((float)x); \
186+
} \
187+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half)
188+
// _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION): two-argument counterpart of the
// macro above; both half arguments are widened to float before the call and
// _CLC_BINARY_VECTORIZE is expanded for the half type.
189+
#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \
190+
_CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \
191+
return (half)FUNCTION((float)x, (float)y); \
192+
} \
193+
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)
194+
195+
#else
196+
// Without cl_khr_fp16 both macros expand to nothing, so call sites can use
// them unconditionally.
197+
#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
198+
#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION)
199+
200+
#endif

0 commit comments

Comments
 (0)