Skip to content

Commit da9dded

Browse files
committed
[libclc] Move sqrt to CLC library
This is fairly straightforward for most targets. AMDGPU provides its own implementation of sqrt for double types; this commit moves that implementation into the CLC library as the AMDGPU definition of __clc_sqrt. The 'default' CLC sqrt is given weak linkage so that AMDGPU can override the builtin only for the types it cares about. Since we don't yet have a CLC ldexp, and AMDGPU prefers the builtin anyway, the AMDGPU implementation uses __builtin_ldexp. There are no changes to the codegen for any AMDGPU target. There is some minor code movement on NVIDIA targets.
1 parent 8bea511 commit da9dded

File tree

12 files changed

+47
-55
lines changed

12 files changed

+47
-55
lines changed

libclc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
2828
spirv/lib/SOURCES;
2929
# CLC internal libraries
3030
clc/lib/generic/SOURCES;
31+
clc/lib/amdgpu/SOURCES;
3132
clc/lib/clspv/SOURCES;
3233
clc/lib/spirv/SOURCES;
3334
)

libclc/amdgpu/lib/SOURCES

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,3 @@ math/half_log2.cl
1010
math/half_recip.cl
1111
math/half_rsqrt.cl
1212
math/half_sqrt.cl
13-
math/sqrt.cl

libclc/clc/include/clc/float/definitions.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#define MAXFLOAT 0x1.fffffep127f
22
#define HUGE_VALF __builtin_huge_valf()
33
#define INFINITY __builtin_inff()
4-
#define NAN __builtin_nanf("")
54

65
#define FLT_DIG 6
76
#define FLT_MANT_DIG 24
@@ -13,6 +12,7 @@
1312
#define FLT_MAX MAXFLOAT
1413
#define FLT_MIN 0x1.0p-126f
1514
#define FLT_EPSILON 0x1.0p-23f
15+
#define FLT_NAN __builtin_nanf("")
1616

1717
#define FP_ILOGB0 (-2147483647 - 1)
1818
#define FP_ILOGBNAN 2147483647
@@ -46,6 +46,7 @@
4646
#define DBL_MAX 0x1.fffffffffffffp1023
4747
#define DBL_MIN 0x1.0p-1022
4848
#define DBL_EPSILON 0x1.0p-52
49+
#define DBL_NAN __builtin_nan("")
4950

5051
#define M_E 0x1.5bf0a8b145769p+1
5152
#define M_LOG2E 0x1.71547652b82fep+0
@@ -80,6 +81,7 @@
8081
#define HALF_MAX 0x1.ffcp15h
8182
#define HALF_MIN 0x1.0p-14h
8283
#define HALF_EPSILON 0x1.0p-10h
84+
#define HALF_NAN __builtin_nanf16("")
8385

8486
#define M_LOG2E_H 0x1.714p+0h
8587

libclc/clc/include/clc/math/clc_sqrt.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
#include <clc/clcfunc.h>
2-
#include <clc/clctypes.h>
1+
#ifndef __CLC_MATH_CLC_SQRT_H__
2+
#define __CLC_MATH_CLC_SQRT_H__
33

4-
#define __CLC_FUNCTION __clc_sqrt
54
#define __CLC_BODY <clc/math/unary_decl.inc>
5+
#define __CLC_FUNCTION __clc_sqrt
6+
67
#include <clc/math/gentype.inc>
8+
79
#undef __CLC_BODY
810
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_SQRT_H__

libclc/clc/lib/amdgpu/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
math/clc_sqrt_fp64.cl

libclc/amdgpu/lib/math/sqrt.cl renamed to libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,52 +20,42 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#include "math/clc_sqrt.h"
24-
#include <clc/clc.h>
2523
#include <clc/clcmacro.h>
26-
27-
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
28-
29-
#ifdef cl_khr_fp16
30-
31-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
32-
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
33-
34-
#endif
24+
#include <clc/internal/clc.h>
25+
#include <clc/math/clc_fma.h>
3526

3627
#ifdef cl_khr_fp64
3728

3829
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
3930

4031
#ifdef __AMDGCN__
41-
#define __clc_builtin_rsq __builtin_amdgcn_rsq
32+
#define __clc_builtin_rsq __builtin_amdgcn_rsq
4233
#else
43-
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
34+
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
4435
#endif
4536

46-
_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
47-
37+
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
4838
uint vcc = x < 0x1p-767;
4939
uint exp0 = vcc ? 0x100 : 0;
5040
unsigned exp1 = vcc ? 0xffffff80 : 0;
5141

52-
double v01 = ldexp(x, exp0);
42+
double v01 = __builtin_ldexp(x, exp0);
5343
double v23 = __clc_builtin_rsq(v01);
5444
double v45 = v01 * v23;
5545
v23 = v23 * 0.5;
5646

57-
double v67 = fma(-v23, v45, 0.5);
58-
v45 = fma(v45, v67, v45);
59-
double v89 = fma(-v45, v45, v01);
60-
v23 = fma(v23, v67, v23);
61-
v45 = fma(v89, v23, v45);
62-
v67 = fma(-v45, v45, v01);
63-
v23 = fma(v67, v23, v45);
47+
double v67 = __clc_fma(-v23, v45, 0.5);
48+
v45 = __clc_fma(v45, v67, v45);
49+
double v89 = __clc_fma(-v45, v45, v01);
50+
v23 = __clc_fma(v23, v67, v23);
51+
v45 = __clc_fma(v89, v23, v45);
52+
v67 = __clc_fma(-v45, v45, v01);
53+
v23 = __clc_fma(v67, v23, v45);
6454

65-
v23 = ldexp(v23, exp1);
66-
return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
55+
v23 = __builtin_ldexp(v23, exp1);
56+
return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
6757
}
6858

69-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
59+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
7060

7161
#endif

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ math/clc_nan.cl
3232
math/clc_nextafter.cl
3333
math/clc_rint.cl
3434
math/clc_round.cl
35+
math/clc_sqrt.cl
3536
math/clc_sw_fma.cl
3637
math/clc_trunc.cl
3738
relational/clc_all.cl

libclc/generic/lib/math/clc_sqrt.cl renamed to libclc/clc/lib/generic/math/clc_sqrt.cl

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,8 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#include <clc/clc.h>
23+
#include <clc/float/definitions.h>
24+
#include <clc/internal/clc.h>
2425

25-
// Map the llvm sqrt intrinsic to an OpenCL function.
26-
#define __CLC_FUNCTION __clc_llvm_intr_sqrt
27-
#define __CLC_INTRINSIC "llvm.sqrt"
28-
#include <clc/math/unary_intrin.inc>
29-
#undef __CLC_FUNCTION
30-
#undef __CLC_INTRINSIC
31-
32-
#define __CLC_BODY <clc_sqrt_impl.inc>
26+
#define __CLC_BODY <clc_sqrt.inc>
3327
#include <clc/math/gentype.inc>

libclc/generic/lib/math/clc_sqrt_impl.inc renamed to libclc/clc/lib/generic/math/clc_sqrt.inc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,17 @@
2121
*/
2222

2323
#if __CLC_FPSIZE == 64
24-
#define __CLC_NAN __builtin_nan("")
25-
#define ZERO 0.0
24+
#define __CLC_NAN DBL_NAN
2625
#elif __CLC_FPSIZE == 32
27-
#define __CLC_NAN NAN
28-
#define ZERO 0.0f
26+
#define __CLC_NAN FLT_NAN
2927
#elif __CLC_FPSIZE == 16
30-
#define __CLC_NAN (half)NAN
31-
#define ZERO 0.0h
28+
#define __CLC_NAN HALF_NAN
3229
#endif
3330

34-
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
35-
return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
31+
__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
32+
__clc_sqrt(__CLC_GENTYPE val) {
33+
return val < __CLC_FP_LIT(0.0) ? (__CLC_GENTYPE)__CLC_NAN
34+
: __builtin_elementwise_sqrt(val);
3635
}
3736

3837
#undef __CLC_NAN
39-
#undef ZERO

libclc/generic/lib/SOURCES

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ math/sincos.cl
180180
math/sincos_helpers.cl
181181
math/sinh.cl
182182
math/sinpi.cl
183-
math/clc_sqrt.cl
184183
math/sqrt.cl
185184
math/clc_tan.cl
186185
math/tan.cl

libclc/generic/lib/math/clc_hypot.cl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <clc/math/clc_mad.h>
2828
#include <clc/math/clc_subnormal_config.h>
2929
#include <clc/math/math.h>
30+
#include <clc/math/clc_sqrt.h>
3031
#include <clc/relational/clc_isnan.h>
3132
#include <clc/shared/clc_clamp.h>
3233
#include <math/clc_hypot.h>
@@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
4950
float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
5051
float fx = as_float(ux) * fi_exp;
5152
float fy = as_float(uy) * fi_exp;
52-
retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
53+
retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
5354

5455
retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
5556
retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
@@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
8182
double ay = y * preadjust;
8283

8384
// The post adjust may overflow, but this can't be avoided in any case
84-
double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
85+
double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
8586

8687
// If the difference in exponents between x and y is large
8788
double s = x + y;

libclc/generic/lib/math/sqrt.cl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
*/
2222

2323
#include <clc/clc.h>
24-
#include "math/clc_sqrt.h"
24+
#include <clc/math/clc_sqrt.h>
2525

26-
#define __CLC_FUNCTION sqrt
27-
#include <clc/math/unary_builtin.inc>
26+
#define FUNCTION sqrt
27+
#define __CLC_BODY <clc/shared/unary_def.inc>
28+
29+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)