Skip to content

Commit 285b411

Browse files
authored
[libclc] Move sqrt to CLC library (#128748)
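This is fairly straightforward for most targets: the generic CLC sqrt now simply calls the element-wise sqrt builtin. We also remove a legacy pre-filtering of the input argument (negative inputs were explicitly mapped to NaN before calling the intrinsic), which the intrinsic now officially handles. AMDGPU provides its own implementation of sqrt for double types; this commit moves that implementation into the CLC library as the AMDGPU definition of __clc_sqrt for double. The generic ('default') CLC sqrt is declared with weak linkage so that AMDGPU can override it only for the types it cares about, while every other type falls back to the generic builtin-based definition.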
1 parent 0865a38 commit 285b411

File tree

12 files changed

+44
-62
lines changed

12 files changed

+44
-62
lines changed

libclc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
2929
# CLC internal libraries
3030
clc/lib/generic/SOURCES;
3131
clc/lib/amdgcn/SOURCES;
32+
clc/lib/amdgpu/SOURCES;
3233
clc/lib/clspv/SOURCES;
3334
clc/lib/spirv/SOURCES;
3435
)

libclc/amdgpu/lib/SOURCES

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,3 @@ math/half_log2.cl
1010
math/half_recip.cl
1111
math/half_rsqrt.cl
1212
math/half_sqrt.cl
13-
math/sqrt.cl

libclc/clc/include/clc/float/definitions.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#define MAXFLOAT 0x1.fffffep127f
22
#define HUGE_VALF __builtin_huge_valf()
33
#define INFINITY __builtin_inff()
4-
#define NAN __builtin_nanf("")
54

65
#define FLT_DIG 6
76
#define FLT_MANT_DIG 24
@@ -13,6 +12,7 @@
1312
#define FLT_MAX MAXFLOAT
1413
#define FLT_MIN 0x1.0p-126f
1514
#define FLT_EPSILON 0x1.0p-23f
15+
#define FLT_NAN __builtin_nanf("")
1616

1717
#define FP_ILOGB0 (-2147483647 - 1)
1818
#define FP_ILOGBNAN 2147483647
@@ -46,6 +46,7 @@
4646
#define DBL_MAX 0x1.fffffffffffffp1023
4747
#define DBL_MIN 0x1.0p-1022
4848
#define DBL_EPSILON 0x1.0p-52
49+
#define DBL_NAN __builtin_nan("")
4950

5051
#define M_E 0x1.5bf0a8b145769p+1
5152
#define M_LOG2E 0x1.71547652b82fep+0
@@ -80,6 +81,7 @@
8081
#define HALF_MAX 0x1.ffcp15h
8182
#define HALF_MIN 0x1.0p-14h
8283
#define HALF_EPSILON 0x1.0p-10h
84+
#define HALF_NAN __builtin_nanf16("")
8385

8486
#define M_LOG2E_H 0x1.714p+0h
8587

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
#include <clc/clcfunc.h>
2-
#include <clc/clctypes.h>
1+
#ifndef __CLC_MATH_CLC_SQRT_H__
2+
#define __CLC_MATH_CLC_SQRT_H__
33

4-
#define __CLC_FUNCTION __clc_sqrt
54
#define __CLC_BODY <clc/math/unary_decl.inc>
5+
#define __CLC_FUNCTION __clc_sqrt
6+
67
#include <clc/math/gentype.inc>
8+
79
#undef __CLC_BODY
810
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_SQRT_H__

libclc/clc/lib/amdgpu/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
math/clc_sqrt_fp64.cl

libclc/amdgpu/lib/math/sqrt.cl renamed to libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,52 +20,43 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#include "math/clc_sqrt.h"
24-
#include <clc/clc.h>
2523
#include <clc/clcmacro.h>
26-
27-
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
28-
29-
#ifdef cl_khr_fp16
30-
31-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
32-
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
33-
34-
#endif
24+
#include <clc/internal/clc.h>
25+
#include <clc/math/clc_fma.h>
26+
#include <clc/math/clc_ldexp.h>
3527

3628
#ifdef cl_khr_fp64
3729

3830
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
3931

4032
#ifdef __AMDGCN__
41-
#define __clc_builtin_rsq __builtin_amdgcn_rsq
33+
#define __clc_builtin_rsq __builtin_amdgcn_rsq
4234
#else
43-
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
35+
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
4436
#endif
4537

46-
_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
47-
38+
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
4839
uint vcc = x < 0x1p-767;
4940
uint exp0 = vcc ? 0x100 : 0;
5041
unsigned exp1 = vcc ? 0xffffff80 : 0;
5142

52-
double v01 = ldexp(x, exp0);
43+
double v01 = __clc_ldexp(x, exp0);
5344
double v23 = __clc_builtin_rsq(v01);
5445
double v45 = v01 * v23;
5546
v23 = v23 * 0.5;
5647

57-
double v67 = fma(-v23, v45, 0.5);
58-
v45 = fma(v45, v67, v45);
59-
double v89 = fma(-v45, v45, v01);
60-
v23 = fma(v23, v67, v23);
61-
v45 = fma(v89, v23, v45);
62-
v67 = fma(-v45, v45, v01);
63-
v23 = fma(v67, v23, v45);
48+
double v67 = __clc_fma(-v23, v45, 0.5);
49+
v45 = __clc_fma(v45, v67, v45);
50+
double v89 = __clc_fma(-v45, v45, v01);
51+
v23 = __clc_fma(v23, v67, v23);
52+
v45 = __clc_fma(v89, v23, v45);
53+
v67 = __clc_fma(-v45, v45, v01);
54+
v23 = __clc_fma(v67, v23, v45);
6455

65-
v23 = ldexp(v23, exp1);
66-
return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
56+
v23 = __clc_ldexp(v23, exp1);
57+
return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
6758
}
6859

69-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
60+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
7061

7162
#endif

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ math/clc_nan.cl
3333
math/clc_nextafter.cl
3434
math/clc_rint.cl
3535
math/clc_round.cl
36+
math/clc_sqrt.cl
3637
math/clc_sw_fma.cl
3738
math/clc_trunc.cl
3839
relational/clc_all.cl

libclc/generic/lib/math/clc_sqrt.cl renamed to libclc/clc/lib/generic/math/clc_sqrt.cl

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,8 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#include <clc/clc.h>
23+
#include <clc/float/definitions.h>
24+
#include <clc/internal/clc.h>
2425

25-
// Map the llvm sqrt intrinsic to an OpenCL function.
26-
#define __CLC_FUNCTION __clc_llvm_intr_sqrt
27-
#define __CLC_INTRINSIC "llvm.sqrt"
28-
#include <clc/math/unary_intrin.inc>
29-
#undef __CLC_FUNCTION
30-
#undef __CLC_INTRINSIC
31-
32-
#define __CLC_BODY <clc_sqrt_impl.inc>
26+
#define __CLC_BODY <clc_sqrt.inc>
3327
#include <clc/math/gentype.inc>

libclc/generic/lib/math/clc_sqrt_impl.inc renamed to libclc/clc/lib/generic/math/clc_sqrt.inc

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,7 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#if __CLC_FPSIZE == 64
24-
#define __CLC_NAN __builtin_nan("")
25-
#define ZERO 0.0
26-
#elif __CLC_FPSIZE == 32
27-
#define __CLC_NAN NAN
28-
#define ZERO 0.0f
29-
#elif __CLC_FPSIZE == 16
30-
#define __CLC_NAN (half)NAN
31-
#define ZERO 0.0h
32-
#endif
33-
34-
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
35-
return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
23+
__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
24+
__clc_sqrt(__CLC_GENTYPE val) {
25+
return __builtin_elementwise_sqrt(val);
3626
}
37-
38-
#undef __CLC_NAN
39-
#undef ZERO

libclc/generic/lib/SOURCES

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ math/sincos.cl
179179
math/sincos_helpers.cl
180180
math/sinh.cl
181181
math/sinpi.cl
182-
math/clc_sqrt.cl
183182
math/sqrt.cl
184183
math/clc_tan.cl
185184
math/tan.cl

libclc/generic/lib/math/clc_hypot.cl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <clc/math/clc_mad.h>
2828
#include <clc/math/clc_subnormal_config.h>
2929
#include <clc/math/math.h>
30+
#include <clc/math/clc_sqrt.h>
3031
#include <clc/relational/clc_isnan.h>
3132
#include <clc/shared/clc_clamp.h>
3233
#include <math/clc_hypot.h>
@@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
4950
float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
5051
float fx = as_float(ux) * fi_exp;
5152
float fy = as_float(uy) * fi_exp;
52-
retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
53+
retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
5354

5455
retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
5556
retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
@@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
8182
double ay = y * preadjust;
8283

8384
// The post adjust may overflow, but this can't be avoided in any case
84-
double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
85+
double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
8586

8687
// If the difference in exponents between x and y is large
8788
double s = x + y;

libclc/generic/lib/math/sqrt.cl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
*/
2222

2323
#include <clc/clc.h>
24-
#include "math/clc_sqrt.h"
24+
#include <clc/math/clc_sqrt.h>
2525

26-
#define __CLC_FUNCTION sqrt
27-
#include <clc/math/unary_builtin.inc>
26+
#define FUNCTION sqrt
27+
#define __CLC_BODY <clc/shared/unary_def.inc>
28+
29+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)