Skip to content

Commit cfc8ef0

Browse files
authored
[libclc] Move copysign to CLC library; fix & optimize (#124598)
This commit moves the implementation of the copysign builtin to the CLC library. It simultaneously optimizes it for vector types by avoiding scalarization: it uses the __builtin_elementwise_copysign Clang builtin, which can handle vector types directly. It also fixes a bug in the half/fp16 implementation of the builtin. The old version used an incorrect mask (0x7FFFF instead of 0x7FFF) and thus preserved the original sign bit, rather than masking it out.
1 parent 4a00c84 commit cfc8ef0

File tree

8 files changed

+57
-24
lines changed

8 files changed

+57
-24
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Header for the internal __clc_copysign builtin.
#ifndef __CLC_MATH_CLC_COPYSIGN_H__
2+
#define __CLC_MATH_CLC_COPYSIGN_H__
3+
4+
// Configure the generic-type expansion: __CLC_BODY names the file to expand
// and __CLC_FUNCTION names the function it should refer to.
// NOTE(review): binary_decl.inc is not visible here; presumably it declares a
// two-operand overload of __CLC_FUNCTION -- confirm.
#define __CLC_BODY <clc/shared/binary_decl.inc>
5+
#define __CLC_FUNCTION __clc_copysign
6+
7+
// NOTE(review): gentype.inc is not visible here; presumably it includes
// __CLC_BODY once per supported __CLC_GENTYPE -- confirm.
#include <clc/math/gentype.inc>
8+
9+
// Drop the configuration macros so they do not leak into the including file.
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_COPYSIGN_H__
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Shared body for two-operand builtins: defines FUNCTION(a, b) as a thin
// wrapper around an internal implementation. The includer must define
// FUNCTION (and __CLC_GENTYPE must be set, presumably by gentype.inc).
#include <clc/utils.h>
2+
3+
// Unless the includer supplies its own mapping, the implementation name is
// FUNCTION's name with a "__clc_" prefix pasted on via __CLC_CONCAT.
#ifndef __CLC_FUNCTION
4+
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
5+
#endif
6+
7+
// Overload of FUNCTION for the current __CLC_GENTYPE; forwards both operands
// unchanged to __CLC_FUNCTION(FUNCTION) and returns its result.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
8+
__CLC_GENTYPE b) {
9+
return __CLC_FUNCTION(FUNCTION)(a, b);
10+
}

libclc/clc/lib/clspv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
../generic/math/clc_ceil.cl
2+
../generic/math/clc_copysign.cl
23
../generic/math/clc_fabs.cl
34
../generic/math/clc_floor.cl
45
../generic/math/clc_mad.cl

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ geometric/clc_dot.cl
55
integer/clc_abs.cl
66
integer/clc_abs_diff.cl
77
math/clc_ceil.cl
8+
math/clc_copysign.cl
89
math/clc_fabs.cl
910
math/clc_floor.cl
1011
math/clc_mad.cl
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Definitions of the internal __clc_copysign builtin for float, double and
// half, each mapped onto __builtin_elementwise_copysign.
#include <clc/clcmacro.h>
2+
#include <clc/internal/clc.h>
3+
4+
// float variant.
// NOTE(review): the macro is defined outside this view; its arguments look
// like (return type, function name, builtin, operand types), and by its name
// it passes vector operands to the builtin without scalarizing -- confirm
// against clcmacro.h.
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign,
5+
__builtin_elementwise_copysign, float,
6+
float)
7+
8+
// double variant, only when the target advertises cl_khr_fp64.
#ifdef cl_khr_fp64
9+
10+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
11+
12+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign,
13+
__builtin_elementwise_copysign, double,
14+
double)
15+
16+
#endif
17+
18+
// half variant, only when the target advertises cl_khr_fp16.
#ifdef cl_khr_fp16
19+
20+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
21+
22+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign,
23+
__builtin_elementwise_copysign, half,
24+
half)
25+
26+
#endif
27+

libclc/clc/lib/spirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
../generic/common/clc_smoothstep.cl
44
../generic/geometric/clc_dot.cl
55
../generic/math/clc_ceil.cl
6+
../generic/math/clc_copysign.cl
67
../generic/math/clc_fabs.cl
78
../generic/math/clc_floor.cl
89
../generic/math/clc_mad.cl

libclc/clc/lib/spirv64/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
../generic/common/clc_smoothstep.cl
44
../generic/geometric/clc_dot.cl
55
../generic/math/clc_ceil.cl
6+
../generic/math/clc_copysign.cl
67
../generic/math/clc_fabs.cl
78
../generic/math/clc_floor.cl
89
../generic/math/clc_mad.cl

libclc/generic/lib/math/copysign.cl

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,7 @@
11
#include <clc/clc.h>
2-
#include <clc/clcmacro.h>
2+
#include <clc/math/clc_copysign.h>
33

4-
_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
4+
// Generate the public copysign overloads from the shared binary_def.inc body,
// which forwards FUNCTION(a, b) to the "__clc_"-prefixed implementation
// (here __clc_copysign).
#define FUNCTION copysign
5+
#define __CLC_BODY <clc/shared/binary_def.inc>
56

6-
#ifdef cl_khr_fp64
7-
8-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
9-
10-
_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
11-
12-
#endif
13-
14-
#ifdef cl_khr_fp16
15-
16-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
17-
18-
// Removed scalar half implementation, kept here as the "before" side of the
// diff. It ORs the sign bit of x with the bits of y masked by 0x7ffffu.
// That mask is wider than 16 bits, so it clears nothing in a ushort and y's
// own sign bit survives into the result.
// NOTE(review): it also takes the sign from x and the magnitude from y, which
// looks swapped relative to copysign(x, y) returning x with the sign of y --
// confirm against the OpenCL C specification.
_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
19-
{
20-
ushort sign_x = as_ushort(x) & 0x8000u;
21-
ushort unsigned_y = as_ushort(y) & 0x7ffffu;
22-
23-
return as_half((ushort)(sign_x | unsigned_y));
24-
}
25-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
26-
27-
#endif
7+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)