[libclc] Move copysign to CLC library; fix & optimize (#124598)

frasercrmck · web-flow · commit cfc8ef0ad8f7 · 2025-01-28T09:18:34.000Z
This commit moves the implementation of the copysign builtin to the CLC
library.

It simultaneously optimizes it for vector types by avoiding
scalarization. It does so by using the __builtin_elementwise_copysign
clang builtins, which can handle vector types.

It also fixes a bug in the half/fp16 implementation of the builtin. This
version was using an incorrect mask (0x7FFFF instead of 0x7FFF) and was
thus preserving the original sign bit, rather than masking it out.
diff --git a/libclc/clc/include/clc/math/clc_copysign.h b/libclc/clc/include/clc/math/clc_copysign.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_COPYSIGN_H__
+#define __CLC_MATH_CLC_COPYSIGN_H__
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_copysign
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COPYSIGN_H__
diff --git a/libclc/clc/include/clc/shared/binary_def.inc b/libclc/clc/include/clc/shared/binary_def.inc
@@ -0,0 +1,10 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, b);
+}
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
@@ -1,4 +1,5 @@
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
@@ -5,6 +5,7 @@ geometric/clc_dot.cl
 integer/clc_abs.cl
 integer/clc_abs_diff.cl
 math/clc_ceil.cl
+math/clc_copysign.cl
 math/clc_fabs.cl
 math/clc_floor.cl
 math/clc_mad.cl
diff --git a/libclc/clc/lib/generic/math/clc_copysign.cl b/libclc/clc/lib/generic/math/clc_copysign.cl
@@ -0,0 +1,27 @@
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign,
+                                        __builtin_elementwise_copysign, float,
+                                        float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign,
+                                        __builtin_elementwise_copysign, double,
+                                        double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign,
+                                        __builtin_elementwise_copysign, half,
+                                        half)
+
+#endif
+
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
@@ -3,6 +3,7 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
@@ -3,6 +3,7 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl
@@ -1,27 +1,7 @@
 #include <clc/clc.h>
-#include <clc/clcmacro.h>
+#include <clc/math/clc_copysign.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
+#define FUNCTION copysign
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
-{
-   ushort sign_x = as_ushort(x) & 0x8000u;
-   ushort unsigned_y = as_ushort(y) & 0x7ffffu;
-
-   return as_half((ushort)(sign_x | unsigned_y));
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
-
-#endif
+#include <clc/math/gentype.inc>

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`../generic/math/clc_ceil.cl`
	`2`	`+../generic/math/clc_copysign.cl`
`2`	`3`	`../generic/math/clc_fabs.cl`
`3`	`4`	`../generic/math/clc_floor.cl`
`4`	`5`	`../generic/math/clc_mad.cl`