Skip to content

[libclc] Optimize CLC vector is(un)ordered builtins #124546

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 0 additions & 79 deletions libclc/clc/include/clc/relational/relational.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,85 +63,6 @@
ARG_TYPE) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)

// Defines the scalar overload FUNCTION(ARG0_TYPE, ARG1_TYPE) as a thin wrapper
// that forwards both arguments to BUILTIN_NAME and returns its result as
// RET_TYPE.
#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, \
                                             BUILTIN_NAME, ARG0_TYPE, \
                                             ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    return BUILTIN_NAME(x, y); \
  }

// Defines a two-half vector overload of FUNCTION: FUNCTION is applied to the
// .lo and .hi halves of both arguments, the results are packed into a
// RET_TYPE value, and that value is compared against zero.
// NOTE(review): the `!= 0` presumably converts nonzero per-lane results into
// the vector "true" encoding used by OpenCL C comparisons - confirm.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                          ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    RET_TYPE lanes = (RET_TYPE){FUNCTION(x.lo, y.lo), \
                                FUNCTION(x.hi, y.hi)}; \
    return (RET_TYPE)(lanes != (RET_TYPE)0); \
  }

// Defines the 2-component overload of FUNCTION. The expansion is exactly the
// one produced by _CLC_DEFINE_RELATIONAL_BINARY_VEC (apply FUNCTION to the .lo
// and .hi halves, then compare the packed result with zero), so this macro
// simply forwards to it.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                           ARG1_TYPE) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE)

// Defines the 3-component overload of FUNCTION by applying FUNCTION to each
// component pair (s0..s2), packing the results into a RET_TYPE value and
// comparing that value against zero.
// NOTE(review): the `!= 0` presumably converts nonzero per-lane results into
// the vector "true" encoding used by OpenCL C comparisons - confirm.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                           ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    RET_TYPE lanes = (RET_TYPE){FUNCTION(x.s0, y.s0), \
                                FUNCTION(x.s1, y.s1), \
                                FUNCTION(x.s2, y.s2)}; \
    return (RET_TYPE)(lanes != (RET_TYPE)0); \
  }

// Defines the 4-component overload of FUNCTION by applying FUNCTION to each
// component pair (s0..s3), packing the results into a RET_TYPE value and
// comparing that value against zero.
// NOTE(review): the `!= 0` presumably converts nonzero per-lane results into
// the vector "true" encoding used by OpenCL C comparisons - confirm.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                           ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    RET_TYPE lanes = (RET_TYPE){FUNCTION(x.s0, y.s0), \
                                FUNCTION(x.s1, y.s1), \
                                FUNCTION(x.s2, y.s2), \
                                FUNCTION(x.s3, y.s3)}; \
    return (RET_TYPE)(lanes != (RET_TYPE)0); \
  }

// Defines the 8-component overload of FUNCTION by applying FUNCTION to each
// component pair (s0..s7), packing the results into a RET_TYPE value and
// comparing that value against zero.
// NOTE(review): the `!= 0` presumably converts nonzero per-lane results into
// the vector "true" encoding used by OpenCL C comparisons - confirm.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                           ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    RET_TYPE lanes = (RET_TYPE){FUNCTION(x.s0, y.s0), \
                                FUNCTION(x.s1, y.s1), \
                                FUNCTION(x.s2, y.s2), \
                                FUNCTION(x.s3, y.s3), \
                                FUNCTION(x.s4, y.s4), \
                                FUNCTION(x.s5, y.s5), \
                                FUNCTION(x.s6, y.s6), \
                                FUNCTION(x.s7, y.s7)}; \
    return (RET_TYPE)(lanes != (RET_TYPE)0); \
  }

// Defines the 16-component overload of FUNCTION by applying FUNCTION to each
// component pair (s0..sf), packing the results into a RET_TYPE value and
// comparing that value against zero.
// NOTE(review): the `!= 0` presumably converts nonzero per-lane results into
// the vector "true" encoding used by OpenCL C comparisons - confirm.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                            ARG1_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
    RET_TYPE lanes = (RET_TYPE){FUNCTION(x.s0, y.s0), \
                                FUNCTION(x.s1, y.s1), \
                                FUNCTION(x.s2, y.s2), \
                                FUNCTION(x.s3, y.s3), \
                                FUNCTION(x.s4, y.s4), \
                                FUNCTION(x.s5, y.s5), \
                                FUNCTION(x.s6, y.s6), \
                                FUNCTION(x.s7, y.s7), \
                                FUNCTION(x.s8, y.s8), \
                                FUNCTION(x.s9, y.s9), \
                                FUNCTION(x.sa, y.sa), \
                                FUNCTION(x.sb, y.sb), \
                                FUNCTION(x.sc, y.sc), \
                                FUNCTION(x.sd, y.sd), \
                                FUNCTION(x.se, y.se), \
                                FUNCTION(x.sf, y.sf)}; \
    return (RET_TYPE)(lanes != (RET_TYPE)0); \
  }

// Emits the vector overloads of FUNCTION for widths 2, 3, 4, 8 and 16. The
// width suffix is token-pasted onto RET_TYPE, ARG0_TYPE and ARG1_TYPE, so
// callers pass the scalar base type names (e.g. int / float).
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
                                              ARG1_TYPE) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC2( \
      RET_TYPE##2, FUNCTION, ARG0_TYPE##2, ARG1_TYPE##2) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC3( \
      RET_TYPE##3, FUNCTION, ARG0_TYPE##3, ARG1_TYPE##3) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC4( \
      RET_TYPE##4, FUNCTION, ARG0_TYPE##4, ARG1_TYPE##4) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC8( \
      RET_TYPE##8, FUNCTION, ARG0_TYPE##8, ARG1_TYPE##8) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC16( \
      RET_TYPE##16, FUNCTION, ARG0_TYPE##16, ARG1_TYPE##16)

// Emits the complete overload set for a binary relational FUNCTION: the scalar
// overload (a wrapper around BUILTIN_FUNCTION) followed by the vector
// overloads for every supported width. The same RET_TYPE base name is used
// for the scalar and for the vector return types.
#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
                                      ARG0_TYPE, ARG1_TYPE) \
  _CLC_DEFINE_RELATIONAL_BINARY_SCALAR( \
      RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \
  _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL( \
      RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE)

#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
Expand Down
22 changes: 9 additions & 13 deletions libclc/clc/lib/generic/relational/clc_isordered.cl
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,29 @@
#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>

// Defines FUNCTION(ARG1_TYPE, ARG2_TYPE) so that it returns true only when
// each argument compares equal to itself via __clc_isequal.
// NOTE(review): presumably this is the "neither operand is NaN" test, since a
// NaN does not compare equal to itself - confirm against __clc_isequal.
#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
    return __clc_isequal(x, x) && \
           __clc_isequal(y, y); \
  }
// Relational expression for __clc_isordered: true when both X and Y compare
// equal to themselves via __clc_isequal.
// NOTE(review): presumably consumed by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY,
// whose body is not visible here - confirm.
// The whole expansion is parenthesized so that it remains a single operand
// when the macro is combined with other operators (for example `!OP(a, b)` or
// `OP(a, b) == 0`), instead of those operators binding to the first
// __clc_isequal call only.
#define _CLC_RELATIONAL_OP(X, Y) \
  (__clc_isequal((X), (X)) && __clc_isequal((Y), (Y)))

// float overloads of __clc_isordered. Both the scalar and the vector forms are
// instantiated with int as the return base type.
_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isordered, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isordered(double, double) returns an int, but the
// vector versions return long.
// Hence the instantiations below pass int for the scalar return type and long
// as the base name of the vector return types.

_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isordered, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isordered(half, half) returns an int, but the
// vector versions return short.
// Hence the instantiations below pass int for the scalar return type and short
// as the base name of the vector return types.

_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isordered, half, half)

#endif

// Drop the helper macros defined earlier in this file once the overloads have
// been instantiated.
#undef _CLC_DEFINE_ISORDERED
#undef _CLC_RELATIONAL_OP
26 changes: 9 additions & 17 deletions libclc/clc/lib/generic/relational/clc_isunordered.cl
Original file line number Diff line number Diff line change
@@ -1,38 +1,30 @@
#include <clc/internal/clc.h>
#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_isunordered with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Relational expression for __clc_isunordered: true when X or Y fails to
// compare equal to itself via __clc_isequal.
// NOTE(review): presumably consumed by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY,
// whose body is not visible here - confirm.
// The whole expansion is parenthesized so that it remains a single operand
// when the macro is combined with other operators (for example `!OP(a, b)` or
// `OP(a, b) && c`), instead of those operators binding to only one side of
// the `||`.
#define _CLC_RELATIONAL_OP(X, Y) \
  (!__clc_isequal((X), (X)) || !__clc_isequal((Y), (Y)))

// float overloads of __clc_isunordered. Both the scalar and the vector forms
// are instantiated with int as the return base type.
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isunordered, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isunordered(double, double) returns an int, but
// the vector versions return long.

// Scalar double overload: forwards both operands to __builtin_isunordered and
// returns its result as an int.
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) {
  const int is_unordered = __builtin_isunordered(x, y);
  return is_unordered;
}

// double overloads of __clc_isunordered: long is the base name of the vector
// return types, while the scalar return type passed to the SIMPLE variant is
// int.
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isunordered, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isunordered(half, half) returns an int, but the
// vector versions return short.

// Scalar half overload: forwards both operands to __builtin_isunordered and
// returns its result as an int.
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) {
  const int is_unordered = __builtin_isunordered(x, y);
  return is_unordered;
}

// half overloads of __clc_isunordered: short is the base name of the vector
// return types, while the scalar return type passed to the SIMPLE variant is
// int.
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isunordered, half, half)

#endif

// Drop the helper macro defined earlier in this file once the overloads have
// been instantiated.
#undef _CLC_RELATIONAL_OP
Loading