Skip to content

[libclc] Optimize CLC vector relational builtins #124537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions libclc/clc/include/clc/relational/relational.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,30 @@
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE)

// Defines the full overload set of FUNCTION(x, y) for a simple binary
// relational builtin:
//   - one scalar overload taking (ARG1_TYPE, ARG2_TYPE) and returning RET_TYPE;
//   - vector overloads of width 2, 3, 4, 8 and 16 taking
//     (ARG1_TYPE##N, ARG2_TYPE##N) and returning RET_TYPE_VEC##N.
//
// RET_TYPE and RET_TYPE_VEC are separate parameters so that the scalar
// overload can return a different element type than the vector overloads
// (e.g. int for the scalar and long for the vectors).
//
// Every generated body is `return _CLC_RELATIONAL_OP(x, y);`. This macro does
// not define _CLC_RELATIONAL_OP itself: the invoking file is expected to
// define it as a two-argument macro before using this one.
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
return _CLC_RELATIONAL_OP(x, y); \
} \
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##2 FUNCTION(ARG1_TYPE##2 x, \
ARG2_TYPE##2 y) { \
return _CLC_RELATIONAL_OP(x, y); \
} \
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##3 FUNCTION(ARG1_TYPE##3 x, \
ARG2_TYPE##3 y) { \
return _CLC_RELATIONAL_OP(x, y); \
} \
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##4 FUNCTION(ARG1_TYPE##4 x, \
ARG2_TYPE##4 y) { \
return _CLC_RELATIONAL_OP(x, y); \
} \
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##8 FUNCTION(ARG1_TYPE##8 x, \
ARG2_TYPE##8 y) { \
return _CLC_RELATIONAL_OP(x, y); \
} \
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##16 FUNCTION(ARG1_TYPE##16 x, \
ARG2_TYPE##16 y) { \
return _CLC_RELATIONAL_OP(x, y); \
}

#endif // __CLC_RELATIONAL_RELATIONAL_H__
38 changes: 11 additions & 27 deletions libclc/clc/lib/generic/relational/clc_isequal.cl
Original file line number Diff line number Diff line change
@@ -1,44 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Emits a single overload of FN taking (LHS_T, RHS_T) and returning RESULT_T;
// the returned value is the result of comparing the two arguments with ==.
#define _CLC_DEFINE_ISEQUAL(RESULT_T, FN, LHS_T, RHS_T)                        \
  _CLC_DEF _CLC_OVERLOAD RESULT_T FN(LHS_T x, RHS_T y) { return x == y; }
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_isequal overloads. The whole expansion is parenthesized so that it
// stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) == (Y))

_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float)
_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2)
_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3)
_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4)
_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8)
_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isequal, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isequal(double) returns an int, but the vector
// versions return long.
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double)
_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2)
_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3)
_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4)
_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8)
_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16)
// The scalar version of __clc_isequal(double, double) returns an int, but the
// vector versions return long.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isequal, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isequal(half) returns an int, but the vector
// versions return short.
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half)
_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2)
_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3)
_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4)
_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8)
_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16)
// The scalar version of __clc_isequal(half, half) returns an int, but the
// vector versions return short.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isequal, half, half)

#endif

#undef _CLC_DEFINE_ISEQUAL
#undef _CLC_RELATIONAL_OP
23 changes: 6 additions & 17 deletions libclc/clc/lib/generic/relational/clc_isgreater.cl
Original file line number Diff line number Diff line change
@@ -1,25 +1,17 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_isgreater with vector inputs, but it
// seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_isgreater overloads. The whole expansion is parenthesized so that it
// stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) > (Y))

_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreater, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isgreater(double, double) returns an int, but the
// vector versions return long.

_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(double x, double y) {
return __builtin_isgreater(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreater, double, double)

#endif

Expand All @@ -29,11 +21,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)

// The scalar version of __clc_isgreater(half, half) returns an int, but the
// vector versions return short.

_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(half x, half y) {
return __builtin_isgreater(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreater, half, half)

#endif

#undef _CLC_RELATIONAL_OP
30 changes: 11 additions & 19 deletions libclc/clc/lib/generic/relational/clc_isgreaterequal.cl
Original file line number Diff line number Diff line change
@@ -1,39 +1,31 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_isgreaterequal with vector inputs,
// but it seems to only take scalar values as input, which will produce
// incorrect output for vector input types.
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_isgreaterequal overloads. The whole expansion is parenthesized so that
// it stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) >= (Y))

_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
__builtin_isgreaterequal, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreaterequal, float,
float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isgreaterequal(double, double) returns an int,
// but the vector versions return long.

_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(double x, double y) {
return __builtin_isgreaterequal(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double,
double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreaterequal, double,
double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
// the vector versions return short.

_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(half x, half y) {
return __builtin_isgreaterequal(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreaterequal, half,
half)

#endif

#undef _CLC_RELATIONAL_OP
31 changes: 11 additions & 20 deletions libclc/clc/lib/generic/relational/clc_isless.cl
Original file line number Diff line number Diff line change
@@ -1,37 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_isless with vector inputs, but it
// seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_isless overloads. The whole expansion is parenthesized so that it
// stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) < (Y))

_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isless, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isless(double, double) returns an int, but the
// vector versions return long.

_CLC_DEF _CLC_OVERLOAD int __clc_isless(double x, double y) {
return __builtin_isless(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double)
// The scalar version of __clc_isless(double, double) returns an int, but
// the vector versions return long.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isless, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isless(half, half) returns an int, but the vector
// versions return short.

_CLC_DEF _CLC_OVERLOAD int __clc_isless(half x, half y) {
return __builtin_isless(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half)
// The scalar version of __clc_isless(half, half) returns an int, but the
// vector versions return short.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isless, half, half)

#endif

#undef _CLC_RELATIONAL_OP
24 changes: 7 additions & 17 deletions libclc/clc/lib/generic/relational/clc_islessequal.cl
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_islessequal with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_islessequal overloads. The whole expansion is parenthesized so that it
// stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) <= (Y))

_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessequal, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_islessequal(double, double) returns an int, but
// the vector versions return long.

_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(double x, double y) {
return __builtin_islessequal(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessequal, double,
double)

#endif

Expand All @@ -29,11 +22,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)

// The scalar version of __clc_islessequal(half, half) returns an int, but the
// vector versions return short.

_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(half x, half y) {
return __builtin_islessequal(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessequal, half, half)

#endif

#undef _CLC_RELATIONAL_OP
27 changes: 10 additions & 17 deletions libclc/clc/lib/generic/relational/clc_islessgreater.cl
Original file line number Diff line number Diff line change
@@ -1,38 +1,31 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_islessgreater with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_islessgreater overloads: true when X is less than Y or X is greater
// than Y. The whole expansion is parenthesized so that the || does not
// regroup with operators surrounding the macro invocation.
#define _CLC_RELATIONAL_OP(X, Y) (((X) < (Y)) || ((X) > (Y)))

_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessgreater, float,
float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_islessgreater(double, double) returns an int, but
// the vector versions return long.

_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(double x, double y) {
return __builtin_islessgreater(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessgreater, double,
double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_islessgreater(half, half) returns an int, but the
// vector versions return short.

_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(half x, half y) {
return __builtin_islessgreater(x, y);
}

_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessgreater, half,
half)

#endif

#undef _CLC_RELATIONAL_OP
21 changes: 8 additions & 13 deletions libclc/clc/lib/generic/relational/clc_isnotequal.cl
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>

// Emits a single overload of FN taking (LHS_T, RHS_T) and returning RESULT_T;
// the returned value is the result of comparing the two arguments with !=.
#define _CLC_DEFINE_ISNOTEQUAL(RESULT_T, FN, LHS_T, RHS_T)                     \
  _CLC_DEF _CLC_OVERLOAD RESULT_T FN(LHS_T x, RHS_T y) { return x != y; }
// Comparison applied by _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY to build the
// __clc_isnotequal overloads. The whole expansion is parenthesized so that it
// stays a single operand when placed next to other operators.
#define _CLC_RELATIONAL_OP(X, Y) ((X) != (Y))

_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isnotequal, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isnotequal(double, double) returns an int, but
// the vector versions return long.

_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isnotequal, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isnotequal(half, half) returns an int, but the
// vector versions return short.

_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isnotequal, half, half)

#endif

#undef _CLC_DEFINE_ISNOTEQUAL
#undef _CLC_RELATIONAL_OP
Loading