Skip to content

Commit eaa5897

Browse files
authored
[libclc] Optimize CLC vector is(un)ordered builtins (#124546)
These changes are similar to those in 347fb20, but these builtins are expressed in terms of other builtins. The generated LLVM IR features the same fcmp ord/uno comparisons as before, but now consistently in vector form.
1 parent f1d5e70 commit eaa5897

File tree

3 files changed

+18
-109
lines changed

3 files changed

+18
-109
lines changed

libclc/clc/include/clc/relational/relational.h

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -63,85 +63,6 @@
6363
ARG_TYPE) \
6464
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)
6565

66-
#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
67-
ARG0_TYPE, ARG1_TYPE) \
68-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
69-
return BUILTIN_NAME(x, y); \
70-
}
71-
72-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \
73-
ARG1_TYPE) \
74-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
75-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
76-
FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
77-
}
78-
79-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \
80-
ARG1_TYPE) \
81-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
82-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
83-
FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
84-
}
85-
86-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \
87-
ARG1_TYPE) \
88-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
89-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
90-
FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
91-
}
92-
93-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \
94-
ARG1_TYPE) \
95-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
96-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
97-
FUNCTION(x.s2, y.s2), \
98-
FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \
99-
}
100-
101-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \
102-
ARG1_TYPE) \
103-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
104-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
105-
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
106-
FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
107-
FUNCTION(x.s6, y.s6), \
108-
FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \
109-
}
110-
111-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \
112-
ARG1_TYPE) \
113-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
114-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
115-
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
116-
FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
117-
FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
118-
FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \
119-
FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
120-
FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \
121-
FUNCTION(x.se, y.se), \
122-
FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \
123-
}
124-
125-
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
126-
ARG1_TYPE) \
127-
_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \
128-
ARG1_TYPE##2) \
129-
_CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \
130-
ARG1_TYPE##3) \
131-
_CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \
132-
ARG1_TYPE##4) \
133-
_CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \
134-
ARG1_TYPE##8) \
135-
_CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \
136-
ARG1_TYPE##16)
137-
138-
#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
139-
ARG0_TYPE, ARG1_TYPE) \
140-
_CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
141-
ARG0_TYPE, ARG1_TYPE) \
142-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
143-
ARG1_TYPE)
144-
14566
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
14667
ARG1_TYPE, ARG2_TYPE) \
14768
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \

libclc/clc/lib/generic/relational/clc_isordered.cl

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,29 @@
22
#include <clc/relational/clc_isequal.h>
33
#include <clc/relational/relational.h>
44

5-
#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
6-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
7-
return __clc_isequal(x, x) && __clc_isequal(y, y); \
8-
}
5+
#define _CLC_RELATIONAL_OP(X, Y) \
6+
__clc_isequal((X), (X)) && __clc_isequal((Y), (Y))
97

10-
_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float)
11-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float)
8+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isordered, float, float)
129

1310
#ifdef cl_khr_fp64
11+
1412
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1513

1614
// The scalar version of __clc_isordered(double, double) returns an int, but the
1715
// vector versions return long.
18-
19-
_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double)
20-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double)
16+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isordered, double, double)
2117

2218
#endif
19+
2320
#ifdef cl_khr_fp16
21+
2422
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2523

2624
// The scalar version of __clc_isordered(half, half) returns an int, but the
2725
// vector versions return short.
28-
29-
_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half)
30-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half)
26+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isordered, half, half)
3127

3228
#endif
3329

34-
#undef _CLC_DEFINE_ISORDERED
30+
#undef _CLC_RELATIONAL_OP
libclc/clc/lib/generic/relational/clc_isunordered.cl

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,30 @@
11
#include <clc/internal/clc.h>
2+
#include <clc/relational/clc_isequal.h>
23
#include <clc/relational/relational.h>
34

4-
// Note: It would be nice to use __builtin_isunordered with vector inputs, but
5-
// it seems to only take scalar values as input, which will produce incorrect
6-
// output for vector input types.
5+
#define _CLC_RELATIONAL_OP(X, Y) \
6+
!__clc_isequal((X), (X)) || !__clc_isequal((Y), (Y))
77

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
9-
float, float)
8+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isunordered, float, float)
109

1110
#ifdef cl_khr_fp64
1211

1312
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1413

1514
// The scalar version of __clc_isunordered(double, double) returns an int, but
1615
// the vector versions return long.
17-
18-
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) {
19-
return __builtin_isunordered(x, y);
20-
}
21-
22-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double)
16+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isunordered, double, double)
2317

2418
#endif
19+
2520
#ifdef cl_khr_fp16
2621

2722
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2823

2924
// The scalar version of __clc_isunordered(half, half) returns an int, but the
3025
// vector versions return short.
31-
32-
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) {
33-
return __builtin_isunordered(x, y);
34-
}
35-
36-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half)
26+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isunordered, half, half)
3727

3828
#endif
29+
30+
#undef _CLC_RELATIONAL_OP

0 commit comments

Comments
 (0)