Skip to content

Commit d01bbfd

Browse files
committed
optimize via vectors
1 parent 9c5f79e commit d01bbfd

File tree

4 files changed

+80
-39
lines changed

4 files changed

+80
-39
lines changed

libclc/clc/include/clc/math/gentype.inc

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,55 +3,69 @@
33

44
#define __CLC_SCALAR_GENTYPE float
55
#define __CLC_FPSIZE 32
6+
#define __CLC_FP_LIT(x) x##F
67

78
#define __CLC_GENTYPE float
89
#define __CLC_INTN int
10+
#define __CLC_BIT_INTN int
911
#define __CLC_SCALAR
1012
#include __CLC_BODY
1113
#undef __CLC_GENTYPE
14+
#undef __CLC_BIT_INTN
1215
#undef __CLC_INTN
1316
#undef __CLC_SCALAR
1417

1518
#define __CLC_GENTYPE float2
1619
#define __CLC_INTN int2
20+
#define __CLC_BIT_INTN int2
1721
#define __CLC_VECSIZE 2
1822
#include __CLC_BODY
1923
#undef __CLC_VECSIZE
2024
#undef __CLC_GENTYPE
25+
#undef __CLC_BIT_INTN
2126
#undef __CLC_INTN
2227

2328
#define __CLC_GENTYPE float3
2429
#define __CLC_INTN int3
30+
#define __CLC_BIT_INTN int3
2531
#define __CLC_VECSIZE 3
2632
#include __CLC_BODY
2733
#undef __CLC_VECSIZE
2834
#undef __CLC_GENTYPE
35+
#undef __CLC_BIT_INTN
2936
#undef __CLC_INTN
3037

3138
#define __CLC_GENTYPE float4
3239
#define __CLC_INTN int4
40+
#define __CLC_BIT_INTN int4
3341
#define __CLC_VECSIZE 4
3442
#include __CLC_BODY
3543
#undef __CLC_VECSIZE
3644
#undef __CLC_GENTYPE
45+
#undef __CLC_BIT_INTN
3746
#undef __CLC_INTN
3847

3948
#define __CLC_GENTYPE float8
4049
#define __CLC_INTN int8
50+
#define __CLC_BIT_INTN int8
4151
#define __CLC_VECSIZE 8
4252
#include __CLC_BODY
4353
#undef __CLC_VECSIZE
4454
#undef __CLC_GENTYPE
55+
#undef __CLC_BIT_INTN
4556
#undef __CLC_INTN
4657

4758
#define __CLC_GENTYPE float16
4859
#define __CLC_INTN int16
60+
#define __CLC_BIT_INTN int16
4961
#define __CLC_VECSIZE 16
5062
#include __CLC_BODY
5163
#undef __CLC_VECSIZE
5264
#undef __CLC_GENTYPE
65+
#undef __CLC_BIT_INTN
5366
#undef __CLC_INTN
5467

68+
#undef __CLC_FP_LIT
5569
#undef __CLC_FPSIZE
5670
#undef __CLC_SCALAR_GENTYPE
5771

@@ -61,55 +75,69 @@
6175

6276
#define __CLC_SCALAR_GENTYPE double
6377
#define __CLC_FPSIZE 64
78+
#define __CLC_FP_LIT(x) (x)
6479

6580
#define __CLC_SCALAR
6681
#define __CLC_GENTYPE double
6782
#define __CLC_INTN int
83+
#define __CLC_BIT_INTN long
6884
#include __CLC_BODY
6985
#undef __CLC_GENTYPE
86+
#undef __CLC_BIT_INTN
7087
#undef __CLC_INTN
7188
#undef __CLC_SCALAR
7289

7390
#define __CLC_GENTYPE double2
7491
#define __CLC_INTN int2
92+
#define __CLC_BIT_INTN long2
7593
#define __CLC_VECSIZE 2
7694
#include __CLC_BODY
7795
#undef __CLC_VECSIZE
7896
#undef __CLC_GENTYPE
97+
#undef __CLC_BIT_INTN
7998
#undef __CLC_INTN
8099

81100
#define __CLC_GENTYPE double3
82101
#define __CLC_INTN int3
102+
#define __CLC_BIT_INTN long3
83103
#define __CLC_VECSIZE 3
84104
#include __CLC_BODY
85105
#undef __CLC_VECSIZE
86106
#undef __CLC_GENTYPE
107+
#undef __CLC_BIT_INTN
87108
#undef __CLC_INTN
88109

89110
#define __CLC_GENTYPE double4
90111
#define __CLC_INTN int4
112+
#define __CLC_BIT_INTN long4
91113
#define __CLC_VECSIZE 4
92114
#include __CLC_BODY
93115
#undef __CLC_VECSIZE
94116
#undef __CLC_GENTYPE
117+
#undef __CLC_BIT_INTN
95118
#undef __CLC_INTN
96119

97120
#define __CLC_GENTYPE double8
98121
#define __CLC_INTN int8
122+
#define __CLC_BIT_INTN long8
99123
#define __CLC_VECSIZE 8
100124
#include __CLC_BODY
101125
#undef __CLC_VECSIZE
102126
#undef __CLC_GENTYPE
127+
#undef __CLC_BIT_INTN
103128
#undef __CLC_INTN
104129

105130
#define __CLC_GENTYPE double16
106131
#define __CLC_INTN int16
132+
#define __CLC_BIT_INTN long16
107133
#define __CLC_VECSIZE 16
108134
#include __CLC_BODY
109135
#undef __CLC_VECSIZE
110136
#undef __CLC_GENTYPE
137+
#undef __CLC_BIT_INTN
111138
#undef __CLC_INTN
112139

140+
#undef __CLC_FP_LIT
113141
#undef __CLC_FPSIZE
114142
#undef __CLC_SCALAR_GENTYPE
115143
#endif
@@ -121,55 +149,69 @@
121149

122150
#define __CLC_SCALAR_GENTYPE half
123151
#define __CLC_FPSIZE 16
152+
#define __CLC_FP_LIT(x) x##H
124153

125154
#define __CLC_SCALAR
126155
#define __CLC_GENTYPE half
127156
#define __CLC_INTN int
157+
#define __CLC_BIT_INTN short
128158
#include __CLC_BODY
129159
#undef __CLC_GENTYPE
160+
#undef __CLC_BIT_INTN
130161
#undef __CLC_INTN
131162
#undef __CLC_SCALAR
132163

133164
#define __CLC_GENTYPE half2
134165
#define __CLC_INTN int2
166+
#define __CLC_BIT_INTN short2
135167
#define __CLC_VECSIZE 2
136168
#include __CLC_BODY
137169
#undef __CLC_VECSIZE
138170
#undef __CLC_GENTYPE
171+
#undef __CLC_BIT_INTN
139172
#undef __CLC_INTN
140173

141174
#define __CLC_GENTYPE half3
142175
#define __CLC_INTN int3
176+
#define __CLC_BIT_INTN short3
143177
#define __CLC_VECSIZE 3
144178
#include __CLC_BODY
145179
#undef __CLC_VECSIZE
146180
#undef __CLC_GENTYPE
181+
#undef __CLC_BIT_INTN
147182
#undef __CLC_INTN
148183

149184
#define __CLC_GENTYPE half4
150185
#define __CLC_INTN int4
186+
#define __CLC_BIT_INTN short4
151187
#define __CLC_VECSIZE 4
152188
#include __CLC_BODY
153189
#undef __CLC_VECSIZE
154190
#undef __CLC_GENTYPE
191+
#undef __CLC_BIT_INTN
155192
#undef __CLC_INTN
156193

157194
#define __CLC_GENTYPE half8
158195
#define __CLC_INTN int8
196+
#define __CLC_BIT_INTN short8
159197
#define __CLC_VECSIZE 8
160198
#include __CLC_BODY
161199
#undef __CLC_VECSIZE
162200
#undef __CLC_GENTYPE
201+
#undef __CLC_BIT_INTN
163202
#undef __CLC_INTN
164203

165204
#define __CLC_GENTYPE half16
166205
#define __CLC_INTN int16
206+
#define __CLC_BIT_INTN short16
167207
#define __CLC_VECSIZE 16
168208
#include __CLC_BODY
169209
#undef __CLC_VECSIZE
170210
#undef __CLC_GENTYPE
211+
#undef __CLC_BIT_INTN
171212
#undef __CLC_INTN
172213

214+
#undef __CLC_FP_LIT
173215
#undef __CLC_FPSIZE
174216
#undef __CLC_SCALAR_GENTYPE
175217
#endif
Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,5 @@
1-
#include <clc/clcmacro.h>
2-
#include <clc/internal/clc.h>
31
#include <clc/relational/clc_isnan.h>
2+
#include <clc/relational/clc_select.h>
43

5-
#define CLC_SIGN(TYPE, F) \
6-
_CLC_DEF _CLC_OVERLOAD TYPE __clc_sign(TYPE x) { \
7-
if (__clc_isnan(x)) { \
8-
return 0.0F; \
9-
} \
10-
if (x > 0.0F) { \
11-
return 1.0F; \
12-
} \
13-
if (x < 0.0F) { \
14-
return -1.0F; \
15-
} \
16-
return x; /* -0.0 or +0.0 */ \
17-
}
18-
19-
CLC_SIGN(float, f)
20-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_sign, float)
21-
22-
#ifdef cl_khr_fp64
23-
24-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
25-
26-
CLC_SIGN(double, )
27-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sign, double)
28-
29-
#endif
30-
31-
#ifdef cl_khr_fp16
32-
33-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
34-
35-
CLC_SIGN(half, )
36-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_sign, half)
37-
38-
#endif
4+
#define __CLC_BODY <clc_sign.inc>
5+
#include <clc/math/gentype.inc>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// TYPE sign(TYPE x) {
2+
// if (isnan(x)) {
3+
// return 0.0F;
4+
// }
5+
// if (x > 0.0F) {
6+
// return 1.0F;
7+
// }
8+
// if (x < 0.0F) {
9+
// return -1.0F;
10+
// }
11+
// return x; /* -0.0 or +0.0 */
12+
// }
13+
// Branch-free sign(x) for the gentype selected by the including file
// (__CLC_GENTYPE, with __CLC_BIT_INTN as the matching mask type).
//
// Three masks are computed up front (isnan, x > 0, x < 0) and then applied as
// a chain of selects that starts from x itself. Assuming __clc_select(a, b, c)
// yields b where c is set and a otherwise (OpenCL select semantics - confirm
// against clc_select.h), the result is:
//   x > 0    ->  1.0
//   x < 0    -> -1.0
//   isnan(x) ->  0.0
//   otherwise x is passed through untouched, so +0.0 and -0.0 keep their sign.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_sign(__CLC_GENTYPE x) {
14+
__CLC_BIT_INTN x_isnan = __clc_isnan(x);
15+
__CLC_BIT_INTN x_isgreater_zero = x > __CLC_FP_LIT(0.0);
16+
__CLC_BIT_INTN x_isless_zero = x < __CLC_FP_LIT(0.0);
17+
__CLC_GENTYPE sel0 = __clc_select(x, __CLC_FP_LIT(1.0), x_isgreater_zero);
18+
__CLC_GENTYPE sel1 = __clc_select(sel0, __CLC_FP_LIT(-1.0), x_isless_zero);
19+
// The NaN mask is applied last so that it overrides whatever the two
// comparison-driven selects left in place for a NaN input.
__CLC_GENTYPE sel2 = __clc_select(sel1, __CLC_FP_LIT(0.0), x_isnan);
20+
return sel2;
21+
}

libclc/generic/lib/common/sign.cl

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,18 @@
22
#include <clc/clcmacro.h>
33
#include <clc/common/clc_sign.h>
44

5-
#define FUNCTION sign
6-
#define __CLC_BODY <clc/common/unary_def.inc>
5+
_CLC_DEFINE_UNARY_BUILTIN(float, sign, __clc_sign, float)
76

8-
#include <clc/common/floatn.inc>
7+
#ifdef cl_khr_fp64
8+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
9+
10+
_CLC_DEFINE_UNARY_BUILTIN(double, sign, __clc_sign, double)
11+
12+
#endif
13+
14+
#ifdef cl_khr_fp16
15+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
16+
17+
_CLC_DEFINE_UNARY_BUILTIN(half, sign, __clc_sign, half)
18+
19+
#endif

0 commit comments

Comments
 (0)