1
+ #include <utils.h>
2
+
1
3
#define _CLC_UNARY_VECTORIZE (DECLSPEC , RET_TYPE , FUNCTION , ARG1_TYPE ) \
2
4
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
3
5
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
86
88
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
87
89
}
88
90
89
/*
 * _CLC_V_S_S_V_VECTORIZE - emit the 2-, 3-, 4-, 8- and 16-wide definitions of
 * a three-argument FUNCTION whose first two arguments are passed through
 * unchanged and whose third argument is split into components.
 *
 *   DECLSPEC   specifiers placed in front of every generated definition
 *   RET_TYPE   base name of the result type; the width is pasted on (RET_TYPE##N)
 *   FUNCTION   name of the function being defined and forwarded to
 *   ARG1_TYPE  type of the first argument (used as-is)
 *   ARG2_TYPE  type of the second argument (used as-is)
 *   ARG3_TYPE  base name of the third argument type; the width is pasted on
 *
 * The 3-wide form forwards each of z.x, z.y and z.z separately; every other
 * width forwards z.lo and z.hi, so the FUNCTION overloads accepting those
 * component types have to be visible where the macro is expanded.
 *
 * Note: the macro name must be followed immediately by '(' - any whitespace
 * there would turn this into an object-like macro.
 */
#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
                               ARG2_TYPE, ARG3_TYPE) \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
    return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
  } \
  \
  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
    return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
                         FUNCTION(x, y, z.z)); \
  } \
  \
  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
    return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
  } \
  \
  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
    return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
  } \
  \
  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
    return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
  }
111
113
112
/*
 * _CLC_V_V_VP_VECTORIZE - emit the 2-, 3-, 4-, 8- and 16-wide definitions of
 * a FUNCTION that takes a value argument and a pointer argument.
 *
 *   DECLSPEC    specifiers placed in front of every generated definition
 *   RET_TYPE    base name of the result type; the width is appended
 *   FUNCTION    name of the function being defined and forwarded to
 *   ARG1_TYPE   base name of the first argument type; the width is appended
 *   ADDR_SPACE  qualifier applied to the pointee of the second argument
 *   ARG2_TYPE   base name of the pointee type; the width is appended
 *
 * Widths are attached with __CLC_XCONCAT (provided by <utils.h>) rather than
 * a bare ##, so a type argument that is itself a macro is expanded before the
 * width is pasted on.
 *
 * The 2- and 3-wide forms forward one component of x at a time together with
 * y reinterpreted as a pointer to ARG2_TYPE, advanced by the component index.
 * The 4-, 8- and 16-wide forms forward x.lo / x.hi together with y
 * reinterpreted as a pointer to the half-width type, the second one advanced
 * by half the width in units of ARG2_TYPE. Every cast of y carries ADDR_SPACE
 * so the pointer's qualifier is never dropped.
 *
 * Note: the macro name must be followed immediately by '(' - any whitespace
 * there would turn this into an object-like macro.
 */
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
                              ADDR_SPACE, ARG2_TYPE) \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
    return (__CLC_XCONCAT(RET_TYPE, 2))( \
        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
        FUNCTION(x.y, \
                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
    return (__CLC_XCONCAT(RET_TYPE, 3))( \
        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
        FUNCTION(x.y, \
                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \
        FUNCTION(x.z, \
                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
    return (__CLC_XCONCAT(RET_TYPE, 4))( \
        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \
        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
                           ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
    return (__CLC_XCONCAT(RET_TYPE, 8))( \
        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \
        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
                           ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
    return (__CLC_XCONCAT(RET_TYPE, 16))( \
        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \
        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
                           ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \
  }
148
162
149
163
#define _CLC_DEFINE_BINARY_BUILTIN (RET_TYPE , FUNCTION , BUILTIN , ARG1_TYPE , ARG2_TYPE ) \
@@ -161,3 +175,26 @@ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
161
175
return BUILTIN(x); \
162
176
} \
163
177
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
178
+
179
/*
 * Half-precision helpers. They only generate code when the cl_khr_fp16 macro
 * is defined; otherwise both macros expand to nothing, so callers can invoke
 * them unconditionally.
 *
 * Note: each macro name must be followed immediately by '(' - any whitespace
 * there would turn it into an object-like macro whose body starts with the
 * parenthesised parameter name.
 */
#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * Define `half FUNCTION(half)` by converting the argument to float, calling
 * the float FUNCTION and converting the result back to half, then generate
 * the vector-width overloads through _CLC_UNARY_VECTORIZE.
 */
#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \
  _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \
    return (half)FUNCTION((float)x); \
  } \
  _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half)

/*
 * Define `half FUNCTION(half, half)` by converting both arguments to float,
 * calling the float FUNCTION and converting the result back to half, then
 * generate the vector-width overloads through _CLC_BINARY_VECTORIZE.
 */
#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \
  _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \
    return (half)FUNCTION((float)x, (float)y); \
  } \
  _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)

#else

/* cl_khr_fp16 is not defined: the helpers expand to nothing. */
#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION)

#endif
0 commit comments