@@ -32,7 +32,7 @@ IN THE SOFTWARE.
//*****************************************************************************/

#define VLOAD_MACRO(addressSpace, scalarType, numElements, offsetType, mangle) \
- INLINE scalarType##numElements __builtin_spirv_OpenCL_vload##numElements##_##mangle(offsetType offset, const addressSpace scalarType *p) \
+ INLINE scalarType##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, numElements##_##mangle, n_R##scalarType##numElements)(offsetType offset, addressSpace scalarType *p) \
{ \
const addressSpace scalarType *pOffset = p + offset * numElements; \
scalarType##numElements ret; \
@@ -49,16 +49,16 @@ INLINE void __builtin_spirv_OpenCL_vstore##numElements##_##mangle(scalarType##nu
}

#define ELEM_ARG(addressSpace, scalarType, mang) \
- VLOAD_MACRO(addressSpace, scalarType, 2, ulong, i64_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 2, uint, i32_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 3, ulong, i64_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 3, uint, i32_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 4, ulong, i64_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 4, uint, i32_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 8, ulong, i64_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 8, uint, i32_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 16, ulong, i64_##mang) \
- VLOAD_MACRO(addressSpace, scalarType, 16, uint, i32_##mang)
+ VLOAD_MACRO(addressSpace, scalarType, 2, long, i64_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 2, int, i32_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 3, long, i64_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 3, int, i32_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 4, long, i64_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 4, int, i32_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 8, long, i64_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 8, int, i32_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 16, long, i64_##mang) \
+ VLOAD_MACRO(addressSpace, scalarType, 16, int, i32_##mang)

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#define TYPE_ARG(TYPE, TYPEMANG) \
@@ -75,10 +75,10 @@ ELEM_ARG(local, TYPE, p3##TYPEMANG) \
ELEM_ARG(private, TYPE, p0##TYPEMANG)
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0

- TYPE_ARG(uchar, i8)
- TYPE_ARG(ushort, i16)
- TYPE_ARG(uint, i32)
- TYPE_ARG(ulong, i64)
+ TYPE_ARG(char, i8)
+ TYPE_ARG(short, i16)
+ TYPE_ARG(int, i32)
+ TYPE_ARG(long, i64)
TYPE_ARG(half, f16)
TYPE_ARG(float, f32)
#if defined(cl_khr_fp64)
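Taken together, VLOAD_MACRO, ELEM_ARG and TYPE_ARG stamp out the vector-load overloads for every scalar type, vector width, offset width and address space; OpenCL C vloadn calls ultimately lower to builtins of this shape. A minimal usage sketch follows (an illustrative kernel, not part of this diff; it assumes only the standard OpenCL C vload4/vstore4 builtins that these definitions stand behind):

// Illustrative OpenCL C kernel: a vload4/vstore4 pair on int data, the kind of
// access the int/i32 and long/i64 VLOAD_MACRO instantiations above are generated for.
__kernel void copy_int4(__global const int *src, __global int *dst)
{
    size_t gid = get_global_id(0);   // size_t maps onto the i32 or i64 offset overloads
    int4 v = vload4(gid, src);       // element offset, scaled by 4 elements internally
    vstore4(v, gid, dst);
}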
@@ -129,17 +129,17 @@ TYPE_ARG(double, f64)
//*****************************************************************************/
// vload macros
//*****************************************************************************/
- static OVERLOADABLE float __intel_spirv_half2float(ushort h)
+ static OVERLOADABLE float __intel_spirv_half2float(short h)
{
return SPIRV_BUILTIN(FConvert, _f32_f16, _Rfloat)(as_half(h));
}

#define VLOAD_SHORT(addressSpace, ASNUM) \
- INLINE static ushort __builtin_spirv_OpenCL_vload_i64_p##ASNUM##i16(ulong offset, const addressSpace ushort* p) \
+ INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p##ASNUM##i16, n_Rshort)(long offset, addressSpace short* p) \
{ \
return *(p + offset); \
} \
- INLINE static ushort __builtin_spirv_OpenCL_vload_i32_p##ASNUM##i16(uint offset, const addressSpace ushort* p) \
+ INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p##ASNUM##i16, n_Rshort)(int offset, addressSpace short* p) \
{ \
return *(p + offset); \
}
@@ -152,55 +152,55 @@ VLOAD_SHORT(__local, 3)
VLOAD_SHORT(__constant, 2)
VLOAD_SHORT(__private, 0)

- GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG(__intel_spirv_half2float, float, ushort)
+ GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG(__intel_spirv_half2float, float, short)

// Two copies for the i32 and i64 size_t offsets.
#define __CLFN_DEF_F_VLOAD_SCALAR_HALF(addressSpace, ASNUM) \
- INLINE half __builtin_spirv_OpenCL_vload_i32_p##ASNUM##f16(uint offset, const addressSpace half* p) { \
+ INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p##ASNUM##f16, _Rhalf)(int offset, addressSpace half* p) { \
return *(p + offset); \
} \
- INLINE half __builtin_spirv_OpenCL_vload_i64_p##ASNUM##f16(ulong offset, const addressSpace half* p) { \
+ INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p##ASNUM##f16, _Rhalf)(long offset, addressSpace half* p) { \
return *(p + offset); \
}

- #define __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, numElements) \
- INLINE float##numElements __builtin_spirv_OpenCL_vload_half##numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
- return __intel_spirv_half2float(__builtin_spirv_OpenCL_vload##numElements##_##MANGSIZE##_p##ASNUM##i16(offset, (const addressSpace ushort*)p)); \
+ #define __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, numElements, postfix) \
+ INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload_half, numElements##_##MANGSIZE##_p##ASNUM##f16, postfix##_Rfloat##numElements)(SIZETYPE offset, addressSpace half* p) { \
+ return __intel_spirv_half2float(SPIRV_OCL_BUILTIN(vload, numElements##_##MANGSIZE##_p##ASNUM##i16, n_Rshort##numElements)(offset, (addressSpace short*)p)); \
}

#define __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, step, numElements) \
- INLINE float##numElements __builtin_spirv_OpenCL_vloada_half##numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
- const addressSpace ushort##numElements* pHalf = (const addressSpace ushort##numElements*)(p + offset * step); \
- return __intel_spirv_half2float(*pHalf); \
+ INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vloada_half, numElements##_##MANGSIZE##_p##ASNUM##f16, n_Rfloat##numElements)(SIZETYPE offset, addressSpace half* p) { \
+ const addressSpace short##numElements* pHalf = (const addressSpace short##numElements*)(p + offset * step); \
+ return __intel_spirv_half2float(*pHalf); \
}

#define __CLFN_DEF_F_VLOAD_HALFX_AS(addressSpace, ASNUM) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, ) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 2) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 3) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 4) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 8) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 16) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, ) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 2) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 3) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 4) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 8) \
- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 16)
-
- #define __CLFN_DEF_F_VLOADA_HALFX_AS(addressSpace, ASNUM) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 1, ) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 2, 2) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 4, 3) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 4, 4) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 8, 8) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 16, 16) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 1, ) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 2, 2) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 4, 3) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 4, 4) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 8, 8) \
- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 16, 16)
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, , ) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 2, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 3, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 4, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 8, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 16, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, , ) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 2, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 3, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 4, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 8, n) \
+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 16, n)
+
+ #define __CLFN_DEF_F_VLOADA_HALFX_AS(addressSpace, ASNUM) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 1, ) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 2, 2) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 4, 3) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 4, 4) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 8, 8) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 16, 16) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 1, ) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 2, 2) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 4, 3) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 4, 4) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 8, 8) \
+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 16, 16)

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#define __CLFN_DEF_F_VLOAD_HALF_ALL() \
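The half variants above load the raw 16-bit pattern through the i16 overloads and widen it with __intel_spirv_half2float; the vloada forms additionally use an aligned step (note the step argument of 4 for the 3-element form in __CLFN_DEF_F_VLOADA_HALFX_AS). A hedged usage sketch, assuming nothing beyond the standard OpenCL C vload_half4/vloada_half4 builtins that these definitions stand behind:

// Illustrative OpenCL C kernel: vload_half4 reads four packed halves and returns
// float4, i.e. the half -> float widening path implemented above.
__kernel void widen_half4(__global const half *src, __global float *dst)
{
    size_t gid = get_global_id(0);
    float4 f = vload_half4(gid, src);   // packed layout: reads halves at src + gid * 4
    // vloada_half4(gid, src) would read the same elements here; the packed and
    // aligned layouts differ only in the 3-element forms.
    vstore4(f, gid, dst);
}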