Skip to content

Commit 4e198de

Browse files
aratajew authored and igcbot committed
Implement support for both SPV-IR forms of vload builtins
1 parent 1174f6b commit 4e198de

File tree

1 file changed

+53
-53
lines changed

1 file changed

+53
-53
lines changed

IGC/BiFModule/Implementation/vloadvstore.cl

Lines changed: 53 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ IN THE SOFTWARE.
3232
//*****************************************************************************/
3333

3434
#define VLOAD_MACRO(addressSpace, scalarType, numElements, offsetType, mangle) \
35-
INLINE scalarType##numElements __builtin_spirv_OpenCL_vload##numElements##_##mangle(offsetType offset, const addressSpace scalarType *p) \
35+
INLINE scalarType##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, numElements##_##mangle, n_R##scalarType##numElements)(offsetType offset, addressSpace scalarType *p) \
3636
{ \
3737
const addressSpace scalarType *pOffset = p + offset * numElements; \
3838
scalarType##numElements ret; \
@@ -49,16 +49,16 @@ INLINE void __builtin_spirv_OpenCL_vstore##numElements##_##mangle(scalarType##nu
4949
}
5050

5151
#define ELEM_ARG(addressSpace, scalarType, mang) \
52-
VLOAD_MACRO(addressSpace, scalarType, 2, ulong, i64_##mang) \
53-
VLOAD_MACRO(addressSpace, scalarType, 2, uint, i32_##mang) \
54-
VLOAD_MACRO(addressSpace, scalarType, 3, ulong, i64_##mang) \
55-
VLOAD_MACRO(addressSpace, scalarType, 3, uint, i32_##mang) \
56-
VLOAD_MACRO(addressSpace, scalarType, 4, ulong, i64_##mang) \
57-
VLOAD_MACRO(addressSpace, scalarType, 4, uint, i32_##mang) \
58-
VLOAD_MACRO(addressSpace, scalarType, 8, ulong, i64_##mang) \
59-
VLOAD_MACRO(addressSpace, scalarType, 8, uint, i32_##mang) \
60-
VLOAD_MACRO(addressSpace, scalarType, 16, ulong, i64_##mang) \
61-
VLOAD_MACRO(addressSpace, scalarType, 16, uint, i32_##mang)
52+
VLOAD_MACRO(addressSpace, scalarType, 2, long, i64_##mang) \
53+
VLOAD_MACRO(addressSpace, scalarType, 2, int, i32_##mang) \
54+
VLOAD_MACRO(addressSpace, scalarType, 3, long, i64_##mang) \
55+
VLOAD_MACRO(addressSpace, scalarType, 3, int, i32_##mang) \
56+
VLOAD_MACRO(addressSpace, scalarType, 4, long, i64_##mang) \
57+
VLOAD_MACRO(addressSpace, scalarType, 4, int, i32_##mang) \
58+
VLOAD_MACRO(addressSpace, scalarType, 8, long, i64_##mang) \
59+
VLOAD_MACRO(addressSpace, scalarType, 8, int, i32_##mang) \
60+
VLOAD_MACRO(addressSpace, scalarType, 16, long, i64_##mang) \
61+
VLOAD_MACRO(addressSpace, scalarType, 16, int, i32_##mang)
6262

6363
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6464
#define TYPE_ARG(TYPE, TYPEMANG) \
@@ -75,10 +75,10 @@ ELEM_ARG(local, TYPE, p3##TYPEMANG) \
7575
ELEM_ARG(private, TYPE, p0##TYPEMANG)
7676
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0
7777

78-
TYPE_ARG(uchar, i8)
79-
TYPE_ARG(ushort, i16)
80-
TYPE_ARG(uint, i32)
81-
TYPE_ARG(ulong, i64)
78+
TYPE_ARG(char, i8)
79+
TYPE_ARG(short, i16)
80+
TYPE_ARG(int, i32)
81+
TYPE_ARG(long, i64)
8282
TYPE_ARG(half, f16)
8383
TYPE_ARG(float, f32)
8484
#if defined(cl_khr_fp64)
@@ -129,17 +129,17 @@ TYPE_ARG(double, f64)
129129
//*****************************************************************************/
130130
// vload macros
131131
//*****************************************************************************/
132-
static OVERLOADABLE float __intel_spirv_half2float(ushort h)
132+
static OVERLOADABLE float __intel_spirv_half2float(short h)
133133
{
134134
return SPIRV_BUILTIN(FConvert, _f32_f16, _Rfloat)(as_half(h));
135135
}
136136

137137
#define VLOAD_SHORT(addressSpace, ASNUM) \
138-
INLINE static ushort __builtin_spirv_OpenCL_vload_i64_p##ASNUM##i16(ulong offset, const addressSpace ushort* p) \
138+
INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p##ASNUM##i16, n_Rshort)(long offset, addressSpace short* p) \
139139
{ \
140140
return *(p + offset); \
141141
} \
142-
INLINE static ushort __builtin_spirv_OpenCL_vload_i32_p##ASNUM##i16(uint offset, const addressSpace ushort* p) \
142+
INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p##ASNUM##i16, n_Rshort)(int offset, addressSpace short* p) \
143143
{ \
144144
return *(p + offset); \
145145
}
@@ -152,55 +152,55 @@ VLOAD_SHORT(__local, 3)
152152
VLOAD_SHORT(__constant, 2)
153153
VLOAD_SHORT(__private, 0)
154154

155-
GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG(__intel_spirv_half2float, float, ushort)
155+
GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG(__intel_spirv_half2float, float, short)
156156

157157
// Two copies for the i32 and i64 size_t offsets.
158158
#define __CLFN_DEF_F_VLOAD_SCALAR_HALF(addressSpace, ASNUM) \
159-
INLINE half __builtin_spirv_OpenCL_vload_i32_p##ASNUM##f16(uint offset, const addressSpace half* p) { \
159+
INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p##ASNUM##f16, _Rhalf)(int offset, addressSpace half* p) { \
160160
return *(p + offset); \
161161
} \
162-
INLINE half __builtin_spirv_OpenCL_vload_i64_p##ASNUM##f16(ulong offset, const addressSpace half* p) { \
162+
INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p##ASNUM##f16, _Rhalf)(long offset, addressSpace half* p) { \
163163
return *(p + offset); \
164164
}
165165

166-
#define __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, numElements) \
167-
INLINE float##numElements __builtin_spirv_OpenCL_vload_half##numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
168-
return __intel_spirv_half2float(__builtin_spirv_OpenCL_vload##numElements##_##MANGSIZE##_p##ASNUM##i16(offset, (const addressSpace ushort*)p)); \
166+
#define __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, numElements, postfix) \
167+
INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload_half, numElements##_##MANGSIZE##_p##ASNUM##f16, postfix##_Rfloat##numElements)(SIZETYPE offset, addressSpace half* p) { \
168+
return __intel_spirv_half2float(SPIRV_OCL_BUILTIN(vload, numElements##_##MANGSIZE##_p##ASNUM##i16, n_Rshort##numElements)(offset, (addressSpace short*)p)); \
169169
}
170170

171171
#define __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, MANGSIZE, SIZETYPE, step, numElements) \
172-
INLINE float##numElements __builtin_spirv_OpenCL_vloada_half##numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
173-
const addressSpace ushort##numElements* pHalf = (const addressSpace ushort##numElements*)(p + offset * step); \
174-
return __intel_spirv_half2float(*pHalf); \
172+
INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vloada_half, numElements##_##MANGSIZE##_p##ASNUM##f16, n_Rfloat##numElements)(SIZETYPE offset, addressSpace half* p) { \
173+
const addressSpace short##numElements* pHalf = (const addressSpace short##numElements*)(p + offset * step); \
174+
return __intel_spirv_half2float(*pHalf); \
175175
}
176176

177177
#define __CLFN_DEF_F_VLOAD_HALFX_AS(addressSpace, ASNUM) \
178-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, ) \
179-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 2) \
180-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 3) \
181-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 4) \
182-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 8) \
183-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, 16) \
184-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, ) \
185-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 2) \
186-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 3) \
187-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 4) \
188-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 8) \
189-
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, 16)
190-
191-
#define __CLFN_DEF_F_VLOADA_HALFX_AS(addressSpace, ASNUM) \
192-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 1, ) \
193-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 2, 2) \
194-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 4, 3) \
195-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 4, 4) \
196-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 8, 8) \
197-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong, 16, 16) \
198-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 1, ) \
199-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 2, 2) \
200-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 4, 3) \
201-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 4, 4) \
202-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 8, 8) \
203-
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint, 16, 16)
178+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, , ) \
179+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 2, n) \
180+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 3, n) \
181+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 4, n) \
182+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 8, n) \
183+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, 16, n) \
184+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, , ) \
185+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 2, n) \
186+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 3, n) \
187+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 4, n) \
188+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 8, n) \
189+
__CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, 16, n)
190+
191+
#define __CLFN_DEF_F_VLOADA_HALFX_AS(addressSpace, ASNUM) \
192+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 1, ) \
193+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 2, 2) \
194+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 4, 3) \
195+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 4, 4) \
196+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 8, 8) \
197+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long, 16, 16) \
198+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 1, ) \
199+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 2, 2) \
200+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 4, 3) \
201+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 4, 4) \
202+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 8, 8) \
203+
__CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int, 16, 16)
204204

205205
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
206206
#define __CLFN_DEF_F_VLOAD_HALF_ALL() \

0 commit comments

Comments
 (0)