Skip to content

Commit 51ffc04

Browse files
authored
[SYCL][libclc] Add generic addrspace overloads of vload/vstore builtins (#13092)
These definitions and declarations were missing, which would be problematic for a target which supports the generic address space where that address space is uniquely mangled. As with the maths builtins, the NVIDIA target wouldn't see this problem as the 'generic' address space is mangled identically to the 'private' one, so we were serendipitously providing the builtins through the 'private' overloads. I couldn't seem to make SYCL actually generate these builtins, but there is a path from SPIR-V to them (through the OpenCL SPIR-V environment), so providing them is prudent regardless. In the absence of tests I manually verified that there are now as many 'AS0' overloads of these builtins as there are 'AS5'/'AS1'/etc. overloads.
1 parent 93a1abb commit 51ffc04

File tree

9 files changed

+198
-51
lines changed

9 files changed

+198
-51
lines changed

libclc/generic/include/clc/shared/vload.h

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,31 @@
1-
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
2-
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
3-
4-
#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
5-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
6-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
7-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
8-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
1+
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
2+
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH( \
3+
size_t offset, const ADDR_SPACE MEM_TYPE *x);
4+
5+
#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
6+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
7+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
8+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
9+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
910
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
1011

11-
#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
12-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
13-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
14-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
15-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
12+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
13+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
14+
defined(__opencl_c_generic_address_space))
15+
#define _CLC_VECTOR_VLOAD_GENERIC_DECL _CLC_VECTOR_VLOAD_DECL
16+
#else
17+
// The generic address space isn't available, so make the macro do nothing
18+
#define _CLC_VECTOR_VLOAD_GENERIC_DECL(X, Y, Z, W)
19+
#endif
20+
21+
#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
22+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
23+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
24+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
25+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
26+
_CLC_VECTOR_VLOAD_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic)
1627

17-
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
28+
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
1829
_CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
1930

2031
// Declare vector load prototypes
@@ -52,7 +63,15 @@ _CLC_VLOAD_DECL(a_half, half, float, , __global)
5263
_CLC_VLOAD_DECL(a_half, half, float, , __local)
5364
_CLC_VLOAD_DECL(a_half, half, float, , __private)
5465

66+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
67+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
68+
defined(__opencl_c_generic_address_space))
69+
_CLC_VLOAD_DECL(_half, half, float, , __generic)
70+
_CLC_VLOAD_DECL(a_half, half, float, , __generic)
71+
#endif
72+
5573
#undef _CLC_VLOAD_DECL
5674
#undef _CLC_VECTOR_VLOAD_DECL
5775
#undef _CLC_VECTOR_VLOAD_PRIM3
5876
#undef _CLC_VECTOR_VLOAD_PRIM1
77+
#undef _CLC_VECTOR_VLOAD_GENERIC_DECL

libclc/generic/include/clc/shared/vstore.h

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,45 @@
1-
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
2-
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
3-
4-
#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
5-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
6-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
7-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
8-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
1+
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
2+
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND( \
3+
VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
4+
5+
#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
6+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
7+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
8+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
9+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
910
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE, RND)
1011

11-
#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
12-
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
13-
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
14-
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND)
12+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
13+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
14+
defined(__opencl_c_generic_address_space))
15+
#define _CLC_VSTORE_GENERIC_DECL _CLC_VSTORE_DECL
16+
#define _CLC_VECTOR_VSTORE_GENERIC_DECL _CLC_VECTOR_VSTORE_DECL
17+
#else
18+
// The generic address space isn't available, so make the macros do nothing
19+
#define _CLC_VSTORE_GENERIC_DECL(X, Y, Z, W, V, U)
20+
#define _CLC_VECTOR_VSTORE_GENERIC_DECL(X, Y, Z, W, V)
21+
#endif
22+
23+
#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
24+
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
25+
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
26+
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND) \
27+
_CLC_VECTOR_VSTORE_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic, RND)
1528

16-
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
17-
_CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE, )
29+
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
30+
_CLC_VECTOR_VSTORE_PRIM3(, PRIM_TYPE, PRIM_TYPE, )
1831

19-
#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
20-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
21-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
22-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
23-
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
24-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
25-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
26-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
27-
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
32+
#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
33+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
34+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
35+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
36+
_CLC_VSTORE_GENERIC_DECL(_half, half, PRIM_TYPE, , __generic, RND) \
37+
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
38+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
39+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
40+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
41+
_CLC_VSTORE_GENERIC_DECL(a_half, half, PRIM_TYPE, , __generic, RND) \
42+
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
2843

2944
_CLC_VECTOR_VSTORE_PRIM1(char)
3045
_CLC_VECTOR_VSTORE_PRIM1(uchar)
@@ -57,6 +72,8 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)
5772

5873

5974
#undef _CLC_VSTORE_DECL
75+
#undef _CLC_VSTORE_GENERIC_DECL
6076
#undef _CLC_VECTOR_VSTORE_DECL
6177
#undef _CLC_VECTOR_VSTORE_PRIM3
6278
#undef _CLC_VECTOR_VSTORE_PRIM1
79+
#undef _CLC_VECTOR_VSTORE_GENERIC_DECL

libclc/generic/lib/shared/vload.cl

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,23 @@
2525
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
2626
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
2727
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&x[16*offset])); \
28-
} \
28+
}
29+
30+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
31+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
32+
defined(__opencl_c_generic_address_space))
33+
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
34+
#else
35+
// The generic address space isn't available, so make the macro do nothing
36+
#define VLOAD_VECTORIZE_GENERIC(X,Y)
37+
#endif
2938

30-
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
31-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
32-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
33-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
34-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
39+
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
40+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
41+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
42+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
43+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
44+
VLOAD_VECTORIZE_GENERIC(__CLC_SCALAR_GENTYPE, __generic)
3545

3646
#define VLOAD_TYPES() \
3747
VLOAD_ADDR_SPACES(char) \
@@ -62,6 +72,11 @@ float __clc_vload_half_float_helper__constant(const __constant half *);
6272
float __clc_vload_half_float_helper__global(const __global half *);
6373
float __clc_vload_half_float_helper__local(const __local half *);
6474
float __clc_vload_half_float_helper__private(const __private half *);
75+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
76+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
77+
defined(__opencl_c_generic_address_space))
78+
float __clc_vload_half_float_helper__generic(const __generic half *);
79+
#endif
6580

6681
#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
6782
#else
@@ -115,3 +130,4 @@ float __clc_vload_half_float_helper__private(const __private half *);
115130
#undef VLOAD_TYPES
116131
#undef VLOAD_ADDR_SPACES
117132
#undef VLOAD_VECTORIZE
133+
#undef VLOAD_VECTORIZE_GENERIC

libclc/generic/lib/shared/vload_half.inc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,22 @@
1212
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
1313
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
1414
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
15+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
16+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
17+
defined(__opencl_c_generic_address_space))
18+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __generic);
19+
#endif
1520

1621
#undef __CLC_OFFSET
1722
#else
1823
FUNC(, 1, 1, __CLC_GENTYPE, __private);
1924
FUNC(, 1, 1, __CLC_GENTYPE, __local);
2025
FUNC(, 1, 1, __CLC_GENTYPE, __global);
2126
FUNC(, 1, 1, __CLC_GENTYPE, __constant);
27+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
28+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
29+
defined(__opencl_c_generic_address_space))
30+
FUNC(, 1, 1, __CLC_GENTYPE, __generic);
31+
#endif
2232
#endif
2333
#endif

libclc/generic/lib/shared/vstore.cl

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,22 @@
2626
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
2727
_CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
2828
*((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&mem[16*offset])) = vec; \
29-
} \
29+
}
30+
31+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
32+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
33+
defined(__opencl_c_generic_address_space))
34+
#define VSTORE_VECTORIZE_GENERIC VSTORE_VECTORIZE
35+
#else
36+
// The generic address space isn't available, so make the macro do nothing
37+
#define VSTORE_VECTORIZE_GENERIC(X,Y)
38+
#endif
3039

31-
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
32-
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
33-
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
34-
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
40+
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
41+
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
42+
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
43+
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
44+
VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic)
3545

3646
VSTORE_ADDR_SPACES(char)
3747
VSTORE_ADDR_SPACES(uchar)
@@ -43,7 +53,6 @@ VSTORE_ADDR_SPACES(long)
4353
VSTORE_ADDR_SPACES(ulong)
4454
VSTORE_ADDR_SPACES(float)
4555

46-
4756
#ifdef cl_khr_fp64
4857
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
4958
VSTORE_ADDR_SPACES(double)
@@ -68,11 +77,21 @@ _CLC_DEF void __clc_vstore_half_##STYPE##_helper##AS(STYPE s, AS half *d) \
6877
DECLARE_HELPER(float, __private, __builtin_store_halff);
6978
DECLARE_HELPER(float, __global, __builtin_store_halff);
7079
DECLARE_HELPER(float, __local, __builtin_store_halff);
80+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
81+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
82+
defined(__opencl_c_generic_address_space))
83+
DECLARE_HELPER(float, __generic, __builtin_store_halff);
84+
#endif
7185

7286
#ifdef cl_khr_fp64
7387
DECLARE_HELPER(double, __private, __builtin_store_half);
7488
DECLARE_HELPER(double, __global, __builtin_store_half);
7589
DECLARE_HELPER(double, __local, __builtin_store_half);
90+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
91+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
92+
defined(__opencl_c_generic_address_space))
93+
DECLARE_HELPER(double, __generic, __builtin_store_half);
94+
#endif
7695
#endif
7796

7897
#define VEC_STORE1(STYPE, AS, val, ROUNDF) __clc_vstore_half_##STYPE##_helper##AS (ROUNDF(val), &mem[offset++]);
@@ -261,3 +280,4 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
261280
#undef DECLARE_HELPER
262281
#undef VSTORE_ADDR_SPACES
263282
#undef VSTORE_VECTORIZE
283+
#undef VSTORE_VECTORIZE_GENERIC

libclc/generic/lib/shared/vstore_half.inc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,21 @@
1111
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
1212
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
1313
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
14+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
15+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
16+
defined(__opencl_c_generic_address_space))
17+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __generic);
18+
#endif
1419

1520
#undef __CLC_OFFSET
1621
#else
1722
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
1823
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
1924
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
25+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
26+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
27+
defined(__opencl_c_generic_address_space))
28+
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __generic);
29+
#endif
2030
#endif
2131
#endif

libclc/generic/libspirv/shared/vload.cl

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,21 @@
5959
*)(&x[16 * offset])); \
6060
}
6161

62+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
63+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
64+
defined(__opencl_c_generic_address_space))
65+
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
66+
#else
67+
// The generic address space isn't available, so make the macro do nothing
68+
#define VLOAD_VECTORIZE_GENERIC(X,Y,Z)
69+
#endif
70+
6271
#define VLOAD_ADDR_SPACES_IMPL(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE) \
6372
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __private) \
6473
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __local) \
6574
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __constant) \
66-
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __global)
75+
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __global) \
76+
VLOAD_VECTORIZE_GENERIC(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __generic)
6777

6878
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
6979
VLOAD_ADDR_SPACES_IMPL(__CLC_SCALAR_GENTYPE, __CLC_SCALAR_GENTYPE)
@@ -99,6 +109,12 @@ float __clc_vload_half_float_helper__global(const __global half *);
99109
float __clc_vload_half_float_helper__local(const __local half *);
100110
float __clc_vload_half_float_helper__private(const __private half *);
101111

112+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
113+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
114+
defined(__opencl_c_generic_address_space))
115+
float __clc_vload_half_float_helper__generic(const __generic half *);
116+
#endif
117+
102118
#define VEC_LOAD1(val, AS) \
103119
val = __clc_vload_half_float_helper##AS(&mem[offset++]);
104120
#else
@@ -158,6 +174,12 @@ GEN_VLOAD_HALF(__global)
158174
GEN_VLOAD_HALF(__local)
159175
GEN_VLOAD_HALF(__constant)
160176

177+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
178+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
179+
defined(__opencl_c_generic_address_space))
180+
GEN_VLOAD_HALF(__generic)
181+
#endif
182+
161183
#undef VLOAD_HALF_IMPL
162184
#undef VLOAD_HALF_VEC_IMPL
163185
#undef GEN_VLOAD_HALF
@@ -170,3 +192,5 @@ GEN_VLOAD_HALF(__constant)
170192
#undef VLOAD_TYPES
171193
#undef VLOAD_ADDR_SPACES
172194
#undef VLOAD_VECTORIZE
195+
#undef VLOAD_VECTORIZE_GENERIC
196+
#undef VLOAD_VECTORIZE

libclc/generic/libspirv/shared/vstore.cl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,20 @@
5858
*)(&mem[16 * offset])) = vec; \
5959
}
6060

61+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
62+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
63+
defined(__opencl_c_generic_address_space))
64+
#define VSTORE_VECTORIZE_GENERIC VSTORE_VECTORIZE
65+
#else
66+
// The generic address space isn't available, so make the macro do nothing
67+
#define VSTORE_VECTORIZE_GENERIC(X,Y)
68+
#endif
69+
6170
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
6271
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
6372
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
64-
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global)
73+
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
74+
VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic)
6575

6676
VSTORE_ADDR_SPACES(schar)
6777
VSTORE_ADDR_SPACES(uchar)
@@ -97,11 +107,21 @@ VSTORE_ADDR_SPACES(half)
97107
DECLARE_HELPER(float, __private, __builtin_store_halff);
98108
DECLARE_HELPER(float, __global, __builtin_store_halff);
99109
DECLARE_HELPER(float, __local, __builtin_store_halff);
110+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
111+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
112+
defined(__opencl_c_generic_address_space))
113+
DECLARE_HELPER(float, __generic, __builtin_store_halff);
114+
#endif
100115

101116
#ifdef cl_khr_fp64
102117
DECLARE_HELPER(double, __private, __builtin_store_half);
103118
DECLARE_HELPER(double, __global, __builtin_store_half);
104119
DECLARE_HELPER(double, __local, __builtin_store_half);
120+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
121+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
122+
defined(__opencl_c_generic_address_space))
123+
DECLARE_HELPER(double, __generic, __builtin_store_half);
124+
#endif
105125
#endif
106126

107127
#define VEC_STORE1(STYPE, AS, val, ROUNDF) \

0 commit comments

Comments
 (0)