Skip to content

Commit 2edade2

Browse files
committed
[libclc][NFC] Clang-format vload/vstore code
1 parent 15321d2 commit 2edade2

File tree

6 files changed

+350 additions, -326 deletions

libclc/generic/include/clc/shared/vload.h

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,23 +6,24 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
10-
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
9+
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
10+
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH( \
11+
size_t offset, const ADDR_SPACE MEM_TYPE *x);
1112

12-
#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
13-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
14-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
15-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
16-
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
13+
#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
14+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
15+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
16+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
17+
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
1718
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
1819

19-
#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
20-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
21-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
22-
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
20+
#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
21+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
22+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
23+
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
2324
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
2425

25-
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
26+
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
2627
_CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
2728

2829
// Declare vector load prototypes
@@ -40,12 +41,12 @@ _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
4041
_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
4142

4243
#ifdef cl_khr_fp64
43-
#pragma OPENCL EXTENSION cl_khr_fp64: enable
44-
_CLC_VECTOR_VLOAD_PRIM1(double)
44+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
45+
_CLC_VECTOR_VLOAD_PRIM1(double)
4546
#endif
4647
#ifdef cl_khr_fp16
47-
#pragma OPENCL EXTENSION cl_khr_fp16: enable
48-
_CLC_VECTOR_VLOAD_PRIM1(half)
48+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
49+
_CLC_VECTOR_VLOAD_PRIM1(half)
4950
#endif
5051

5152
// Scalar vload_half also needs to be declared

libclc/generic/include/clc/shared/vstore.h

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,34 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
10-
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
9+
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
10+
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND( \
11+
VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
1112

12-
#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
13-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
14-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
15-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
16-
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
13+
#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
14+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
15+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
16+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
17+
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
1718
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE, RND)
1819

19-
#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
20-
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
21-
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
20+
#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
21+
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
22+
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
2223
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND)
2324

24-
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
25-
_CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE, )
25+
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
26+
_CLC_VECTOR_VSTORE_PRIM3(, PRIM_TYPE, PRIM_TYPE, )
2627

27-
#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
28-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
29-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
30-
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
31-
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
32-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
33-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
34-
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
35-
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
28+
#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
29+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
30+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
31+
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
32+
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
33+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
34+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
35+
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
36+
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
3637

3738
_CLC_VECTOR_VSTORE_PRIM1(char)
3839
_CLC_VECTOR_VSTORE_PRIM1(uchar)
@@ -44,26 +45,25 @@ _CLC_VECTOR_VSTORE_PRIM1(long)
4445
_CLC_VECTOR_VSTORE_PRIM1(ulong)
4546
_CLC_VECTOR_VSTORE_PRIM1(float)
4647

47-
_CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
48+
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, )
4849
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
4950
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
5051
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
5152
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)
5253

5354
#ifdef cl_khr_fp64
54-
_CLC_VECTOR_VSTORE_PRIM1(double)
55-
_CLC_VECTOR_VSTORE_HALF_PRIM1(double,)
56-
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
57-
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
58-
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
59-
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
55+
_CLC_VECTOR_VSTORE_PRIM1(double)
56+
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, )
57+
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
58+
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
59+
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
60+
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
6061
#endif
6162

6263
#ifdef cl_khr_fp16
63-
_CLC_VECTOR_VSTORE_PRIM1(half)
64+
_CLC_VECTOR_VSTORE_PRIM1(half)
6465
#endif
6566

66-
6767
#undef _CLC_VSTORE_DECL
6868
#undef _CLC_VECTOR_VSTORE_DECL
6969
#undef _CLC_VECTOR_VSTORE_PRIM3

libclc/generic/lib/shared/vload.cl

Lines changed: 91 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -8,59 +8,75 @@
88

99
#include <clc/clc.h>
1010

11-
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
12-
typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
13-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
14-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[2*offset])); \
15-
} \
16-
\
17-
typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
18-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
19-
PRIM_TYPE##2 vec = *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[3*offset])); \
20-
return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset*3+2]); \
21-
} \
22-
\
23-
typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
24-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
25-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4*) (&x[4*offset])); \
26-
} \
27-
\
28-
typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
29-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
30-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8*) (&x[8*offset])); \
31-
} \
32-
\
33-
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
34-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
35-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&x[16*offset])); \
36-
} \
11+
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
12+
typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
13+
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
14+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, \
15+
const ADDR_SPACE PRIM_TYPE *x) { \
16+
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
17+
*)(&x[2 * offset])); \
18+
} \
19+
\
20+
typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 \
21+
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
22+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, \
23+
const ADDR_SPACE PRIM_TYPE *x) { \
24+
PRIM_TYPE##2 vec = \
25+
*((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
26+
*)(&x[3 * offset])); \
27+
return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset * 3 + 2]); \
28+
} \
29+
\
30+
typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
31+
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
32+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, \
33+
const ADDR_SPACE PRIM_TYPE *x) { \
34+
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
35+
*)(&x[4 * offset])); \
36+
} \
37+
\
38+
typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
39+
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
40+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, \
41+
const ADDR_SPACE PRIM_TYPE *x) { \
42+
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
43+
*)(&x[8 * offset])); \
44+
} \
45+
\
46+
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
47+
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
48+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16( \
49+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
50+
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
51+
*)(&x[16 * offset])); \
52+
}
3753

38-
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
39-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
40-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
41-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
42-
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
54+
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
55+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
56+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
57+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
58+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global)
4359

44-
#define VLOAD_TYPES() \
45-
VLOAD_ADDR_SPACES(char) \
46-
VLOAD_ADDR_SPACES(uchar) \
47-
VLOAD_ADDR_SPACES(short) \
48-
VLOAD_ADDR_SPACES(ushort) \
49-
VLOAD_ADDR_SPACES(int) \
50-
VLOAD_ADDR_SPACES(uint) \
51-
VLOAD_ADDR_SPACES(long) \
52-
VLOAD_ADDR_SPACES(ulong) \
53-
VLOAD_ADDR_SPACES(float) \
60+
#define VLOAD_TYPES() \
61+
VLOAD_ADDR_SPACES(char) \
62+
VLOAD_ADDR_SPACES(uchar) \
63+
VLOAD_ADDR_SPACES(short) \
64+
VLOAD_ADDR_SPACES(ushort) \
65+
VLOAD_ADDR_SPACES(int) \
66+
VLOAD_ADDR_SPACES(uint) \
67+
VLOAD_ADDR_SPACES(long) \
68+
VLOAD_ADDR_SPACES(ulong) \
69+
VLOAD_ADDR_SPACES(float)
5470

5571
VLOAD_TYPES()
5672

5773
#ifdef cl_khr_fp64
5874
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
59-
VLOAD_ADDR_SPACES(double)
75+
VLOAD_ADDR_SPACES(double)
6076
#endif
6177
#ifdef cl_khr_fp16
6278
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
63-
VLOAD_ADDR_SPACES(half)
79+
VLOAD_ADDR_SPACES(half)
6480
#endif
6581

6682
/* vload_half are legal even without cl_khr_fp16 */
@@ -71,43 +87,45 @@ float __clc_vload_half_float_helper__global(const __global half *);
7187
float __clc_vload_half_float_helper__local(const __local half *);
7288
float __clc_vload_half_float_helper__private(const __private half *);
7389

74-
#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
90+
#define VEC_LOAD1(val, AS) \
91+
val = __clc_vload_half_float_helper##AS(&mem[offset++]);
7592
#else
7693
#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
7794
#endif
7895

79-
#define VEC_LOAD2(val, AS) \
80-
VEC_LOAD1(val.lo, AS) \
81-
VEC_LOAD1(val.hi, AS)
82-
#define VEC_LOAD3(val, AS) \
83-
VEC_LOAD1(val.s0, AS) \
84-
VEC_LOAD1(val.s1, AS) \
85-
VEC_LOAD1(val.s2, AS)
86-
#define VEC_LOAD4(val, AS) \
87-
VEC_LOAD2(val.lo, AS) \
88-
VEC_LOAD2(val.hi, AS)
89-
#define VEC_LOAD8(val, AS) \
90-
VEC_LOAD4(val.lo, AS) \
91-
VEC_LOAD4(val.hi, AS)
92-
#define VEC_LOAD16(val, AS) \
93-
VEC_LOAD8(val.lo, AS) \
94-
VEC_LOAD8(val.hi, AS)
96+
#define VEC_LOAD2(val, AS) \
97+
VEC_LOAD1(val.lo, AS) \
98+
VEC_LOAD1(val.hi, AS)
99+
#define VEC_LOAD3(val, AS) \
100+
VEC_LOAD1(val.s0, AS) \
101+
VEC_LOAD1(val.s1, AS) \
102+
VEC_LOAD1(val.s2, AS)
103+
#define VEC_LOAD4(val, AS) \
104+
VEC_LOAD2(val.lo, AS) \
105+
VEC_LOAD2(val.hi, AS)
106+
#define VEC_LOAD8(val, AS) \
107+
VEC_LOAD4(val.lo, AS) \
108+
VEC_LOAD4(val.hi, AS)
109+
#define VEC_LOAD16(val, AS) \
110+
VEC_LOAD8(val.lo, AS) \
111+
VEC_LOAD8(val.hi, AS)
95112

96-
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
97-
_CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
98-
offset *= VEC_SIZE; \
99-
TYPE __tmp; \
100-
VEC_LOAD##VEC_SIZE(__tmp, AS) \
101-
return __tmp; \
102-
} \
103-
_CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS half *mem) { \
104-
offset *= OFFSET_SIZE; \
105-
TYPE __tmp; \
106-
VEC_LOAD##VEC_SIZE(__tmp, AS) \
107-
return __tmp; \
113+
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
114+
_CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, \
115+
const AS half *mem) { \
116+
offset *= VEC_SIZE; \
117+
TYPE __tmp; \
118+
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
119+
} \
120+
_CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, \
121+
const AS half *mem) { \
122+
offset *= OFFSET_SIZE; \
123+
TYPE __tmp; \
124+
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
108125
}
109126

110-
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
127+
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
128+
__FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
111129

112130
#define __CLC_BODY "vload_half.inc"
113131
#include <clc/math/gentype.inc>

libclc/generic/lib/shared/vload_half.inc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,21 @@
1111
#ifndef __CLC_SCALAR
1212

1313
#if __CLC_VECSIZE == 3
14-
# define __CLC_OFFSET 4
14+
#define __CLC_OFFSET 4
1515
#else
16-
# define __CLC_OFFSET __CLC_VECSIZE
16+
#define __CLC_OFFSET __CLC_VECSIZE
1717
#endif
1818

19-
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);
20-
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
21-
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
22-
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
19+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);
20+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
21+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
22+
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
2323

2424
#undef __CLC_OFFSET
2525
#else
26-
FUNC(, 1, 1, __CLC_GENTYPE, __private);
27-
FUNC(, 1, 1, __CLC_GENTYPE, __local);
28-
FUNC(, 1, 1, __CLC_GENTYPE, __global);
29-
FUNC(, 1, 1, __CLC_GENTYPE, __constant);
26+
FUNC(, 1, 1, __CLC_GENTYPE, __private);
27+
FUNC(, 1, 1, __CLC_GENTYPE, __local);
28+
FUNC(, 1, 1, __CLC_GENTYPE, __global);
29+
FUNC(, 1, 1, __CLC_GENTYPE, __constant);
3030
#endif
3131
#endif

0 commit comments

Comments
 (0)