Skip to content

Commit b474c3f

Browse files
authored
[libclc] Move vload & vstore to CLC library (#141755)
This commit moves the various vload and vstore builtins (including vload_half, vloada_half, etc.) to the CLC library. This is almost entirely a code move and does not make any attempt to clean up or optimize the definitions of these builtins. There is no change to any of the targets' builtin libraries, except that the vstore helper rounding functions are now internalized. Cleanups can come in future work. The new CLC declarations and new OpenCL wrappers show how these CLC implementations could be defined more simply. The builtins could probably also be vectorized in future work; right now all of the 'half' versions for both vload and vstore are essentially scalarized.
1 parent 45b874b commit b474c3f

File tree

15 files changed

+776
-375
lines changed

15 files changed

+776
-375
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Defines types to be used with (CLC) vstore and vload functions. These are
10+
// vector types whose alignment is that of their respective scalar types.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
// Include guard: the less_aligned_* typedefs must only be emitted once.
#ifndef __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__
15+
#define __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__
16+
17+
// Instantiate the typedef template through the integer gentype driver.
// NOTE(review): gentype.inc presumably includes __CLC_BODY once per type and
// then #undefs it, which is why it is defined again below -- confirm.
#define __CLC_BODY <clc/shared/clc_less_aligned_types.inc>
18+
#include <clc/integer/gentype.inc>
19+
20+
// Instantiate the same typedef template through the math gentype driver.
#define __CLC_BODY <clc/shared/clc_less_aligned_types.inc>
21+
#include <clc/math/gentype.inc>
22+
23+
#endif // __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Defines types to be used with (CLC) vstore and vload functions. These are
10+
// vector types whose alignment is that of their respective scalar types.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
// Template body: defines less_aligned_<gentype> for the current __CLC_GENTYPE.
#ifdef __CLC_SCALAR
15+
16+
// Scalar case: the alias is the type itself, with no alignment attribute.
typedef __CLC_GENTYPE __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE);
17+
18+
#else
19+
20+
// Vector case: same type, but with alignment set to the size of one scalar
// element (__CLC_SCALAR_GENTYPE) via the aligned attribute.
typedef __CLC_GENTYPE __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)
21+
__attribute__((aligned(sizeof(__CLC_SCALAR_GENTYPE))));
22+
23+
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the CLC vload declarations.
#ifndef __CLC_SHARED_CLC_VLOAD_H__
10+
#define __CLC_SHARED_CLC_VLOAD_H__
11+
12+
// The declarations in clc_vload.inc use the less_aligned_* typedefs.
#include <clc/shared/clc_less_aligned_types.h>
13+
14+
// Emit the vload declarations through the integer gentype driver.
#define __CLC_BODY <clc/shared/clc_vload.inc>
15+
#include <clc/integer/gentype.inc>
16+
17+
// Emit the vload declarations through the math gentype driver.
#define __CLC_BODY <clc/shared/clc_vload.inc>
18+
#include <clc/math/gentype.inc>
19+
20+
#endif // __CLC_SHARED_CLC_VLOAD_H__
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Function names for the current instantiation: the base name with
// __CLC_VECSIZE appended via __CLC_XCONCAT.
#define CLC_VLOAD_NAME __CLC_XCONCAT(__clc_vload, __CLC_VECSIZE)
10+
#define CLC_VLOAD_HALF_NAME __CLC_XCONCAT(__clc_vload_half, __CLC_VECSIZE)
11+
#define CLC_VLOADA_HALF_NAME __CLC_XCONCAT(__clc_vloada_half, __CLC_VECSIZE)
12+
13+
// Plain vload declarations are emitted for vector gentypes only.
#ifndef __CLC_SCALAR
14+
15+
// Return type: the less_aligned_ alias of the current vector gentype.
#define CLC_VLOAD_TY __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)
16+
17+
// Declares one overload of CLC_VLOAD_NAME taking an offset and a const
// pointer to the scalar element type in address space ADDRSPACE.
#define CLC_VLOAD_DECL(ADDRSPACE) \
18+
_CLC_OVERLOAD _CLC_DECL CLC_VLOAD_TY CLC_VLOAD_NAME( \
19+
size_t offset, const ADDRSPACE __CLC_SCALAR_GENTYPE *x);
20+
21+
CLC_VLOAD_DECL(__private)
22+
CLC_VLOAD_DECL(__local)
23+
CLC_VLOAD_DECL(__constant)
24+
CLC_VLOAD_DECL(__global)
25+
26+
// The __generic overload is only declared when the target reports a distinct
// generic address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
27+
CLC_VLOAD_DECL(__generic)
28+
#endif
29+
30+
#undef CLC_VLOAD_DECL
31+
#undef CLC_VLOAD_TY
32+
33+
#endif // __CLC_SCALAR
34+
35+
// vload_half and vloada_half are available even if cl_khr_fp16 is unavailable.
36+
// Declare these functions when working on float types, which we know are
37+
// always available.
38+
// Unlike the plain vload declarations, this section is not guarded by
// __CLC_SCALAR, so it is also emitted for the scalar float gentype.
#ifdef __CLC_FPSIZE
39+
#if __CLC_FPSIZE == 32
40+
41+
// Declares the vload_half and vloada_half overloads for one address space.
// Both read from a const half pointer and return the current float gentype.
#define CLC_VLOAD_HALF_DECL(ADDRSPACE) \
42+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE CLC_VLOAD_HALF_NAME( \
43+
size_t offset, const ADDRSPACE half *mem); \
44+
\
45+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE CLC_VLOADA_HALF_NAME( \
46+
size_t offset, const ADDRSPACE half *mem);
47+
48+
CLC_VLOAD_HALF_DECL(__private)
49+
CLC_VLOAD_HALF_DECL(__local)
50+
CLC_VLOAD_HALF_DECL(__constant)
51+
CLC_VLOAD_HALF_DECL(__global)
52+
53+
// The __generic overloads are only declared when the target reports a
// distinct generic address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
54+
CLC_VLOAD_HALF_DECL(__generic)
55+
#endif
56+
57+
#undef CLC_VLOAD_HALF_DECL
58+
59+
#endif
60+
#endif
61+
62+
// Drop the per-instantiation name macros defined at the top of this file.
// NOTE(review): presumably required because this file is re-included once
// per gentype and would otherwise redefine them -- confirm.
#undef CLC_VLOAD_NAME
63+
#undef CLC_VLOAD_HALF_NAME
64+
#undef CLC_VLOADA_HALF_NAME
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the CLC vstore declarations.
#ifndef __CLC_SHARED_CLC_VSTORE_H__
10+
#define __CLC_SHARED_CLC_VSTORE_H__
11+
12+
// The declarations in clc_vstore.inc use the less_aligned_* typedefs.
#include <clc/shared/clc_less_aligned_types.h>
13+
14+
// Emit the vstore declarations through the integer gentype driver.
#define __CLC_BODY <clc/shared/clc_vstore.inc>
15+
#include <clc/integer/gentype.inc>
16+
17+
// Emit the vstore declarations through the math gentype driver.
#define __CLC_BODY <clc/shared/clc_vstore.inc>
18+
#include <clc/math/gentype.inc>
19+
20+
#endif // __CLC_SHARED_CLC_VSTORE_H__
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Data parameter type: the less_aligned_ alias of the current gentype.
#define CLC_VSTORE_TY __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)
10+
// Function name for plain vstore: base name with __CLC_VECSIZE appended.
#define CLC_VSTORE_NAME __CLC_XCONCAT(__clc_vstore, __CLC_VECSIZE)
11+
// Function names for the half stores: base name, then __CLC_VECSIZE, then the
// suffix x (which may be empty).
#define CLC_VSTORE_HALF_NAME(x) \
12+
__CLC_XCONCAT(__CLC_XCONCAT(__clc_vstore_half, __CLC_VECSIZE), x)
13+
#define CLC_VSTOREA_HALF_NAME(x) \
14+
__CLC_XCONCAT(__CLC_XCONCAT(__clc_vstorea_half, __CLC_VECSIZE), x)
15+
16+
// Plain vstore declarations are emitted for vector gentypes only.
#ifndef __CLC_SCALAR
17+
18+
// Declares one overload of CLC_VSTORE_NAME taking the data vector, an offset
// and a pointer to the scalar element type in address space ADDRSPACE.
#define CLC_VSTORE_DECL(ADDRSPACE) \
19+
_CLC_OVERLOAD _CLC_DECL void CLC_VSTORE_NAME( \
20+
CLC_VSTORE_TY data, size_t offset, ADDRSPACE __CLC_SCALAR_GENTYPE *p);
21+
22+
// No __constant overload is declared for stores.
CLC_VSTORE_DECL(__private)
23+
CLC_VSTORE_DECL(__local)
24+
CLC_VSTORE_DECL(__global)
25+
26+
// The __generic overload is only declared when the target reports a distinct
// generic address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
27+
CLC_VSTORE_DECL(__generic)
28+
#endif
29+
30+
#undef CLC_VSTORE_DECL
31+
32+
#endif // __CLC_SCALAR
33+
34+
// vstore_half and vstorea_half are available even if cl_khr_fp16 is
35+
// unavailable.
36+
// Declared for 32-bit and 64-bit floating-point gentypes. This section is not
// guarded by __CLC_SCALAR, so scalar gentypes are covered as well.
#ifdef __CLC_FPSIZE
37+
#if __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64
38+
39+
// Declares the vstore_half and vstorea_half overloads for one address space
// and one name suffix. Both take the data, an offset and a half pointer.
#define CLC_VSTORE_HALF_DECL(ADDRSPACE, SUFFIX) \
40+
_CLC_OVERLOAD _CLC_DECL void CLC_VSTORE_HALF_NAME(SUFFIX)( \
41+
CLC_VSTORE_TY data, size_t offset, ADDRSPACE half *p); \
42+
\
43+
_CLC_OVERLOAD _CLC_DECL void CLC_VSTOREA_HALF_NAME(SUFFIX)( \
44+
CLC_VSTORE_TY data, size_t offset, ADDRSPACE half *p);
45+
46+
// Expands CLC_VSTORE_HALF_DECL for the unsuffixed name and for each of the
// _rtz, _rtn, _rtp and _rte suffixes.
#define CLC_VSTORE_HALF_DECL_ALL_MODES(ADDRSPACE) \
47+
CLC_VSTORE_HALF_DECL(ADDRSPACE, ) \
48+
CLC_VSTORE_HALF_DECL(ADDRSPACE, _rtz) \
49+
CLC_VSTORE_HALF_DECL(ADDRSPACE, _rtn) \
50+
CLC_VSTORE_HALF_DECL(ADDRSPACE, _rtp) \
51+
CLC_VSTORE_HALF_DECL(ADDRSPACE, _rte)
52+
53+
// No __constant overloads are declared for stores.
CLC_VSTORE_HALF_DECL_ALL_MODES(__private)
54+
CLC_VSTORE_HALF_DECL_ALL_MODES(__local)
55+
CLC_VSTORE_HALF_DECL_ALL_MODES(__global)
56+
57+
// The __generic overloads are only declared when the target reports a
// distinct generic address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
58+
CLC_VSTORE_HALF_DECL_ALL_MODES(__generic)
59+
#endif
60+
61+
#undef CLC_VSTORE_HALF_DECL
62+
#undef CLC_VSTORE_HALF_DECL_ALL_MODES
63+
64+
#endif
65+
#endif
66+
67+
// Drop the per-instantiation type/name macros defined at the top of this file.
// NOTE(review): presumably required because this file is re-included once
// per gentype and would otherwise redefine them -- confirm.
#undef CLC_VSTORE_TY
68+
#undef CLC_VSTORE_NAME
69+
#undef CLC_VSTORE_HALF_NAME
70+
#undef CLC_VSTOREA_HALF_NAME

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,5 @@ relational/clc_signbit.cl
147147
shared/clc_clamp.cl
148148
shared/clc_max.cl
149149
shared/clc_min.cl
150+
shared/clc_vload.cl
151+
shared/clc_vstore.cl
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/internal/clc.h>
10+
#include <clc/shared/clc_vload.h>
11+
12+
// Defines __clc_vload2/3/4/8/16 for element type PRIM_TYPE in address space
// ADDR_SPACE. Each function reads starting at element index N * offset of x.
// The 2-, 4-, 8- and 16-wide versions load through a pointer cast to the
// matching less_aligned_ vector type. The 3-wide version loads the first two
// elements through a less_aligned_ 2-vector and reads the third element
// individually from x[offset * 3 + 2].
// NOTE(review): the split load in vload3 presumably avoids touching a fourth
// element beyond the packed triple -- confirm.
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
13+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 __clc_vload2( \
14+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
15+
return *( \
16+
(const ADDR_SPACE less_aligned_##PRIM_TYPE##2 *)(&x[2 * offset])); \
17+
} \
18+
\
19+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 __clc_vload3( \
20+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
21+
PRIM_TYPE##2 vec = \
22+
*((const ADDR_SPACE less_aligned_##PRIM_TYPE##2 *)(&x[3 * offset])); \
23+
return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset * 3 + 2]); \
24+
} \
25+
\
26+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 __clc_vload4( \
27+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
28+
return *( \
29+
(const ADDR_SPACE less_aligned_##PRIM_TYPE##4 *)(&x[4 * offset])); \
30+
} \
31+
\
32+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 __clc_vload8( \
33+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
34+
return *( \
35+
(const ADDR_SPACE less_aligned_##PRIM_TYPE##8 *)(&x[8 * offset])); \
36+
} \
37+
\
38+
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 __clc_vload16( \
39+
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
40+
return *( \
41+
(const ADDR_SPACE less_aligned_##PRIM_TYPE##16 *)(&x[16 * offset])); \
42+
}
43+
44+
// VLOAD_VECTORIZE_GENERIC forwards to VLOAD_VECTORIZE when the target has a
// distinct generic address space, and expands to nothing otherwise.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
45+
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
46+
#else
47+
// The generic address space isn't available, so make the macro do nothing
48+
#define VLOAD_VECTORIZE_GENERIC(X, Y)
49+
#endif
50+
51+
// Expands VLOAD_VECTORIZE for one element type across the __private, __local,
// __constant and __global address spaces, plus __generic where supported.
// Note that the macro parameter here is itself named __CLC_SCALAR_GENTYPE.
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
52+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
53+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
54+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
55+
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
56+
VLOAD_VECTORIZE_GENERIC(__CLC_SCALAR_GENTYPE, __generic)
57+
58+
// Expands VLOAD_ADDR_SPACES for the element types that need no extension:
// the eight integer types and float.
#define VLOAD_TYPES() \
59+
VLOAD_ADDR_SPACES(char) \
60+
VLOAD_ADDR_SPACES(uchar) \
61+
VLOAD_ADDR_SPACES(short) \
62+
VLOAD_ADDR_SPACES(ushort) \
63+
VLOAD_ADDR_SPACES(int) \
64+
VLOAD_ADDR_SPACES(uint) \
65+
VLOAD_ADDR_SPACES(long) \
66+
VLOAD_ADDR_SPACES(ulong) \
67+
VLOAD_ADDR_SPACES(float)
68+
69+
// Emit the definitions for all of the types listed above.
VLOAD_TYPES()
70+
71+
// double definitions are only emitted when cl_khr_fp64 is defined.
#ifdef cl_khr_fp64
72+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
73+
VLOAD_ADDR_SPACES(double)
74+
#endif
75+
// half definitions are only emitted when cl_khr_fp16 is defined.
#ifdef cl_khr_fp16
76+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
77+
VLOAD_ADDR_SPACES(half)
78+
#endif
79+
80+
/* vload_half are legal even without cl_khr_fp16 */
81+
/* no vload_half for double */
82+
/* VEC_LOAD1 assigns one value read with __builtin_load_halff from
 * mem[offset] to val and post-increments offset. It relies on `mem` and
 * `offset` being in scope where it expands, and does not use AS. The wider
 * VEC_LOADn macros are composed from it; 2/4/8/16 recurse on the .lo/.hi
 * halves and 3 uses .s0/.s1/.s2, so every element is loaded one at a time. */
#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
83+
#define VEC_LOAD2(val, AS) \
84+
VEC_LOAD1(val.lo, AS) \
85+
VEC_LOAD1(val.hi, AS)
86+
#define VEC_LOAD3(val, AS) \
87+
VEC_LOAD1(val.s0, AS) \
88+
VEC_LOAD1(val.s1, AS) \
89+
VEC_LOAD1(val.s2, AS)
90+
#define VEC_LOAD4(val, AS) \
91+
VEC_LOAD2(val.lo, AS) \
92+
VEC_LOAD2(val.hi, AS)
93+
#define VEC_LOAD8(val, AS) \
94+
VEC_LOAD4(val.lo, AS) \
95+
VEC_LOAD4(val.hi, AS)
96+
#define VEC_LOAD16(val, AS) \
97+
VEC_LOAD8(val.lo, AS) \
98+
VEC_LOAD8(val.hi, AS)
99+
100+
/* Defines __clc_vload_half<SUFFIX> and __clc_vloada_half<SUFFIX> for return
 * type TYPE and address space AS. Both load VEC_SIZE elements through
 * VEC_LOAD<VEC_SIZE> into a temporary and return it. They differ only in how
 * the element offset is scaled: vload_half multiplies by VEC_SIZE, vloada_half
 * multiplies by OFFSET_SIZE. */
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
101+
_CLC_OVERLOAD _CLC_DEF TYPE __clc_vload_half##SUFFIX(size_t offset, \
102+
const AS half *mem) { \
103+
offset *= VEC_SIZE; \
104+
TYPE __tmp; \
105+
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
106+
} \
107+
_CLC_OVERLOAD _CLC_DEF TYPE __clc_vloada_half##SUFFIX(size_t offset, \
108+
const AS half *mem) { \
109+
offset *= OFFSET_SIZE; \
110+
TYPE __tmp; \
111+
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
112+
}
113+
114+
/* Forwarding wrapper around __FUNC. The extra level lets macro arguments be
 * expanded before __FUNC pastes them with ##. */
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
115+
__FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
116+
117+
/* Instantiate the half-load definitions through the math gentype driver.
 * NOTE(review): clc_vload_half.inc is not shown here; it presumably invokes
 * FUNC for each vector size and address space -- confirm. */
#define __CLC_BODY "clc_vload_half.inc"
118+
#include <clc/math/gentype.inc>
119+
/* Remove every helper macro defined in this file. */
#undef FUNC
120+
#undef __FUNC
121+
#undef VEC_LOAD16
122+
#undef VEC_LOAD8
123+
#undef VEC_LOAD4
124+
#undef VEC_LOAD3
125+
#undef VEC_LOAD2
126+
#undef VEC_LOAD1
127+
#undef VLOAD_TYPES
128+
#undef VLOAD_ADDR_SPACES
129+
#undef VLOAD_VECTORIZE
130+
#undef VLOAD_VECTORIZE_GENERIC

0 commit comments

Comments
 (0)