Skip to content

[libclc] Move vload & vstore to CLC library #141755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions libclc/clc/include/clc/shared/clc_less_aligned_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines types to be used with (CLC) vstore and vload functions. These are
// vector types whose alignment is that of their respective scalar types.
//
//===----------------------------------------------------------------------===//

// Include guard: the less_aligned_ typedefs must only be emitted once per
// translation unit.
#ifndef __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__
#define __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__

// Expand the typedef body through the integer gentype driver.
// NOTE(review): __CLC_BODY is defined again below without an intervening
// #undef, so gentype.inc presumably undefines it after use -- confirm against
// clc/integer/gentype.inc.
#define __CLC_BODY <clc/shared/clc_less_aligned_types.inc>
#include <clc/integer/gentype.inc>

// Expand the same typedef body through the math gentype driver.
#define __CLC_BODY <clc/shared/clc_less_aligned_types.inc>
#include <clc/math/gentype.inc>

#endif // __CLC_SHARED_CLC_LESS_ALIGNED_TYPES_H__
23 changes: 23 additions & 0 deletions libclc/clc/include/clc/shared/clc_less_aligned_types.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines types to be used with (CLC) vstore and vload functions. These are
// vector types whose alignment is that of their respective scalar types.
//
//===----------------------------------------------------------------------===//

// Introduces `less_aligned_<gentype>` for the gentype currently being expanded.
#ifndef __CLC_SCALAR

// Vector gentype: the same type, but aligned only to the size of one element.
typedef __CLC_GENTYPE __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)
    __attribute__((aligned(sizeof(__CLC_SCALAR_GENTYPE))));

#else

// Scalar gentype: there is no alignment to relax, so this is a plain alias.
typedef __CLC_GENTYPE __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE);

#endif // __CLC_SCALAR
20 changes: 20 additions & 0 deletions libclc/clc/include/clc/shared/clc_vload.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Include guard for the CLC vload declarations.
#ifndef __CLC_SHARED_CLC_VLOAD_H__
#define __CLC_SHARED_CLC_VLOAD_H__

// The declarations in clc_vload.inc return less_aligned_ vector types, so
// those typedefs have to be visible first.
#include <clc/shared/clc_less_aligned_types.h>

// Declare the vload functions through the integer gentype driver.
// NOTE(review): __CLC_BODY is defined again below without an intervening
// #undef, so gentype.inc presumably undefines it after use -- confirm.
#define __CLC_BODY <clc/shared/clc_vload.inc>
#include <clc/integer/gentype.inc>

// Declare them again through the math gentype driver; this pass is also the
// one that reaches the __CLC_FPSIZE-guarded half-load declarations.
#define __CLC_BODY <clc/shared/clc_vload.inc>
#include <clc/math/gentype.inc>

#endif // __CLC_SHARED_CLC_VLOAD_H__
64 changes: 64 additions & 0 deletions libclc/clc/include/clc/shared/clc_vload.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Function names for the gentype currently being expanded: the gentype's
// vector width is appended to each base name (e.g. __clc_vload4).
#define CLC_VLOAD_NAME __CLC_XCONCAT(__clc_vload, __CLC_VECSIZE)
#define CLC_VLOAD_HALF_NAME __CLC_XCONCAT(__clc_vload_half, __CLC_VECSIZE)
#define CLC_VLOADA_HALF_NAME __CLC_XCONCAT(__clc_vloada_half, __CLC_VECSIZE)

// Plain vload is only declared for vector gentypes.
#if !defined(__CLC_SCALAR)

// Return type: the vector gentype with its alignment reduced to that of its
// element type.
#define CLC_VLOAD_TY __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)

// Declares one vload overload reading through a pointer to address space AS.
#define CLC_VLOAD_DECL(AS)                                                     \
  _CLC_OVERLOAD _CLC_DECL CLC_VLOAD_TY CLC_VLOAD_NAME(                         \
      size_t offset, const AS __CLC_SCALAR_GENTYPE *x);

CLC_VLOAD_DECL(__private)
CLC_VLOAD_DECL(__local)
CLC_VLOAD_DECL(__constant)
CLC_VLOAD_DECL(__global)

// The generic address space overload only exists where it is a distinct
// address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
CLC_VLOAD_DECL(__generic)
#endif

#undef CLC_VLOAD_DECL
#undef CLC_VLOAD_TY

#endif // !defined(__CLC_SCALAR)

// vload_half and vloada_half exist whether or not cl_khr_fp16 is available.
// They are therefore declared while expanding the 32-bit floating-point
// gentypes, which are always present.
#ifdef __CLC_FPSIZE
#if __CLC_FPSIZE == 32

// Declares the vload_half / vloada_half pair for a `half` pointer in address
// space AS; both return the current (float) gentype.
#define CLC_VLOAD_HALF_DECL(AS)                                                \
  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE CLC_VLOAD_HALF_NAME(                   \
      size_t offset, const AS half *mem);                                      \
                                                                               \
  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE CLC_VLOADA_HALF_NAME(                  \
      size_t offset, const AS half *mem);

CLC_VLOAD_HALF_DECL(__private)
CLC_VLOAD_HALF_DECL(__local)
CLC_VLOAD_HALF_DECL(__constant)
CLC_VLOAD_HALF_DECL(__global)

// The generic address space overloads only exist where it is a distinct
// address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
CLC_VLOAD_HALF_DECL(__generic)
#endif

#undef CLC_VLOAD_HALF_DECL

#endif // __CLC_FPSIZE == 32
#endif // __CLC_FPSIZE

// The name helpers are per-expansion; remove them before the next gentype.
#undef CLC_VLOAD_NAME
#undef CLC_VLOAD_HALF_NAME
#undef CLC_VLOADA_HALF_NAME
20 changes: 20 additions & 0 deletions libclc/clc/include/clc/shared/clc_vstore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Include guard for the CLC vstore declarations.
#ifndef __CLC_SHARED_CLC_VSTORE_H__
#define __CLC_SHARED_CLC_VSTORE_H__

// The declarations in clc_vstore.inc take less_aligned_ types as their data
// argument, so those typedefs have to be visible first.
#include <clc/shared/clc_less_aligned_types.h>

// Declare the vstore functions through the integer gentype driver.
// NOTE(review): __CLC_BODY is defined again below without an intervening
// #undef, so gentype.inc presumably undefines it after use -- confirm.
#define __CLC_BODY <clc/shared/clc_vstore.inc>
#include <clc/integer/gentype.inc>

// Declare them again through the math gentype driver; this pass is also the
// one that reaches the __CLC_FPSIZE-guarded half-store declarations.
#define __CLC_BODY <clc/shared/clc_vstore.inc>
#include <clc/math/gentype.inc>

#endif // __CLC_SHARED_CLC_VSTORE_H__
70 changes: 70 additions & 0 deletions libclc/clc/include/clc/shared/clc_vstore.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Type of the value being stored: the less_aligned_ alias of the current
// gentype.
#define CLC_VSTORE_TY __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)

// Function names for the gentype currently being expanded. The half variants
// additionally take a suffix (MODE) that is appended after the vector width;
// the suffix may be empty.
#define CLC_VSTORE_NAME __CLC_XCONCAT(__clc_vstore, __CLC_VECSIZE)
#define CLC_VSTORE_HALF_NAME(MODE)                                             \
  __CLC_XCONCAT(__CLC_XCONCAT(__clc_vstore_half, __CLC_VECSIZE), MODE)
#define CLC_VSTOREA_HALF_NAME(MODE)                                            \
  __CLC_XCONCAT(__CLC_XCONCAT(__clc_vstorea_half, __CLC_VECSIZE), MODE)

// Plain vstore is only declared for vector gentypes.
#if !defined(__CLC_SCALAR)

// Declares one vstore overload writing through a pointer to address space AS.
#define CLC_VSTORE_DECL(AS)                                                    \
  _CLC_OVERLOAD _CLC_DECL void CLC_VSTORE_NAME(                                \
      CLC_VSTORE_TY data, size_t offset, AS __CLC_SCALAR_GENTYPE *p);

CLC_VSTORE_DECL(__private)
CLC_VSTORE_DECL(__local)
CLC_VSTORE_DECL(__global)

// The generic address space overload only exists where it is a distinct
// address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
CLC_VSTORE_DECL(__generic)
#endif

#undef CLC_VSTORE_DECL

#endif // !defined(__CLC_SCALAR)

// vstore_half and vstorea_half exist whether or not cl_khr_fp16 is available.
// They are declared while expanding the 32-bit and 64-bit floating-point
// gentypes.
#ifdef __CLC_FPSIZE
#if __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64

// Declares the vstore_half / vstorea_half pair with name suffix MODE for a
// `half` pointer in address space AS.
#define CLC_VSTORE_HALF_DECL(AS, MODE)                                         \
  _CLC_OVERLOAD _CLC_DECL void CLC_VSTORE_HALF_NAME(MODE)(                     \
      CLC_VSTORE_TY data, size_t offset, AS half *p);                          \
                                                                               \
  _CLC_OVERLOAD _CLC_DECL void CLC_VSTOREA_HALF_NAME(MODE)(                    \
      CLC_VSTORE_TY data, size_t offset, AS half *p);

// Declares the unsuffixed pair plus the _rtz, _rtn, _rtp and _rte pairs for
// address space AS.
#define CLC_VSTORE_HALF_DECL_ALL_MODES(AS)                                     \
  CLC_VSTORE_HALF_DECL(AS, )                                                   \
  CLC_VSTORE_HALF_DECL(AS, _rtz)                                               \
  CLC_VSTORE_HALF_DECL(AS, _rtn)                                               \
  CLC_VSTORE_HALF_DECL(AS, _rtp)                                               \
  CLC_VSTORE_HALF_DECL(AS, _rte)

CLC_VSTORE_HALF_DECL_ALL_MODES(__private)
CLC_VSTORE_HALF_DECL_ALL_MODES(__local)
CLC_VSTORE_HALF_DECL_ALL_MODES(__global)

// The generic address space overloads only exist where it is a distinct
// address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
CLC_VSTORE_HALF_DECL_ALL_MODES(__generic)
#endif

#undef CLC_VSTORE_HALF_DECL
#undef CLC_VSTORE_HALF_DECL_ALL_MODES

#endif // __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64
#endif // __CLC_FPSIZE

// The type/name helpers are per-expansion; remove them before the next
// gentype.
#undef CLC_VSTORE_TY
#undef CLC_VSTORE_NAME
#undef CLC_VSTORE_HALF_NAME
#undef CLC_VSTOREA_HALF_NAME
2 changes: 2 additions & 0 deletions libclc/clc/lib/generic/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,5 @@ relational/clc_signbit.cl
shared/clc_clamp.cl
shared/clc_max.cl
shared/clc_min.cl
shared/clc_vload.cl
shared/clc_vstore.cl
130 changes: 130 additions & 0 deletions libclc/clc/lib/generic/shared/clc_vload.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/internal/clc.h>
#include <clc/shared/clc_vload.h>

// Defines the __clc_vload2/3/4/8/16 overloads for element type ELEM, reading
// through a pointer in address space AS.
//
// `offset` counts whole vectors, so the vector to load starts at element
// N * offset of `x`. Each read goes through the matching less_aligned_ vector
// type, whose alignment is that of ELEM rather than of the full vector.
// The 3-element variant reads its first two lanes as a 2-element vector and
// its last lane as a scalar.
#define VLOAD_VECTORIZE(ELEM, AS)                                              \
  _CLC_OVERLOAD _CLC_DEF ELEM##2 __clc_vload2(size_t offset,                   \
                                              const AS ELEM *x) {              \
    const AS ELEM *src = x + 2 * offset;                                       \
    return *(const AS less_aligned_##ELEM##2 *)src;                            \
  }                                                                            \
                                                                               \
  _CLC_OVERLOAD _CLC_DEF ELEM##3 __clc_vload3(size_t offset,                   \
                                              const AS ELEM *x) {              \
    const AS ELEM *src = x + 3 * offset;                                       \
    ELEM##2 head = *(const AS less_aligned_##ELEM##2 *)src;                    \
    return (ELEM##3)(head.s0, head.s1, src[2]);                                \
  }                                                                            \
                                                                               \
  _CLC_OVERLOAD _CLC_DEF ELEM##4 __clc_vload4(size_t offset,                   \
                                              const AS ELEM *x) {              \
    const AS ELEM *src = x + 4 * offset;                                       \
    return *(const AS less_aligned_##ELEM##4 *)src;                            \
  }                                                                            \
                                                                               \
  _CLC_OVERLOAD _CLC_DEF ELEM##8 __clc_vload8(size_t offset,                   \
                                              const AS ELEM *x) {              \
    const AS ELEM *src = x + 8 * offset;                                       \
    return *(const AS less_aligned_##ELEM##8 *)src;                            \
  }                                                                            \
                                                                               \
  _CLC_OVERLOAD _CLC_DEF ELEM##16 __clc_vload16(size_t offset,                 \
                                                const AS ELEM *x) {            \
    const AS ELEM *src = x + 16 * offset;                                      \
    return *(const AS less_aligned_##ELEM##16 *)src;                           \
  }

// VLOAD_VECTORIZE_GENERIC emits the __generic overloads only on targets where
// the generic address space is distinct; elsewhere it expands to nothing.
#if !_CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define VLOAD_VECTORIZE_GENERIC(X, Y)
#else
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
#endif

// Emits the vload definitions for element type ELEM in every address space
// (the __generic set is dropped where that address space is not distinct).
#define VLOAD_ADDR_SPACES(ELEM)                                                \
  VLOAD_VECTORIZE(ELEM, __private)                                             \
  VLOAD_VECTORIZE(ELEM, __local)                                               \
  VLOAD_VECTORIZE(ELEM, __constant)                                            \
  VLOAD_VECTORIZE(ELEM, __global)                                              \
  VLOAD_VECTORIZE_GENERIC(ELEM, __generic)

// Element types that need no extension to be enabled: all integer types plus
// float.
#define VLOAD_TYPES()                                                          \
  VLOAD_ADDR_SPACES(char)                                                      \
  VLOAD_ADDR_SPACES(uchar)                                                     \
  VLOAD_ADDR_SPACES(short)                                                     \
  VLOAD_ADDR_SPACES(ushort)                                                    \
  VLOAD_ADDR_SPACES(int)                                                       \
  VLOAD_ADDR_SPACES(uint)                                                      \
  VLOAD_ADDR_SPACES(long)                                                      \
  VLOAD_ADDR_SPACES(ulong)                                                     \
  VLOAD_ADDR_SPACES(float)

// Instantiate the definitions for those types.
VLOAD_TYPES()

// double and half element types are only instantiated when the matching
// extension is available; the extension is enabled before the types are used.
#if defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VLOAD_ADDR_SPACES(double)
#endif // cl_khr_fp64

#if defined(cl_khr_fp16)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
VLOAD_ADDR_SPACES(half)
#endif // cl_khr_fp16

/* vload_half is legal even without cl_khr_fp16. */
/* There is no vload_half for double. */

/*
 * VEC_LOADn(dst, AS) fills the n lanes of `dst` from consecutive halves.
 * The expansions rely on two names from the enclosing function: `mem` (the
 * half pointer) and `offset` (the element index, advanced once per lane).
 * AS is accepted for symmetry with the callers and only forwarded.
 */
#define VEC_LOAD1(dst, AS) dst = __builtin_load_halff(mem + offset++);
#define VEC_LOAD2(dst, AS)                                                     \
  VEC_LOAD1(dst.lo, AS)                                                        \
  VEC_LOAD1(dst.hi, AS)
#define VEC_LOAD3(dst, AS)                                                     \
  VEC_LOAD1(dst.s0, AS)                                                        \
  VEC_LOAD1(dst.s1, AS)                                                        \
  VEC_LOAD1(dst.s2, AS)
#define VEC_LOAD4(dst, AS)                                                     \
  VEC_LOAD2(dst.lo, AS)                                                        \
  VEC_LOAD2(dst.hi, AS)
#define VEC_LOAD8(dst, AS)                                                     \
  VEC_LOAD4(dst.lo, AS)                                                        \
  VEC_LOAD4(dst.hi, AS)
#define VEC_LOAD16(dst, AS)                                                    \
  VEC_LOAD8(dst.lo, AS)                                                        \
  VEC_LOAD8(dst.hi, AS)

// Defines __clc_vload_half<SFX> and __clc_vloada_half<SFX> for address space
// ADDR_SPACE. Both load WIDTH halves into a RET_TY; they differ only in how
// the vector index is turned into an element index: vload_half scales by
// WIDTH, vloada_half scales by STRIDE.
#define __FUNC(SFX, WIDTH, STRIDE, RET_TY, ADDR_SPACE)                         \
  _CLC_OVERLOAD _CLC_DEF RET_TY __clc_vload_half##SFX(                         \
      size_t offset, const ADDR_SPACE half *mem) {                             \
    RET_TY __loaded;                                                           \
    offset *= WIDTH;                                                           \
    VEC_LOAD##WIDTH(__loaded, ADDR_SPACE)                                      \
    return __loaded;                                                           \
  }                                                                            \
  _CLC_OVERLOAD _CLC_DEF RET_TY __clc_vloada_half##SFX(                        \
      size_t offset, const ADDR_SPACE half *mem) {                             \
    RET_TY __loaded;                                                           \
    offset *= STRIDE;                                                          \
    VEC_LOAD##WIDTH(__loaded, ADDR_SPACE)                                      \
    return __loaded;                                                           \
  }

// Extra level of indirection so that macro arguments are fully expanded
// before __FUNC pastes them into identifiers.
#define FUNC(SFX, WIDTH, STRIDE, RET_TY, ADDR_SPACE)                           \
  __FUNC(SFX, WIDTH, STRIDE, RET_TY, ADDR_SPACE)

// Instantiate the vload_half / vloada_half definitions by running
// clc_vload_half.inc through the math gentype driver.
// NOTE(review): clc_vload_half.inc is not visible here; presumably it invokes
// FUNC once per address space for the float gentypes -- confirm.
#define __CLC_BODY "clc_vload_half.inc"
#include <clc/math/gentype.inc>
// Remove every helper macro this file defined so none of them leak out of the
// translation unit's remaining code.
#undef FUNC
#undef __FUNC
#undef VEC_LOAD16
#undef VEC_LOAD8
#undef VEC_LOAD4
#undef VEC_LOAD3
#undef VEC_LOAD2
#undef VEC_LOAD1
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE
#undef VLOAD_VECTORIZE_GENERIC
Loading
Loading