Skip to content

Commit 464c23e

Browse files
committed
[SYCL][ESIMD] Deprecate block_load/store, add simd::copy_from/to.
This patch: 1) Fixes the following TODO in esimd_memory.hpp: // TODO @rolandschulz // Should follow existing std::simd naming for similar APIs - "copy_from" and // "copy_to" to avoid confusion. 2) Adds type checks for the sycl accessor arguments in the added APIs. Signed-off-by: kbobrovs <[email protected]>
1 parent 7f6da43 commit 464c23e

File tree

5 files changed

+327
-45
lines changed

5 files changed

+327
-45
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//==------------- esimd_sycl_util.hpp - DPC++ Explicit SIMD API -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Utility functions related to interaction with generic SYCL and used for
9+
// implementing Explicit SIMD APIs.
10+
//===----------------------------------------------------------------------===//
11+
12+
#pragma once
13+
14+
#include <CL/sycl/accessor.hpp>
15+
16+
__SYCL_INLINE_NAMESPACE(cl) {
17+
namespace sycl {
18+
namespace INTEL {
19+
namespace gpu {
20+
namespace detail {
21+
22+
// Checks that given type is a SYCL accessor type. Sets its static field
23+
// \c value accordingly. Also, if the check is succesful, sets \c mode and
24+
// \c target static fields to the accessor type's access mode and access target
25+
// respectively. Otherwise they are set to -1.
26+
template <typename T> struct is_sycl_accessor : public std::false_type {
27+
static constexpr sycl::access::mode mode =
28+
static_cast<sycl::access::mode>(-1);
29+
static constexpr sycl::access::target target =
30+
static_cast<sycl::access::target>(-1);
31+
};
32+
33+
template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
34+
sycl::access::target AccessTarget,
35+
sycl::access::placeholder IsPlaceholder, typename PropertyListT>
36+
struct is_sycl_accessor<sycl::accessor<
37+
DataT, Dimensions, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>>
38+
: public std::true_type {
39+
static constexpr sycl::access::mode mode = AccessMode;
40+
static constexpr sycl::access::target target = AccessTarget;
41+
};
42+
43+
using accessor_mode_cap_val_t = bool;
44+
45+
// Denotes an accessor's capability - whether it can read or write.
46+
struct accessor_mode_cap {
47+
static inline constexpr accessor_mode_cap_val_t can_read = false;
48+
static inline constexpr accessor_mode_cap_val_t can_write = true;
49+
};
50+
51+
template <sycl::access::mode Mode, accessor_mode_cap_val_t Cap>
52+
constexpr bool accessor_mode_has_capability() {
53+
static_assert(Cap == accessor_mode_cap::can_read ||
54+
Cap == accessor_mode_cap::can_write,
55+
"unsupported capability");
56+
57+
if constexpr (Mode == sycl::access::mode::atomic ||
58+
Mode == sycl::access::mode::read_write ||
59+
Mode == sycl::access::mode::discard_read_write)
60+
return true; // atomic and *read_write accessors can read/write
61+
62+
return (Cap == accessor_mode_cap::can_read) ==
63+
(Mode == sycl::access::mode::read);
64+
}
65+
66+
// Checks that given type is a SYCL accessor type with given capability and
67+
// target.
68+
template <typename T, accessor_mode_cap_val_t Capability,
69+
sycl::access::target AccessTarget>
70+
struct is_sycl_accessor_with
71+
: public std::conditional_t<
72+
accessor_mode_has_capability<is_sycl_accessor<T>::mode,
73+
Capability>() &&
74+
(is_sycl_accessor<T>::target == AccessTarget),
75+
std::true_type, std::false_type> {};
76+
77+
#define __ESIMD_ENABLE_IF_ACCESSOR(T, acc_capability, acc_target, ret_type) \
78+
sycl::detail::enable_if_t<detail::is_sycl_accessor_with< \
79+
T, detail::accessor_mode_cap::acc_capability, \
80+
sycl::access::target::acc_target>::value, \
81+
ret_type>
82+
83+
} // namespace detail
84+
} // namespace gpu
85+
} // namespace INTEL
86+
} // namespace sycl
87+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/INTEL/esimd/esimd.hpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#pragma once
1212

1313
#include <CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp>
14+
#include <CL/sycl/INTEL/esimd/detail/esimd_memory_intrin.hpp>
15+
#include <CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp>
1416
#include <CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
1517

1618
__SYCL_INLINE_NAMESPACE(cl) {
@@ -480,6 +482,47 @@ template <typename Ty, int N> class simd {
480482
}
481483
}
482484

485+
/// @name Memory operations
486+
/// TODO NOTE: These APIs do not support cache hint specification yet, as this
487+
/// is WIP. Later addition of hints is not expected to break code using these
488+
/// APIs.
489+
///
490+
/// @{
491+
492+
/// Copy a contiguous block of data from memory into this simd object.
493+
/// The amount of memory copied equals the total size of vector elements in
494+
/// this object.
495+
/// @param addr the memory address to copy from. Must be a pointer to the
496+
/// global address space, otherwise behavior is undefined.
497+
ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION;
498+
499+
/// Copy a contiguous block of data from memory into this simd object.
500+
/// The amount of memory copied equals the total size of vector elements in
501+
/// this object.
502+
/// Source memory location is represented via a global accessor and offset.
503+
/// @param acc accessor to copy from.
504+
/// @param offset offset to copy from.
505+
template <typename AccessorT>
506+
ESIMD_INLINE __ESIMD_ENABLE_IF_ACCESSOR(AccessorT, can_read, global_buffer,
507+
void)
508+
copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
509+
510+
/// Copy all vector elements of this object into a contiguous block in memory.
511+
/// @param addr the memory address to copy to. Must be a pointer to the
512+
/// global address space, otherwise behavior is undefined.
513+
ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION;
514+
515+
/// Copy all vector elements of this object into a contiguous block in memory.
516+
/// Destination memory location is represented via a global accessor and
517+
/// offset.
518+
/// @param acc accessor to copy from.
519+
/// @param offset offset to copy from.
520+
template <typename AccessorT>
521+
ESIMD_INLINE __ESIMD_ENABLE_IF_ACCESSOR(AccessorT, can_write, global_buffer,
522+
void)
523+
copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
524+
525+
/// @} // Memory operations
483526
private:
484527
// The underlying data for this vector.
485528
vector_type M_data;
@@ -498,6 +541,84 @@ ESIMD_INLINE simd<U, n> convert(simd<T, n> val) {
498541
return __builtin_convertvector(val.data(), detail::vector_type_t<U, n>);
499542
}
500543

544+
// ----------- Outlined implementations of esimd class APIs.
545+
546+
template <typename T, int N> void simd<T, N>::copy_from(const T *const addr) {
547+
constexpr unsigned Sz = sizeof(T) * N;
548+
static_assert(Sz >= detail::OperandSize::OWORD,
549+
"block size must be at least 1 oword");
550+
static_assert(Sz % detail::OperandSize::OWORD == 0,
551+
"block size must be whole number of owords");
552+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
553+
"block must be 1, 2, 4 or 8 owords long");
554+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
555+
"block size must be at most 8 owords");
556+
557+
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
558+
*this =
559+
__esimd_flat_block_read_unaligned<T, N, CacheHint::None, CacheHint::None>(
560+
AddrVal);
561+
}
562+
563+
template <typename T, int N>
564+
template <typename AccessorT>
565+
__ESIMD_ENABLE_IF_ACCESSOR(AccessorT, can_read, global_buffer, void)
566+
simd<T, N>::copy_from(AccessorT acc, uint32_t offset) {
567+
constexpr unsigned Sz = sizeof(T) * N;
568+
static_assert(Sz >= detail::OperandSize::OWORD,
569+
"block size must be at least 1 oword");
570+
static_assert(Sz % detail::OperandSize::OWORD == 0,
571+
"block size must be whole number of owords");
572+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
573+
"block must be 1, 2, 4 or 8 owords long");
574+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
575+
"block size must be at most 8 owords");
576+
#if defined(__SYCL_DEVICE_ONLY__)
577+
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
578+
*this = __esimd_block_read<T, N>(surf_ind, offset);
579+
#else
580+
*this = __esimd_block_read<T, N>(acc, offset);
581+
#endif // __SYCL_DEVICE_ONLY__
582+
}
583+
584+
template <typename T, int N> void simd<T, N>::copy_to(T *addr) {
585+
constexpr unsigned Sz = sizeof(T) * N;
586+
static_assert(Sz >= detail::OperandSize::OWORD,
587+
"block size must be at least 1 oword");
588+
static_assert(Sz % detail::OperandSize::OWORD == 0,
589+
"block size must be whole number of owords");
590+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
591+
"block must be 1, 2, 4 or 8 owords long");
592+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
593+
"block size must be at most 8 owords");
594+
595+
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
596+
__esimd_flat_block_write<T, N, CacheHint::None, CacheHint::None>(AddrVal,
597+
data());
598+
}
599+
600+
template <typename T, int N>
601+
template <typename AccessorT>
602+
__ESIMD_ENABLE_IF_ACCESSOR(AccessorT, can_write, global_buffer, void)
603+
simd<T, N>::copy_to(AccessorT acc, uint32_t offset) {
604+
constexpr unsigned Sz = sizeof(T) * N;
605+
static_assert(Sz >= detail::OperandSize::OWORD,
606+
"block size must be at least 1 oword");
607+
static_assert(Sz % detail::OperandSize::OWORD == 0,
608+
"block size must be whole number of owords");
609+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
610+
"block must be 1, 2, 4 or 8 owords long");
611+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
612+
"block size must be at most 8 owords");
613+
614+
#if defined(__SYCL_DEVICE_ONLY__)
615+
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
616+
__esimd_block_write<T, N>(surf_ind, offset >> 4, data());
617+
#else
618+
__esimd_block_write<T, N>(acc, offset >> 4, data());
619+
#endif // __SYCL_DEVICE_ONLY__
620+
}
621+
501622
} // namespace gpu
502623
} // namespace INTEL
503624
} // namespace sycl
@@ -516,4 +637,5 @@ std::ostream &operator<<(std::ostream &OS,
516637
OS << "}";
517638
return OS;
518639
}
640+
519641
#endif

sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,15 @@ scatter(T *p, simd<T, n * ElemsPerAddr> vals, simd<uint32_t, n> offsets,
158158
pred.data());
159159
}
160160

161-
// TODO @rolandschulz
162-
// Should follow existing std::simd naming for similar APIs - "copy_from" and
163-
// "copy_to" to avoid confusion.
164-
//
165161
/// Flat-address block-load.
166162
/// \ingroup sycl_esimd
163+
// TODO normally, this function should just delegate to
164+
// simd::copy_from for the deprecation period, but separate implementations are
165+
// needed for now, as simd::copy_from does not support cache hints yet.
166+
// This API, even though deprecated, can't be removed until then.
167167
template <typename T, int n, CacheHint L1H = CacheHint::None,
168168
CacheHint L3H = CacheHint::None>
169+
__SYCL_DEPRECATED("Replaced by simd::copy_from")
169170
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
170171
constexpr unsigned Sz = sizeof(T) * n;
171172
static_assert(Sz >= detail::OperandSize::OWORD,
@@ -184,30 +185,20 @@ ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
184185
/// Accessor-based block-load.
185186
/// \ingroup sycl_esimd
186187
template <typename T, int n, typename AccessorTy>
188+
__SYCL_DEPRECATED("Replaced by simd::copy_from")
187189
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(AccessorTy acc,
188190
uint32_t offset) {
189-
constexpr unsigned Sz = sizeof(T) * n;
190-
static_assert(Sz >= detail::OperandSize::OWORD,
191-
"block size must be at least 1 oword");
192-
static_assert(Sz % detail::OperandSize::OWORD == 0,
193-
"block size must be whole number of owords");
194-
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
195-
"block must be 1, 2, 4 or 8 owords long");
196-
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
197-
"block size must be at most 8 owords");
198-
199-
#if defined(__SYCL_DEVICE_ONLY__)
200-
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
201-
return __esimd_block_read<T, n>(surf_ind, offset);
202-
#else
203-
return __esimd_block_read<T, n>(acc, offset);
204-
#endif // __SYCL_DEVICE_ONLY__
191+
simd<T, n> Res;
192+
Res.copy_from(acc, offset);
193+
return Res;
205194
}
206195

207196
/// Flat-address block-store.
208197
/// \ingroup sycl_esimd
198+
// TODO the above note about cache hints applies to this API as well.
209199
template <typename T, int n, CacheHint L1H = CacheHint::None,
210200
CacheHint L3H = CacheHint::None>
201+
__SYCL_DEPRECATED("Replaced by simd::copy_to")
211202
ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
212203
constexpr unsigned Sz = sizeof(T) * n;
213204
static_assert(Sz >= detail::OperandSize::OWORD,
@@ -226,24 +217,10 @@ ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
226217
/// Accessor-based block-store.
227218
/// \ingroup sycl_esimd
228219
template <typename T, int n, typename AccessorTy>
229-
ESIMD_INLINE ESIMD_NODEBUG void block_store(AccessorTy acc, uint32_t offset,
230-
simd<T, n> vals) {
231-
constexpr unsigned Sz = sizeof(T) * n;
232-
static_assert(Sz >= detail::OperandSize::OWORD,
233-
"block size must be at least 1 oword");
234-
static_assert(Sz % detail::OperandSize::OWORD == 0,
235-
"block size must be whole number of owords");
236-
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
237-
"block must be 1, 2, 4 or 8 owords long");
238-
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
239-
"block size must be at most 8 owords");
240-
241-
#if defined(__SYCL_DEVICE_ONLY__)
242-
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
243-
__esimd_block_write<T, n>(surf_ind, offset >> 4, vals.data());
244-
#else
245-
__esimd_block_write<T, n>(acc, offset >> 4, vals.data());
246-
#endif // __SYCL_DEVICE_ONLY__
220+
__SYCL_DEPRECATED("Replaced by simd::copy_to")
221+
ESIMD_INLINE ESIMD_NODEBUG
222+
void block_store(AccessorTy acc, uint32_t offset, simd<T, n> vals) {
223+
vals.copy_to(acc, offset);
247224
}
248225

249226
/// Accessor-based gather.

sycl/test/esimd/block_load_store.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s
2-
// expected-no-diagnostics
1+
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
32

43
#include <CL/sycl.hpp>
54
#include <CL/sycl/INTEL/esimd.hpp>
@@ -9,12 +8,31 @@
98
using namespace sycl::INTEL::gpu;
109
using namespace cl::sycl;
1110

12-
void kernel(accessor<int, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
13-
simd<int, 32> v1(0, 1);
14-
15-
auto v0 = block_load<int, 32>(buf.get_pointer());
11+
#ifdef __SYCL_DEVICE_ONLY__
12+
#define __SYCL_DEVICE_ATTR __attribute__((sycl_device))
13+
#else
14+
#define __SYCL_DEVICE_ATTR
15+
#endif // __SYCL_DEVICE_ONLY__
1616

17+
void kernel1(accessor<int, 1, access::mode::read_write,
18+
access::target::global_buffer> &buf) __SYCL_DEVICE_ATTR {
19+
simd<int, 32> v1(0, 1);
20+
// expected-warning@+2 {{deprecated}}
21+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:188 {{}}
22+
auto v0 = block_load<int, 32>(buf, 0);
1723
v0 = v0 + v1;
24+
// expected-warning@+2 {{deprecated}}
25+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:220 {{}}
26+
block_store<int, 32>(buf, 0, v0);
27+
}
1828

19-
block_store<int, 32>(buf.get_pointer(), v0);
29+
void kernel2(int *ptr) __SYCL_DEVICE_ATTR {
30+
simd<int, 32> v1(0, 1);
31+
// expected-warning@+2 {{deprecated}}
32+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:169 {{}}
33+
auto v0 = block_load<int, 32>(ptr);
34+
v0 = v0 + v1;
35+
// expected-warning@+2 {{deprecated}}
36+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:201 {{}}
37+
block_store<int, 32>(ptr, v0);
2038
}

0 commit comments

Comments
 (0)