Skip to content

Commit 5c41ed6

Browse files
authored
[SYCL][ESIMD] Deprecate block_load/store, add simd::copy_from/to. (#3572)
* [SYCL][ESIMD] Deprecate block_load/store, add simd::copy_from/to. This patch: 1) Fixes the following TODO in esimd_memory.hpp: // TODO @rolandschulz // Should follow existing std::simd naming for similar APIs - "copy_from" and // "copy_to" to avoid confusion. 2) Adds type checks for the sycl accessor arguments in the added APIs. Signed-off-by: kbobrovs <[email protected]>
1 parent c2d6cfa commit 5c41ed6

File tree

5 files changed

+324
-45
lines changed

5 files changed

+324
-45
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//==------------- esimd_sycl_util.hpp - DPC++ Explicit SIMD API -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Utility functions related to interaction with generic SYCL and used for
9+
// implementing Explicit SIMD APIs.
10+
//===----------------------------------------------------------------------===//
11+
12+
#pragma once
13+
14+
#include <CL/sycl/accessor.hpp>
15+
16+
__SYCL_INLINE_NAMESPACE(cl) {
17+
namespace sycl {
18+
namespace INTEL {
19+
namespace gpu {
20+
namespace detail {
21+
22+
// Checks that given type is a SYCL accessor type. Sets its static field
23+
// \c value accordingly. Also, if the check is succesful, sets \c mode and
24+
// \c target static fields to the accessor type's access mode and access target
25+
// respectively. Otherwise they are set to -1.
26+
template <typename T> struct is_sycl_accessor : public std::false_type {
27+
static constexpr sycl::access::mode mode =
28+
static_cast<sycl::access::mode>(-1);
29+
static constexpr sycl::access::target target =
30+
static_cast<sycl::access::target>(-1);
31+
};
32+
33+
template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
34+
sycl::access::target AccessTarget,
35+
sycl::access::placeholder IsPlaceholder, typename PropertyListT>
36+
struct is_sycl_accessor<sycl::accessor<
37+
DataT, Dimensions, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>>
38+
: public std::true_type {
39+
static constexpr sycl::access::mode mode = AccessMode;
40+
static constexpr sycl::access::target target = AccessTarget;
41+
};
42+
43+
using accessor_mode_cap_val_t = bool;
44+
45+
// Denotes an accessor's capability - whether it can read or write.
46+
struct accessor_mode_cap {
47+
static inline constexpr accessor_mode_cap_val_t can_read = false;
48+
static inline constexpr accessor_mode_cap_val_t can_write = true;
49+
};
50+
51+
template <sycl::access::mode Mode, accessor_mode_cap_val_t Cap>
52+
constexpr bool accessor_mode_has_capability() {
53+
static_assert(Cap == accessor_mode_cap::can_read ||
54+
Cap == accessor_mode_cap::can_write,
55+
"unsupported capability");
56+
57+
if constexpr (Mode == sycl::access::mode::atomic ||
58+
Mode == sycl::access::mode::read_write ||
59+
Mode == sycl::access::mode::discard_read_write)
60+
return true; // atomic and *read_write accessors can read/write
61+
62+
return (Cap == accessor_mode_cap::can_read) ==
63+
(Mode == sycl::access::mode::read);
64+
}
65+
66+
// Checks that given type is a SYCL accessor type with given capability and
67+
// target.
68+
template <typename T, accessor_mode_cap_val_t Capability,
69+
sycl::access::target AccessTarget>
70+
struct is_sycl_accessor_with
71+
: public std::conditional_t<
72+
accessor_mode_has_capability<is_sycl_accessor<T>::mode,
73+
Capability>() &&
74+
(is_sycl_accessor<T>::target == AccessTarget),
75+
std::true_type, std::false_type> {};
76+
77+
template <typename T, accessor_mode_cap_val_t Capability,
78+
sycl::access::target AccessTarget, typename RetT>
79+
using EnableIfAccessor = sycl::detail::enable_if_t<
80+
detail::is_sycl_accessor_with<T, Capability, AccessTarget>::value, RetT>;
81+
82+
} // namespace detail
83+
} // namespace gpu
84+
} // namespace INTEL
85+
} // namespace sycl
86+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/INTEL/esimd/esimd.hpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#pragma once
1212

1313
#include <CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp>
14+
#include <CL/sycl/INTEL/esimd/detail/esimd_memory_intrin.hpp>
15+
#include <CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp>
1416
#include <CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
1517

1618
__SYCL_INLINE_NAMESPACE(cl) {
@@ -480,6 +482,49 @@ template <typename Ty, int N> class simd {
480482
}
481483
}
482484

485+
/// @name Memory operations
486+
/// TODO NOTE: These APIs do not support cache hint specification yet, as this
487+
/// is WIP. Later addition of hints is not expected to break code using these
488+
/// APIs.
489+
///
490+
/// @{
491+
492+
/// Copy a contiguous block of data from memory into this simd object.
493+
/// The amount of memory copied equals the total size of vector elements in
494+
/// this object.
495+
/// @param addr the memory address to copy from. Must be a pointer to the
496+
/// global address space, otherwise behavior is undefined.
497+
ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION;
498+
499+
/// Copy a contiguous block of data from memory into this simd object.
500+
/// The amount of memory copied equals the total size of vector elements in
501+
/// this object.
502+
/// Source memory location is represented via a global accessor and offset.
503+
/// @param acc accessor to copy from.
504+
/// @param offset offset to copy from.
505+
template <typename AccessorT>
506+
ESIMD_INLINE
507+
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
508+
sycl::access::target::global_buffer, void>
509+
copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
510+
511+
/// Copy all vector elements of this object into a contiguous block in memory.
512+
/// @param addr the memory address to copy to. Must be a pointer to the
513+
/// global address space, otherwise behavior is undefined.
514+
ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION;
515+
516+
/// Copy all vector elements of this object into a contiguous block in memory.
517+
/// Destination memory location is represented via a global accessor and
518+
/// offset.
519+
/// @param acc accessor to copy from.
520+
/// @param offset offset to copy from.
521+
template <typename AccessorT>
522+
ESIMD_INLINE
523+
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
524+
sycl::access::target::global_buffer, void>
525+
copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
526+
527+
/// @} // Memory operations
483528
private:
484529
// The underlying data for this vector.
485530
vector_type M_data;
@@ -498,6 +543,88 @@ ESIMD_INLINE simd<U, n> convert(simd<T, n> val) {
498543
return __builtin_convertvector(val.data(), detail::vector_type_t<U, n>);
499544
}
500545

546+
// ----------- Outlined implementations of esimd class APIs.
547+
548+
template <typename T, int N> void simd<T, N>::copy_from(const T *const Addr) {
549+
constexpr unsigned Sz = sizeof(T) * N;
550+
static_assert(Sz >= detail::OperandSize::OWORD,
551+
"block size must be at least 1 oword");
552+
static_assert(Sz % detail::OperandSize::OWORD == 0,
553+
"block size must be whole number of owords");
554+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
555+
"block must be 1, 2, 4 or 8 owords long");
556+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
557+
"block size must be at most 8 owords");
558+
559+
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(Addr);
560+
*this =
561+
__esimd_flat_block_read_unaligned<T, N, CacheHint::None, CacheHint::None>(
562+
AddrVal);
563+
}
564+
565+
template <typename T, int N>
566+
template <typename AccessorT>
567+
ESIMD_INLINE
568+
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
569+
sycl::access::target::global_buffer, void>
570+
simd<T, N>::copy_from(AccessorT acc, uint32_t offset) {
571+
constexpr unsigned Sz = sizeof(T) * N;
572+
static_assert(Sz >= detail::OperandSize::OWORD,
573+
"block size must be at least 1 oword");
574+
static_assert(Sz % detail::OperandSize::OWORD == 0,
575+
"block size must be whole number of owords");
576+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
577+
"block must be 1, 2, 4 or 8 owords long");
578+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
579+
"block size must be at most 8 owords");
580+
#if defined(__SYCL_DEVICE_ONLY__)
581+
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
582+
*this = __esimd_block_read<T, N>(surf_ind, offset);
583+
#else
584+
*this = __esimd_block_read<T, N>(acc, offset);
585+
#endif // __SYCL_DEVICE_ONLY__
586+
}
587+
588+
template <typename T, int N> void simd<T, N>::copy_to(T *addr) {
589+
constexpr unsigned Sz = sizeof(T) * N;
590+
static_assert(Sz >= detail::OperandSize::OWORD,
591+
"block size must be at least 1 oword");
592+
static_assert(Sz % detail::OperandSize::OWORD == 0,
593+
"block size must be whole number of owords");
594+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
595+
"block must be 1, 2, 4 or 8 owords long");
596+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
597+
"block size must be at most 8 owords");
598+
599+
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
600+
__esimd_flat_block_write<T, N, CacheHint::None, CacheHint::None>(AddrVal,
601+
data());
602+
}
603+
604+
template <typename T, int N>
605+
template <typename AccessorT>
606+
ESIMD_INLINE
607+
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
608+
sycl::access::target::global_buffer, void>
609+
simd<T, N>::copy_to(AccessorT acc, uint32_t offset) {
610+
constexpr unsigned Sz = sizeof(T) * N;
611+
static_assert(Sz >= detail::OperandSize::OWORD,
612+
"block size must be at least 1 oword");
613+
static_assert(Sz % detail::OperandSize::OWORD == 0,
614+
"block size must be whole number of owords");
615+
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
616+
"block must be 1, 2, 4 or 8 owords long");
617+
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
618+
"block size must be at most 8 owords");
619+
620+
#if defined(__SYCL_DEVICE_ONLY__)
621+
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
622+
__esimd_block_write<T, N>(surf_ind, offset >> 4, data());
623+
#else
624+
__esimd_block_write<T, N>(acc, offset >> 4, data());
625+
#endif // __SYCL_DEVICE_ONLY__
626+
}
627+
501628
} // namespace gpu
502629
} // namespace INTEL
503630
} // namespace sycl
@@ -516,4 +643,5 @@ std::ostream &operator<<(std::ostream &OS,
516643
OS << "}";
517644
return OS;
518645
}
646+
519647
#endif

sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,15 @@ scatter(T *p, simd<T, n * ElemsPerAddr> vals, simd<uint32_t, n> offsets,
158158
pred.data());
159159
}
160160

161-
// TODO @rolandschulz
162-
// Should follow existing std::simd naming for similar APIs - "copy_from" and
163-
// "copy_to" to avoid confusion.
164-
//
165161
/// Flat-address block-load.
166162
/// \ingroup sycl_esimd
163+
// TODO normally, this function should just delegate to
164+
// simd::copy_from for the deprecation period, but separate implementations are
165+
// needed for now, as simd::copy_from does not support cache hints yet.
166+
// This API, even though deprecated, can't be removed until then.
167167
template <typename T, int n, CacheHint L1H = CacheHint::None,
168168
CacheHint L3H = CacheHint::None>
169+
__SYCL_DEPRECATED("use simd::copy_from.")
169170
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
170171
constexpr unsigned Sz = sizeof(T) * n;
171172
static_assert(Sz >= detail::OperandSize::OWORD,
@@ -184,30 +185,20 @@ ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
184185
/// Accessor-based block-load.
185186
/// \ingroup sycl_esimd
186187
template <typename T, int n, typename AccessorTy>
188+
__SYCL_DEPRECATED("use simd::copy_from.")
187189
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(AccessorTy acc,
188190
uint32_t offset) {
189-
constexpr unsigned Sz = sizeof(T) * n;
190-
static_assert(Sz >= detail::OperandSize::OWORD,
191-
"block size must be at least 1 oword");
192-
static_assert(Sz % detail::OperandSize::OWORD == 0,
193-
"block size must be whole number of owords");
194-
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
195-
"block must be 1, 2, 4 or 8 owords long");
196-
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
197-
"block size must be at most 8 owords");
198-
199-
#if defined(__SYCL_DEVICE_ONLY__)
200-
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
201-
return __esimd_block_read<T, n>(surf_ind, offset);
202-
#else
203-
return __esimd_block_read<T, n>(acc, offset);
204-
#endif // __SYCL_DEVICE_ONLY__
191+
simd<T, n> Res;
192+
Res.copy_from(acc, offset);
193+
return Res;
205194
}
206195

207196
/// Flat-address block-store.
208197
/// \ingroup sycl_esimd
198+
// TODO the above note about cache hints applies to this API as well.
209199
template <typename T, int n, CacheHint L1H = CacheHint::None,
210200
CacheHint L3H = CacheHint::None>
201+
__SYCL_DEPRECATED("use simd::copy_to.")
211202
ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
212203
constexpr unsigned Sz = sizeof(T) * n;
213204
static_assert(Sz >= detail::OperandSize::OWORD,
@@ -226,24 +217,10 @@ ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
226217
/// Accessor-based block-store.
227218
/// \ingroup sycl_esimd
228219
template <typename T, int n, typename AccessorTy>
229-
ESIMD_INLINE ESIMD_NODEBUG void block_store(AccessorTy acc, uint32_t offset,
230-
simd<T, n> vals) {
231-
constexpr unsigned Sz = sizeof(T) * n;
232-
static_assert(Sz >= detail::OperandSize::OWORD,
233-
"block size must be at least 1 oword");
234-
static_assert(Sz % detail::OperandSize::OWORD == 0,
235-
"block size must be whole number of owords");
236-
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
237-
"block must be 1, 2, 4 or 8 owords long");
238-
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
239-
"block size must be at most 8 owords");
240-
241-
#if defined(__SYCL_DEVICE_ONLY__)
242-
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
243-
__esimd_block_write<T, n>(surf_ind, offset >> 4, vals.data());
244-
#else
245-
__esimd_block_write<T, n>(acc, offset >> 4, vals.data());
246-
#endif // __SYCL_DEVICE_ONLY__
220+
__SYCL_DEPRECATED("use simd::copy_to.")
221+
ESIMD_INLINE ESIMD_NODEBUG
222+
void block_store(AccessorTy acc, uint32_t offset, simd<T, n> vals) {
223+
vals.copy_to(acc, offset);
247224
}
248225

249226
/// Accessor-based gather.

sycl/test/esimd/block_load_store.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s
2-
// expected-no-diagnostics
1+
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
32

43
#include <CL/sycl.hpp>
54
#include <CL/sycl/INTEL/esimd.hpp>
@@ -9,12 +8,26 @@
98
using namespace sycl::INTEL::gpu;
109
using namespace cl::sycl;
1110

12-
void kernel(accessor<int, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
11+
SYCL_EXTERNAL void kernel1(
12+
accessor<int, 1, access::mode::read_write, access::target::global_buffer>
13+
&buf) SYCL_ESIMD_FUNCTION {
1314
simd<int, 32> v1(0, 1);
14-
15-
auto v0 = block_load<int, 32>(buf.get_pointer());
16-
15+
// expected-warning@+2 {{deprecated}}
16+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:188 {{}}
17+
auto v0 = block_load<int, 32>(buf, 0);
1718
v0 = v0 + v1;
19+
// expected-warning@+2 {{deprecated}}
20+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:220 {{}}
21+
block_store<int, 32>(buf, 0, v0);
22+
}
1823

19-
block_store<int, 32>(buf.get_pointer(), v0);
24+
SYCL_EXTERNAL void kernel2(int *ptr) SYCL_ESIMD_FUNCTION {
25+
simd<int, 32> v1(0, 1);
26+
// expected-warning@+2 {{deprecated}}
27+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:169 {{}}
28+
auto v0 = block_load<int, 32>(ptr);
29+
v0 = v0 + v1;
30+
// expected-warning@+2 {{deprecated}}
31+
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:201 {{}}
32+
block_store<int, 32>(ptr, v0);
2033
}

0 commit comments

Comments
 (0)