Skip to content

[SYCL][ESIMD] Deprecate block_load/store, add simd::copy_from/to. #3572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions sycl/include/CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//==------------- esimd_sycl_util.hpp - DPC++ Explicit SIMD API -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utility functions related to interaction with generic SYCL and used for
// implementing Explicit SIMD APIs.
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/accessor.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace INTEL {
namespace gpu {
namespace detail {

// Checks that given type is a SYCL accessor type. Sets its static field
// \c value accordingly. Also, if the check is succesful, sets \c mode and
// \c target static fields to the accessor type's access mode and access target
// respectively. Otherwise they are set to -1.
template <typename T> struct is_sycl_accessor : public std::false_type {
static constexpr sycl::access::mode mode =
static_cast<sycl::access::mode>(-1);
static constexpr sycl::access::target target =
static_cast<sycl::access::target>(-1);
};

template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
sycl::access::target AccessTarget,
sycl::access::placeholder IsPlaceholder, typename PropertyListT>
struct is_sycl_accessor<sycl::accessor<
DataT, Dimensions, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>>
: public std::true_type {
static constexpr sycl::access::mode mode = AccessMode;
static constexpr sycl::access::target target = AccessTarget;
};

using accessor_mode_cap_val_t = bool;

// Denotes an accessor's capability - whether it can read or write.
struct accessor_mode_cap {
static inline constexpr accessor_mode_cap_val_t can_read = false;
static inline constexpr accessor_mode_cap_val_t can_write = true;
};

template <sycl::access::mode Mode, accessor_mode_cap_val_t Cap>
constexpr bool accessor_mode_has_capability() {
static_assert(Cap == accessor_mode_cap::can_read ||
Cap == accessor_mode_cap::can_write,
"unsupported capability");

if constexpr (Mode == sycl::access::mode::atomic ||
Mode == sycl::access::mode::read_write ||
Mode == sycl::access::mode::discard_read_write)
return true; // atomic and *read_write accessors can read/write

return (Cap == accessor_mode_cap::can_read) ==
(Mode == sycl::access::mode::read);
}

// Checks that given type is a SYCL accessor type with given capability and
// target.
template <typename T, accessor_mode_cap_val_t Capability,
sycl::access::target AccessTarget>
struct is_sycl_accessor_with
: public std::conditional_t<
accessor_mode_has_capability<is_sycl_accessor<T>::mode,
Capability>() &&
(is_sycl_accessor<T>::target == AccessTarget),
std::true_type, std::false_type> {};

template <typename T, accessor_mode_cap_val_t Capability,
sycl::access::target AccessTarget, typename RetT>
using EnableIfAccessor = sycl::detail::enable_if_t<
detail::is_sycl_accessor_with<T, Capability, AccessTarget>::value, RetT>;

} // namespace detail
} // namespace gpu
} // namespace INTEL
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
128 changes: 128 additions & 0 deletions sycl/include/CL/sycl/INTEL/esimd/esimd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#pragma once

#include <CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp>
#include <CL/sycl/INTEL/esimd/detail/esimd_memory_intrin.hpp>
#include <CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp>
#include <CL/sycl/INTEL/esimd/detail/esimd_types.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
Expand Down Expand Up @@ -480,6 +482,49 @@ template <typename Ty, int N> class simd {
}
}

/// @name Memory operations
/// TODO NOTE: These APIs do not support cache hint specification yet, as this
/// is WIP. Later addition of hints is not expected to break code using these
/// APIs.
///
/// @{

/// Copy a contiguous block of data from memory into this simd object.
/// The amount of memory copied equals the total size of vector elements in
/// this object.
/// @param addr the memory address to copy from. Must be a pointer to the
/// global address space, otherwise behavior is undefined.
ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION;

/// Copy a contiguous block of data from memory into this simd object.
/// The amount of memory copied equals the total size of vector elements in
/// this object.
/// Source memory location is represented via a global accessor and offset.
/// @param acc accessor to copy from.
/// @param offset offset to copy from.
template <typename AccessorT>
ESIMD_INLINE
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
sycl::access::target::global_buffer, void>
copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;

/// Copy all vector elements of this object into a contiguous block in memory.
/// @param addr the memory address to copy to. Must be a pointer to the
/// global address space, otherwise behavior is undefined.
ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION;

/// Copy all vector elements of this object into a contiguous block in memory.
/// Destination memory location is represented via a global accessor and
/// offset.
/// @param acc accessor to copy from.
/// @param offset offset to copy from.
template <typename AccessorT>
ESIMD_INLINE
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
sycl::access::target::global_buffer, void>
copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;

/// @} // Memory operations
private:
// The underlying data for this vector.
vector_type M_data;
Expand All @@ -498,6 +543,88 @@ ESIMD_INLINE simd<U, n> convert(simd<T, n> val) {
return __builtin_convertvector(val.data(), detail::vector_type_t<U, n>);
}

// ----------- Outlined implementations of esimd class APIs.

template <typename T, int N> void simd<T, N>::copy_from(const T *const Addr) {
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");

uintptr_t AddrVal = reinterpret_cast<uintptr_t>(Addr);
*this =
__esimd_flat_block_read_unaligned<T, N, CacheHint::None, CacheHint::None>(
AddrVal);
}

template <typename T, int N>
template <typename AccessorT>
ESIMD_INLINE
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
sycl::access::target::global_buffer, void>
simd<T, N>::copy_from(AccessorT acc, uint32_t offset) {
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");
#if defined(__SYCL_DEVICE_ONLY__)
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
*this = __esimd_block_read<T, N>(surf_ind, offset);
#else
*this = __esimd_block_read<T, N>(acc, offset);
#endif // __SYCL_DEVICE_ONLY__
}

template <typename T, int N> void simd<T, N>::copy_to(T *addr) {
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");

uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
__esimd_flat_block_write<T, N, CacheHint::None, CacheHint::None>(AddrVal,
data());
}

template <typename T, int N>
template <typename AccessorT>
ESIMD_INLINE
detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
sycl::access::target::global_buffer, void>
simd<T, N>::copy_to(AccessorT acc, uint32_t offset) {
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");

#if defined(__SYCL_DEVICE_ONLY__)
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
__esimd_block_write<T, N>(surf_ind, offset >> 4, data());
#else
__esimd_block_write<T, N>(acc, offset >> 4, data());
#endif // __SYCL_DEVICE_ONLY__
}

} // namespace gpu
} // namespace INTEL
} // namespace sycl
Expand All @@ -516,4 +643,5 @@ std::ostream &operator<<(std::ostream &OS,
OS << "}";
return OS;
}

#endif
53 changes: 15 additions & 38 deletions sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,14 +158,15 @@ scatter(T *p, simd<T, n * ElemsPerAddr> vals, simd<uint32_t, n> offsets,
pred.data());
}

// TODO @rolandschulz
// Should follow existing std::simd naming for similar APIs - "copy_from" and
// "copy_to" to avoid confusion.
//
/// Flat-address block-load.
/// \ingroup sycl_esimd
// TODO normally, this function should just delegate to
// simd::copy_from for the deprecation period, but separate implementations are
// needed for now, as simd::copy_from does not support cache hints yet.
// This API, even though deprecated, can't be removed until then.
template <typename T, int n, CacheHint L1H = CacheHint::None,
CacheHint L3H = CacheHint::None>
__SYCL_DEPRECATED("use simd::copy_from.")
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
constexpr unsigned Sz = sizeof(T) * n;
static_assert(Sz >= detail::OperandSize::OWORD,
Expand All @@ -184,30 +185,20 @@ ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(const T *const addr) {
/// Accessor-based block-load.
/// \ingroup sycl_esimd
template <typename T, int n, typename AccessorTy>
__SYCL_DEPRECATED("use simd::copy_from.")
ESIMD_INLINE ESIMD_NODEBUG simd<T, n> block_load(AccessorTy acc,
uint32_t offset) {
constexpr unsigned Sz = sizeof(T) * n;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");

#if defined(__SYCL_DEVICE_ONLY__)
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
return __esimd_block_read<T, n>(surf_ind, offset);
#else
return __esimd_block_read<T, n>(acc, offset);
#endif // __SYCL_DEVICE_ONLY__
simd<T, n> Res;
Res.copy_from(acc, offset);
return Res;
}

/// Flat-address block-store.
/// \ingroup sycl_esimd
// TODO the above note about cache hints applies to this API as well.
template <typename T, int n, CacheHint L1H = CacheHint::None,
CacheHint L3H = CacheHint::None>
__SYCL_DEPRECATED("use simd::copy_to.")
ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
constexpr unsigned Sz = sizeof(T) * n;
static_assert(Sz >= detail::OperandSize::OWORD,
Expand All @@ -226,24 +217,10 @@ ESIMD_INLINE ESIMD_NODEBUG void block_store(T *p, simd<T, n> vals) {
/// Accessor-based block-store.
/// \ingroup sycl_esimd
template <typename T, int n, typename AccessorTy>
ESIMD_INLINE ESIMD_NODEBUG void block_store(AccessorTy acc, uint32_t offset,
simd<T, n> vals) {
constexpr unsigned Sz = sizeof(T) * n;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
static_assert(Sz % detail::OperandSize::OWORD == 0,
"block size must be whole number of owords");
static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * detail::OperandSize::OWORD,
"block size must be at most 8 owords");

#if defined(__SYCL_DEVICE_ONLY__)
auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc);
__esimd_block_write<T, n>(surf_ind, offset >> 4, vals.data());
#else
__esimd_block_write<T, n>(acc, offset >> 4, vals.data());
#endif // __SYCL_DEVICE_ONLY__
__SYCL_DEPRECATED("use simd::copy_to.")
ESIMD_INLINE ESIMD_NODEBUG
void block_store(AccessorTy acc, uint32_t offset, simd<T, n> vals) {
vals.copy_to(acc, offset);
}

/// Accessor-based gather.
Expand Down
27 changes: 20 additions & 7 deletions sycl/test/esimd/block_load_store.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s

#include <CL/sycl.hpp>
#include <CL/sycl/INTEL/esimd.hpp>
Expand All @@ -9,12 +8,26 @@
using namespace sycl::INTEL::gpu;
using namespace cl::sycl;

void kernel(accessor<int, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
SYCL_EXTERNAL void kernel1(
accessor<int, 1, access::mode::read_write, access::target::global_buffer>
&buf) SYCL_ESIMD_FUNCTION {
simd<int, 32> v1(0, 1);

auto v0 = block_load<int, 32>(buf.get_pointer());

// expected-warning@+2 {{deprecated}}
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:188 {{}}
auto v0 = block_load<int, 32>(buf, 0);
v0 = v0 + v1;
// expected-warning@+2 {{deprecated}}
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:220 {{}}
block_store<int, 32>(buf, 0, v0);
}

block_store<int, 32>(buf.get_pointer(), v0);
SYCL_EXTERNAL void kernel2(int *ptr) SYCL_ESIMD_FUNCTION {
simd<int, 32> v1(0, 1);
// expected-warning@+2 {{deprecated}}
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:169 {{}}
auto v0 = block_load<int, 32>(ptr);
v0 = v0 + v1;
// expected-warning@+2 {{deprecated}}
// expected-note@CL/sycl/INTEL/esimd/esimd_memory.hpp:201 {{}}
block_store<int, 32>(ptr, v0);
}
Loading