Skip to content

[SYCL][ESIMD] reduce restriction on some esimd API based upon user feedback #2526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ constexpr unsigned int ElemsPerAddrEncoding() {
template <typename T, int n, int ElemsPerAddr = 1,
CacheHint L1H = CacheHint::None, CacheHint L3H = CacheHint::None>
ESIMD_INLINE ESIMD_NODEBUG
typename std::enable_if<((n == 8 || n == 16 || n == 32) &&
typename std::enable_if<(__esimd::isPowerOf2(n, 32) &&
(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
ElemsPerAddr == 4)),
simd<T, n * ElemsPerAddr>>::type
Expand Down Expand Up @@ -120,7 +120,7 @@ ESIMD_INLINE ESIMD_NODEBUG
template <typename T, int n, int ElemsPerAddr = 1,
CacheHint L1H = CacheHint::None, CacheHint L3H = CacheHint::None>
ESIMD_INLINE ESIMD_NODEBUG
typename std::enable_if<((n == 8 || n == 16 || n == 32) &&
typename std::enable_if<(__esimd::isPowerOf2(n, 32) &&
(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
ElemsPerAddr == 4)),
void>::type
Expand Down Expand Up @@ -487,9 +487,9 @@ ESIMD_INLINE ESIMD_NODEBUG simd<T, n> slm_block_load(uint32_t offset) {
static_assert(Sz % __esimd::OWORD == 0,
"block size must be whole number of owords");
static_assert(__esimd::isPowerOf2(Sz / __esimd::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * __esimd::OWORD,
"block size must be at most 8 owords");
"block must be 1, 2, 4, 8, 16 owords long");
static_assert(Sz <= 16 * __esimd::OWORD,
"block size must be at most 16 owords");

return __esimd_slm_block_read<T, n>(offset);
}
Expand All @@ -503,9 +503,9 @@ ESIMD_INLINE ESIMD_NODEBUG void slm_block_store(uint32_t offset,
static_assert(Sz % __esimd::OWORD == 0,
"block size must be whole number of owords");
static_assert(__esimd::isPowerOf2(Sz / __esimd::OWORD),
"block must be 1, 2, 4 or 8 owords long");
static_assert(Sz <= 8 * __esimd::OWORD,
"block size must be at most 8 owords");
"block must be 1, 2, 4, 8, or 16 owords long");
static_assert(Sz <= 16 * __esimd::OWORD,
"block size must be at most 16 owords");

// offset in genx.oword.st is in owords
__esimd_slm_block_write<T, n>(offset >> 4, vals.data());
Expand Down
25 changes: 22 additions & 3 deletions sycl/test/basic_tests/esimd/gather_scatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,22 @@
using namespace sycl::INTEL::gpu;
using namespace cl::sycl;

void kernel(accessor<int, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
void kernel0(accessor<int, 1, access::mode::read_write,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void kernel0(accessor<int, 1, access::mode::read_write,
// TODO: add executable tests for gather/scatter with verification
void kernel0(accessor<int, 1, access::mode::read_write,

access::target::global_buffer> &buf)
__attribute__((sycl_device)) {
simd<uint32_t, 2> offsets(0, 1);
simd<int, 2> v1(0, 1);

auto v0 = gather<int, 2>(buf.get_pointer(), offsets);

v0 = v0 + v1;

scatter<int, 2>(buf.get_pointer(), v0, offsets);
}

void kernel(accessor<int, 1, access::mode::read_write,
access::target::global_buffer> &buf)
__attribute__((sycl_device)) {
simd<uint32_t, 32> offsets(0, 1);
simd<int, 32> v1(0, 1);

Expand All @@ -20,7 +35,9 @@ void kernel(accessor<int, 1, access::mode::read_write, access::target::global_bu
scatter<int, 32>(buf.get_pointer(), v0, offsets);
}

void kernel(accessor<uint8_t, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
void kernel(accessor<uint8_t, 1, access::mode::read_write,
access::target::global_buffer> &buf)
__attribute__((sycl_device)) {
simd<uint32_t, 32> offsets(0, 1);
simd<uint8_t, 32> v1(0, 1);

Expand All @@ -33,7 +50,9 @@ void kernel(accessor<uint8_t, 1, access::mode::read_write, access::target::globa
scatter<uint8_t, 32>(buf.get_pointer(), v0, offsets);
}

void kernel(accessor<uint16_t, 1, access::mode::read_write, access::target::global_buffer> &buf) __attribute__((sycl_device)) {
void kernel(accessor<uint16_t, 1, access::mode::read_write,
access::target::global_buffer> &buf)
__attribute__((sycl_device)) {
simd<uint32_t, 32> offsets(0, 1);
simd<uint16_t, 32> v1(0, 1);

Expand Down
10 changes: 10 additions & 0 deletions sycl/test/basic_tests/esimd/slm_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ void kernel() __attribute__((sycl_device)) {

slm_block_store<int, 32>(0, v0);
}

void kernel2() __attribute__((sycl_device)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void kernel2() __attribute__((sycl_device)) {
// TODO: add executable tests for slm load/stores with verification
void kernel2() __attribute__((sycl_device)) {

simd<int, 64> v1(0, 1);

auto v0 = slm_block_load<int, 64>(0);

v0 = v0 + v1;

slm_block_store<int, 64>(0, v0);
}