Skip to content

[SYCL] Adjust for all Dims offset in accessor's device __init #6560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ static const char *LegalSYCLFunctions[] = {
"^sycl::_V1::ext::oneapi::experimental::this_sub_group"};

static const char *LegalSYCLFunctionsInStatelessMode[] = {
"^sycl::_V1::multi_ptr<.+>::get", "^sycl::_V1::multi_ptr<.+>::multi_ptr",
"^sycl::_V1::multi_ptr<.+>::get",
"^sycl::_V1::multi_ptr<.+>::multi_ptr",
"^sycl::_V1::accessor<.+>::get_pointer.+",
"^sycl::_V1::accessor<.+>::getPointerAdjusted",
"^sycl::_V1::accessor<.+>::getQualifiedPtr"};
"^sycl::_V1::accessor<.+>::getQualifiedPtr",
"^sycl::_V1::accessor<.+>::getTotalOffset"};

namespace {

Expand Down
75 changes: 50 additions & 25 deletions sycl/include/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

#include <type_traits>

#include <utility>

/// \file accessor.hpp
/// The file contains implementations of accessor class.
///
Expand Down Expand Up @@ -224,6 +226,20 @@ template <typename DataT, int Dimensions = 1,
class accessor;

namespace detail {
// To ensure loop unrolling is done when processing dimensions.
template <size_t... Inds, class F>
void dim_loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
#if __cplusplus >= 201703L
(f(Inds), ...);
#else
(void)std::initializer_list<int>{((void)(f(Inds)), 0)...};
#endif
}

template <size_t count, class F> void dim_loop(F &&f) {
dim_loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
}

void __SYCL_EXPORT constructorNotification(void *BufferObj, void *AccessorObj,
access::target Target,
access::mode Mode,
Expand Down Expand Up @@ -832,17 +848,12 @@ class __SYCL_SPECIAL_CLASS accessor :

template <int Dims = Dimensions> size_t getLinearIndex(id<Dims> Id) const {

#ifdef __SYCL_DEVICE_ONLY__
// Pointer is already adjusted for 1D case.
if (Dimensions == 1)
return Id[0];
#endif // __SYCL_DEVICE_ONLY__

size_t Result = 0;
// Unroll the following loop for both host and device code
__SYCL_UNROLL(3)
for (int I = 0; I < Dims; ++I) {
detail::dim_loop<Dims>([&, this](size_t I) {
Result = Result * getMemoryRange()[I] + Id[I];
// We've already adjusted for the accessor's offset in the __init, so
// don't include it here in case of device.
#ifndef __SYCL_DEVICE_ONLY__
#if __cplusplus >= 201703L
if constexpr (!(PropertyListT::template has_property<
sycl::ext::oneapi::property::no_offset>())) {
Expand All @@ -851,7 +862,9 @@ class __SYCL_SPECIAL_CLASS accessor :
#else
Result += getOffset()[I];
#endif
}
#endif // __SYCL_DEVICE_ONLY__
});

return Result;
}

Expand Down Expand Up @@ -919,17 +932,10 @@ class __SYCL_SPECIAL_CLASS accessor :
getAccessRange()[I] = AccessRange[I];
getMemoryRange()[I] = MemRange[I];
}
// In case of 1D buffer, adjust pointer during initialization rather
// then each time in operator[]. Will have to re-adjust in get_pointer
if (1 == AdjustedDim)
#if __cplusplus >= 201703L
if constexpr (!(PropertyListT::template has_property<
sycl::ext::oneapi::property::no_offset>())) {
MData += Offset[0];
}
#else
MData += Offset[0];
#endif

// Adjust for offsets as that part is invariant for all invocations of
// operator[]. Will have to re-adjust in get_pointer.
MData += getTotalOffset();
}

// __init variant used by the device compiler for ESIMD kernels.
Expand Down Expand Up @@ -1797,17 +1803,36 @@ class __SYCL_SPECIAL_CLASS accessor :
bool operator!=(const accessor &Rhs) const { return !(*this == Rhs); }

private:
#ifdef __SYCL_DEVICE_ONLY__
size_t getTotalOffset() const {
size_t TotalOffset = 0;
detail::dim_loop<Dimensions>([&, this](size_t I) {
TotalOffset = TotalOffset * impl.MemRange[I];
#if __cplusplus >= 201703L
if constexpr (!(PropertyListT::template has_property<
sycl::ext::oneapi::property::no_offset>())) {
TotalOffset += impl.Offset[I];
}
#else
TotalOffset += impl.Offset[I];
#endif
});

return TotalOffset;
}
#endif

// supporting function for get_pointer()
// when dim==1, MData will have been preadjusted for faster access with []
// MData has been preadjusted with offset for faster access with []
// but for get_pointer() we must return the original pointer.
// On device, getQualifiedPtr() returns MData, so we need to backjust it.
// On host, getQualifiedPtr() does not return MData, no need to adjust.
PtrType getPointerAdjusted() const {
#ifdef __SYCL_DEVICE_ONLY__
if (1 == AdjustedDim)
return getQualifiedPtr() - impl.Offset[0];
#endif
return getQualifiedPtr() - getTotalOffset();
#else
return getQualifiedPtr();
#endif
}

void preScreenAccessor(const size_t elemInBuffer,
Expand Down
14 changes: 0 additions & 14 deletions sycl/include/sycl/detail/defines_elementary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,20 +105,6 @@
#define __SYCL_WARNING(msg) __pragma(message(msg))
#endif // __GNUC__

// Define __SYCL_UNROLL to add pragma/attribute unroll to a loop.
#ifndef __SYCL_UNROLL
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x))
#elif defined(__clang__)
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x))
#elif (defined(__GNUC__) && __GNUC__ >= 8) || \
(defined(__GNUG__) && __GNUG__ >= 8)
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(GCC unroll x))
#else
#define __SYCL_UNROLL(x)
#endif // compiler switch
#endif // __SYCL_UNROLL

#if !defined(SYCL_DISABLE_CPP_VERSION_CHECK_WARNING) && __cplusplus < 201703L

#if defined(_MSC_VER) && !defined(__clang__)
Expand Down
52 changes: 41 additions & 11 deletions sycl/test/gdb/accessors-device.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,49 @@
// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -ast-dump %s -I %sycl_include -Wno-sycl-strict | FileCheck %s
// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -I %sycl_include -Wno-sycl-strict | FileCheck %s
// UNSUPPORTED: windows
#include <sycl/accessor.hpp>
#include <sycl/sycl.hpp>

typedef sycl::accessor<int, 1, sycl::access::mode::read> dummy;
using namespace sycl;

int main() {
queue q;
buffer<int, 1> b(1);
q.submit([&](handler &cgh) {
accessor a{b, cgh};

cgh.single_task([=]() { a[0] = 42; });
});
}

// AccessorImplDevice must have MemRange and Offset fields

// CHECK: CXXRecordDecl {{.*}} class AccessorImplDevice definition
// CHECK-NOT: CXXRecordDecl {{.*}} definition
// CHECK: FieldDecl {{.*}} referenced Offset
// CHECK-NOT: CXXRecordDecl {{.*}} definition
// CHECK: FieldDecl {{.*}} referenced MemRange
// CHECK: 0 | class sycl::detail::AccessorImplDevice<1>
// CHECK-NEXT: 0 | class sycl::id<1> Offset
// CHECK-NEXT: 0 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 0 | size_t[1] common_array
// CHECK-NEXT: 8 | class sycl::range<1> AccessRange
// CHECK-NEXT: 8 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 8 | size_t[1] common_array
// CHECK-NEXT: 16 | class sycl::range<1> MemRange
// CHECK-NEXT: 16 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 16 | size_t[1] common_array
// CHECK-NEXT: | [sizeof=24, dsize=24, align=8,
// CHECK-NEXT: | nvsize=24, nvalign=8]

// accessor.impl must be present and of AccessorImplDevice type

// CHECK: CXXRecordDecl {{.*}} class accessor definition
// CHECK-NOT: CXXRecordDecl {{.*}} definition
// CHECK: FieldDecl {{.*}} referenced impl 'detail::AccessorImplDevice<AdjustedDim>'
// CHECK: 0 | class sycl::accessor<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t>
// CHECK-NEXT: 0 | class sycl::detail::accessor_common<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t> (base) (empty)
// CHECK-NEXT: 0 | class sycl::detail::AccessorImplDevice<1> impl
// CHECK-NEXT: 0 | class sycl::id<1> Offset
// CHECK-NEXT: 0 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 0 | size_t[1] common_array
// CHECK-NEXT: 8 | class sycl::range<1> AccessRange
// CHECK-NEXT: 8 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 8 | size_t[1] common_array
// CHECK-NEXT: 16 | class sycl::range<1> MemRange
// CHECK-NEXT: 16 | class sycl::detail::array<1> (base)
// CHECK-NEXT: 16 | size_t[1] common_array
// CHECK-NEXT: 24 | union sycl::accessor<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t>::(anonymous at
// CHECK-NEXT: 24 | ConcreteASPtrType MData
// CHECK-NEXT: | [sizeof=32, dsize=32, align=8,
// CHECK-NEXT: | nvsize=32, nvalign=8]