Skip to content

Commit 7c58b9a

Browse files
[SYCL] Adjust for all Dims offset in accessor's device __init (#6560)
The optimization done for 1-dim accessor is suitable for all dimensions. The test sycl/test/gdb/accessors-device.cpp had to be updated as its previous implementation was fragile in capturing info it tried to verify.
1 parent 5b2cfe2 commit 7c58b9a

File tree

4 files changed

+95
-52
lines changed

4 files changed

+95
-52
lines changed

llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ static const char *LegalSYCLFunctions[] = {
5252
"^sycl::_V1::ext::oneapi::experimental::this_sub_group"};
5353

5454
static const char *LegalSYCLFunctionsInStatelessMode[] = {
55-
"^sycl::_V1::multi_ptr<.+>::get", "^sycl::_V1::multi_ptr<.+>::multi_ptr",
55+
"^sycl::_V1::multi_ptr<.+>::get",
56+
"^sycl::_V1::multi_ptr<.+>::multi_ptr",
5657
"^sycl::_V1::accessor<.+>::get_pointer.+",
5758
"^sycl::_V1::accessor<.+>::getPointerAdjusted",
58-
"^sycl::_V1::accessor<.+>::getQualifiedPtr"};
59+
"^sycl::_V1::accessor<.+>::getQualifiedPtr",
60+
"^sycl::_V1::accessor<.+>::getTotalOffset"};
5961

6062
namespace {
6163

sycl/include/sycl/accessor.hpp

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131

3232
#include <type_traits>
3333

34+
#include <utility>
35+
3436
/// \file accessor.hpp
3537
/// The file contains implementations of accessor class.
3638
///
@@ -224,6 +226,20 @@ template <typename DataT, int Dimensions = 1,
224226
class accessor;
225227

226228
namespace detail {
229+
// To ensure loop unrolling is done when processing dimensions.
230+
template <size_t... Inds, class F>
231+
void dim_loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
232+
#if __cplusplus >= 201703L
233+
(f(Inds), ...);
234+
#else
235+
(void)std::initializer_list<int>{((void)(f(Inds)), 0)...};
236+
#endif
237+
}
238+
239+
template <size_t count, class F> void dim_loop(F &&f) {
240+
dim_loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
241+
}
242+
227243
void __SYCL_EXPORT constructorNotification(void *BufferObj, void *AccessorObj,
228244
access::target Target,
229245
access::mode Mode,
@@ -832,17 +848,12 @@ class __SYCL_SPECIAL_CLASS accessor :
832848

833849
template <int Dims = Dimensions> size_t getLinearIndex(id<Dims> Id) const {
834850

835-
#ifdef __SYCL_DEVICE_ONLY__
836-
// Pointer is already adjusted for 1D case.
837-
if (Dimensions == 1)
838-
return Id[0];
839-
#endif // __SYCL_DEVICE_ONLY__
840-
841851
size_t Result = 0;
842-
// Unroll the following loop for both host and device code
843-
__SYCL_UNROLL(3)
844-
for (int I = 0; I < Dims; ++I) {
852+
detail::dim_loop<Dims>([&, this](size_t I) {
845853
Result = Result * getMemoryRange()[I] + Id[I];
854+
// We've already adjusted for the accessor's offset in the __init, so
855+
// don't include it here in case of device.
856+
#ifndef __SYCL_DEVICE_ONLY__
846857
#if __cplusplus >= 201703L
847858
if constexpr (!(PropertyListT::template has_property<
848859
sycl::ext::oneapi::property::no_offset>())) {
@@ -851,7 +862,9 @@ class __SYCL_SPECIAL_CLASS accessor :
851862
#else
852863
Result += getOffset()[I];
853864
#endif
854-
}
865+
#endif // __SYCL_DEVICE_ONLY__
866+
});
867+
855868
return Result;
856869
}
857870

@@ -919,17 +932,10 @@ class __SYCL_SPECIAL_CLASS accessor :
919932
getAccessRange()[I] = AccessRange[I];
920933
getMemoryRange()[I] = MemRange[I];
921934
}
922-
// In case of 1D buffer, adjust pointer during initialization rather
923-
// then each time in operator[]. Will have to re-adjust in get_pointer
924-
if (1 == AdjustedDim)
925-
#if __cplusplus >= 201703L
926-
if constexpr (!(PropertyListT::template has_property<
927-
sycl::ext::oneapi::property::no_offset>())) {
928-
MData += Offset[0];
929-
}
930-
#else
931-
MData += Offset[0];
932-
#endif
935+
936+
// Adjust for offsets as that part is invariant for all invocations of
937+
// operator[]. Will have to re-adjust in get_pointer.
938+
MData += getTotalOffset();
933939
}
934940

935941
// __init variant used by the device compiler for ESIMD kernels.
@@ -1797,17 +1803,36 @@ class __SYCL_SPECIAL_CLASS accessor :
17971803
bool operator!=(const accessor &Rhs) const { return !(*this == Rhs); }
17981804

17991805
private:
1806+
#ifdef __SYCL_DEVICE_ONLY__
1807+
size_t getTotalOffset() const {
1808+
size_t TotalOffset = 0;
1809+
detail::dim_loop<Dimensions>([&, this](size_t I) {
1810+
TotalOffset = TotalOffset * impl.MemRange[I];
1811+
#if __cplusplus >= 201703L
1812+
if constexpr (!(PropertyListT::template has_property<
1813+
sycl::ext::oneapi::property::no_offset>())) {
1814+
TotalOffset += impl.Offset[I];
1815+
}
1816+
#else
1817+
TotalOffset += impl.Offset[I];
1818+
#endif
1819+
});
1820+
1821+
return TotalOffset;
1822+
}
1823+
#endif
1824+
18001825
// supporting function for get_pointer()
1801-
// when dim==1, MData will have been preadjusted for faster access with []
1826+
// MData has been preadjusted with offset for faster access with []
18021827
// but for get_pointer() we must return the original pointer.
18031828
// On device, getQualifiedPtr() returns MData, so we need to backjust it.
18041829
// On host, getQualifiedPtr() does not return MData, no need to adjust.
18051830
PtrType getPointerAdjusted() const {
18061831
#ifdef __SYCL_DEVICE_ONLY__
1807-
if (1 == AdjustedDim)
1808-
return getQualifiedPtr() - impl.Offset[0];
1809-
#endif
1832+
return getQualifiedPtr() - getTotalOffset();
1833+
#else
18101834
return getQualifiedPtr();
1835+
#endif
18111836
}
18121837

18131838
void preScreenAccessor(const size_t elemInBuffer,

sycl/include/sycl/detail/defines_elementary.hpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,6 @@
105105
#define __SYCL_WARNING(msg) __pragma(message(msg))
106106
#endif // __GNUC__
107107

108-
// Define __SYCL_UNROLL to add pragma/attribute unroll to a loop.
109-
#ifndef __SYCL_UNROLL
110-
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
111-
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x))
112-
#elif defined(__clang__)
113-
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x))
114-
#elif (defined(__GNUC__) && __GNUC__ >= 8) || \
115-
(defined(__GNUG__) && __GNUG__ >= 8)
116-
#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(GCC unroll x))
117-
#else
118-
#define __SYCL_UNROLL(x)
119-
#endif // compiler switch
120-
#endif // __SYCL_UNROLL
121-
122108
#if !defined(SYCL_DISABLE_CPP_VERSION_CHECK_WARNING) && __cplusplus < 201703L
123109

124110
#if defined(_MSC_VER) && !defined(__clang__)

sycl/test/gdb/accessors-device.cpp

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,49 @@
1-
// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -ast-dump %s -I %sycl_include -Wno-sycl-strict | FileCheck %s
1+
// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -I %sycl_include -Wno-sycl-strict | FileCheck %s
22
// UNSUPPORTED: windows
3-
#include <sycl/accessor.hpp>
3+
#include <sycl/sycl.hpp>
44

5-
typedef sycl::accessor<int, 1, sycl::access::mode::read> dummy;
5+
using namespace sycl;
6+
7+
int main() {
8+
queue q;
9+
buffer<int, 1> b(1);
10+
q.submit([&](handler &cgh) {
11+
accessor a{b, cgh};
12+
13+
cgh.single_task([=]() { a[0] = 42; });
14+
});
15+
}
616

717
// AccessorImplDevice must have MemRange and Offset fields
818

9-
// CHECK: CXXRecordDecl {{.*}} class AccessorImplDevice definition
10-
// CHECK-NOT: CXXRecordDecl {{.*}} definition
11-
// CHECK: FieldDecl {{.*}} referenced Offset
12-
// CHECK-NOT: CXXRecordDecl {{.*}} definition
13-
// CHECK: FieldDecl {{.*}} referenced MemRange
19+
// CHECK: 0 | class sycl::detail::AccessorImplDevice<1>
20+
// CHECK-NEXT: 0 | class sycl::id<1> Offset
21+
// CHECK-NEXT: 0 | class sycl::detail::array<1> (base)
22+
// CHECK-NEXT: 0 | size_t[1] common_array
23+
// CHECK-NEXT: 8 | class sycl::range<1> AccessRange
24+
// CHECK-NEXT: 8 | class sycl::detail::array<1> (base)
25+
// CHECK-NEXT: 8 | size_t[1] common_array
26+
// CHECK-NEXT: 16 | class sycl::range<1> MemRange
27+
// CHECK-NEXT: 16 | class sycl::detail::array<1> (base)
28+
// CHECK-NEXT: 16 | size_t[1] common_array
29+
// CHECK-NEXT: | [sizeof=24, dsize=24, align=8,
30+
// CHECK-NEXT: | nvsize=24, nvalign=8]
1431

1532
// accessor.impl must be present and of AccessorImplDevice type
1633

17-
// CHECK: CXXRecordDecl {{.*}} class accessor definition
18-
// CHECK-NOT: CXXRecordDecl {{.*}} definition
19-
// CHECK: FieldDecl {{.*}} referenced impl 'detail::AccessorImplDevice<AdjustedDim>'
34+
// CHECK: 0 | class sycl::accessor<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t>
35+
// CHECK-NEXT: 0 | class sycl::detail::accessor_common<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t> (base) (empty)
36+
// CHECK-NEXT: 0 | class sycl::detail::AccessorImplDevice<1> impl
37+
// CHECK-NEXT: 0 | class sycl::id<1> Offset
38+
// CHECK-NEXT: 0 | class sycl::detail::array<1> (base)
39+
// CHECK-NEXT: 0 | size_t[1] common_array
40+
// CHECK-NEXT: 8 | class sycl::range<1> AccessRange
41+
// CHECK-NEXT: 8 | class sycl::detail::array<1> (base)
42+
// CHECK-NEXT: 8 | size_t[1] common_array
43+
// CHECK-NEXT: 16 | class sycl::range<1> MemRange
44+
// CHECK-NEXT: 16 | class sycl::detail::array<1> (base)
45+
// CHECK-NEXT: 16 | size_t[1] common_array
46+
// CHECK-NEXT: 24 | union sycl::accessor<int, 1, sycl::access::mode::read_write, sycl::access::target::global_buffer, sycl::access::placeholder::false_t>::(anonymous at
47+
// CHECK-NEXT: 24 | ConcreteASPtrType MData
48+
// CHECK-NEXT: | [sizeof=32, dsize=32, align=8,
49+
// CHECK-NEXT: | nvsize=32, nvalign=8]

0 commit comments

Comments
 (0)