31
31
32
32
#include < type_traits>
33
33
34
+ #include < utility>
35
+
34
36
// / \file accessor.hpp
35
37
// / The file contains implementations of accessor class.
36
38
// /
@@ -224,6 +226,20 @@ template <typename DataT, int Dimensions = 1,
224
226
class accessor ;
225
227
226
228
namespace detail {
229
+ // To ensure loop unrolling is done when processing dimensions.
230
+ template <size_t ... Inds, class F >
231
+ void dim_loop_impl (std::integer_sequence<size_t , Inds...>, F &&f) {
232
+ #if __cplusplus >= 201703L
233
+ (f (Inds), ...);
234
+ #else
235
+ (void )std::initializer_list<int >{((void )(f (Inds)), 0 )...};
236
+ #endif
237
+ }
238
+
239
+ template <size_t count, class F > void dim_loop (F &&f) {
240
+ dim_loop_impl (std::make_index_sequence<count>{}, std::forward<F>(f));
241
+ }
242
+
227
243
void __SYCL_EXPORT constructorNotification (void *BufferObj, void *AccessorObj,
228
244
access::target Target,
229
245
access::mode Mode,
@@ -832,17 +848,12 @@ class __SYCL_SPECIAL_CLASS accessor :
832
848
833
849
template <int Dims = Dimensions> size_t getLinearIndex (id<Dims> Id) const {
834
850
835
- #ifdef __SYCL_DEVICE_ONLY__
836
- // Pointer is already adjusted for 1D case.
837
- if (Dimensions == 1 )
838
- return Id[0 ];
839
- #endif // __SYCL_DEVICE_ONLY__
840
-
841
851
size_t Result = 0 ;
842
- // Unroll the following loop for both host and device code
843
- __SYCL_UNROLL (3 )
844
- for (int I = 0 ; I < Dims; ++I) {
852
+ detail::dim_loop<Dims>([&, this ](size_t I) {
845
853
Result = Result * getMemoryRange ()[I] + Id[I];
854
+ // We've already adjusted for the accessor's offset in the __init, so
855
+ // don't include it here in case of device.
856
+ #ifndef __SYCL_DEVICE_ONLY__
846
857
#if __cplusplus >= 201703L
847
858
if constexpr (!(PropertyListT::template has_property<
848
859
sycl::ext::oneapi::property::no_offset>())) {
@@ -851,7 +862,9 @@ class __SYCL_SPECIAL_CLASS accessor :
851
862
#else
852
863
Result += getOffset ()[I];
853
864
#endif
854
- }
865
+ #endif // __SYCL_DEVICE_ONLY__
866
+ });
867
+
855
868
return Result;
856
869
}
857
870
@@ -919,17 +932,10 @@ class __SYCL_SPECIAL_CLASS accessor :
919
932
getAccessRange ()[I] = AccessRange[I];
920
933
getMemoryRange ()[I] = MemRange[I];
921
934
}
922
- // In case of 1D buffer, adjust pointer during initialization rather
923
- // then each time in operator[]. Will have to re-adjust in get_pointer
924
- if (1 == AdjustedDim)
925
- #if __cplusplus >= 201703L
926
- if constexpr (!(PropertyListT::template has_property<
927
- sycl::ext::oneapi::property::no_offset>())) {
928
- MData += Offset[0 ];
929
- }
930
- #else
931
- MData += Offset[0 ];
932
- #endif
935
+
936
+ // Adjust for offsets as that part is invariant for all invocations of
937
+ // operator[]. Will have to re-adjust in get_pointer.
938
+ MData += getTotalOffset ();
933
939
}
934
940
935
941
// __init variant used by the device compiler for ESIMD kernels.
@@ -1797,17 +1803,36 @@ class __SYCL_SPECIAL_CLASS accessor :
1797
1803
bool operator !=(const accessor &Rhs) const { return !(*this == Rhs); }
1798
1804
1799
1805
private:
1806
+ #ifdef __SYCL_DEVICE_ONLY__
1807
+ size_t getTotalOffset () const {
1808
+ size_t TotalOffset = 0 ;
1809
+ detail::dim_loop<Dimensions>([&, this ](size_t I) {
1810
+ TotalOffset = TotalOffset * impl.MemRange [I];
1811
+ #if __cplusplus >= 201703L
1812
+ if constexpr (!(PropertyListT::template has_property<
1813
+ sycl::ext::oneapi::property::no_offset>())) {
1814
+ TotalOffset += impl.Offset [I];
1815
+ }
1816
+ #else
1817
+ TotalOffset += impl.Offset [I];
1818
+ #endif
1819
+ });
1820
+
1821
+ return TotalOffset;
1822
+ }
1823
+ #endif
1824
+
1800
1825
// supporting function for get_pointer()
1801
- // when dim==1, MData will have been preadjusted for faster access with []
1826
+ // MData has been preadjusted with offset for faster access with []
1802
1827
// but for get_pointer() we must return the original pointer.
1803
1828
// On device, getQualifiedPtr() returns MData, so we need to backjust it.
1804
1829
// On host, getQualifiedPtr() does not return MData, no need to adjust.
1805
1830
PtrType getPointerAdjusted () const {
1806
1831
#ifdef __SYCL_DEVICE_ONLY__
1807
- if (1 == AdjustedDim)
1808
- return getQualifiedPtr () - impl.Offset [0 ];
1809
- #endif
1832
+ return getQualifiedPtr () - getTotalOffset ();
1833
+ #else
1810
1834
return getQualifiedPtr ();
1835
+ #endif
1811
1836
}
1812
1837
1813
1838
void preScreenAccessor (const size_t elemInBuffer,
0 commit comments