Skip to content

Commit 59fcb82

Browse files
[SYCL] accessor::get_pointer() should always return base pointer, even for offset accessors (#4687)
According to the SYCL 2020 spec get_pointer() returns a pointer to the beginning of the underlying buffer, even if it is offset. Presently our get_pointer() implementation returns an offset pointer when using an offset accessor. This PR fixes this. The Jenkins Summary is failing because the CTS tests have not been updated for this requirement. I have a PR introducing this update on the CTS used by the CI system. Signed-off-by: Chris Perkins <[email protected]>
1 parent 09ece34 commit 59fcb82

File tree

3 files changed

+131
-8
lines changed

3 files changed

+131
-8
lines changed

sycl/include/CL/sycl/accessor.hpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ class __SYCL_SPECIAL_CLASS accessor :
918918
getMemoryRange()[I] = MemRange[I];
919919
}
920920
// In case of 1D buffer, adjust pointer during initialization rather
921-
// then each time in operator[] or get_pointer functions.
921+
// then each time in operator[]. Will have to re-adjust in get_pointer
922922
if (1 == AdjustedDim)
923923
#if __cplusplus >= 201703L
924924
if constexpr (!(PropertyListT::template has_property<
@@ -1632,30 +1632,40 @@ class __SYCL_SPECIAL_CLASS accessor :
16321632
typename = detail::enable_if_t<AccessTarget_ ==
16331633
access::target::host_buffer>>
16341634
DataT *get_pointer() const {
1635-
const size_t LinearIndex = getLinearIndex(id<AdjustedDim>());
1636-
return getQualifiedPtr() + LinearIndex;
1635+
return getPointerAdjusted();
16371636
}
16381637

16391638
template <
16401639
access::target AccessTarget_ = AccessTarget,
16411640
typename = detail::enable_if_t<AccessTarget_ == access::target::device>>
16421641
global_ptr<DataT> get_pointer() const {
1643-
const size_t LinearIndex = getLinearIndex(id<AdjustedDim>());
1644-
return global_ptr<DataT>(getQualifiedPtr() + LinearIndex);
1642+
return global_ptr<DataT>(getPointerAdjusted());
16451643
}
16461644

16471645
template <access::target AccessTarget_ = AccessTarget,
16481646
typename = detail::enable_if_t<AccessTarget_ ==
16491647
access::target::constant_buffer>>
16501648
constant_ptr<DataT> get_pointer() const {
1651-
const size_t LinearIndex = getLinearIndex(id<AdjustedDim>());
1652-
return constant_ptr<DataT>(getQualifiedPtr() + LinearIndex);
1649+
return constant_ptr<DataT>(getPointerAdjusted());
16531650
}
16541651

16551652
bool operator==(const accessor &Rhs) const { return impl == Rhs.impl; }
16561653
bool operator!=(const accessor &Rhs) const { return !(*this == Rhs); }
16571654

16581655
private:
1656+
// supporting function for get_pointer()
1657+
// when dim==1, MData will have been preadjusted for faster access with []
1658+
// but for get_pointer() we must return the original pointer.
1659+
// On device, getQualifiedPtr() returns MData, so we need to backjust it.
1660+
// On host, getQualifiedPtr() does not return MData, no need to adjust.
1661+
PtrType getPointerAdjusted() const {
1662+
#ifdef __SYCL_DEVICE_ONLY__
1663+
if (1 == AdjustedDim)
1664+
return getQualifiedPtr() - impl.Offset[0];
1665+
#endif
1666+
return getQualifiedPtr();
1667+
}
1668+
16591669
void checkDeviceAccessorBufferSize(const size_t elemInBuffer) {
16601670
if (!IsHostBuf && elemInBuffer == 0)
16611671
throw cl::sycl::invalid_object_error(

sycl/source/detail/stream_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ void stream_impl::flush() {
8686
.get_access<access::mode::read_write, access::target::host_buffer>(
8787
cgh);
8888
cgh.host_task([=] {
89-
printf("%s", BufHostAcc.get_pointer());
89+
printf("%s", &(BufHostAcc[0]));
9090
fflush(stdout);
9191
});
9292
});
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %RUN_ON_HOST %t.out
3+
4+
// Per the SYCL 2020 spec (4.7.6.12 and others)
5+
// accessor::get_pointer() returns a pointer to the start of this accessor’s
6+
// memory. For a buffer accessor this is a pointer to the start of the
7+
// underlying buffer, even if this is a ranged accessor whose range does not
8+
// start at the beginning of the buffer.
9+
10+
// This is a departure from how get_pointer() was interpreted with offset
11+
// accessors in the past. Not relevant for images, which do not support offset
12+
// accessors.
13+
14+
#include <CL/sycl.hpp>
15+
#include <vector>
16+
using namespace cl::sycl;
17+
18+
void test_across_ranges() {
19+
constexpr auto r_w = access::mode::read_write;
20+
constexpr unsigned long width = 4;
21+
constexpr unsigned long count = width * width;
22+
constexpr unsigned long count3D = width * width * width; // 64
23+
std::vector<int> v1(count); // for 1D testing.
24+
std::vector<int> v2(count); // for 2D testing.
25+
std::vector<int> v3(count3D); // 3D
26+
27+
range<1> range_1D(count);
28+
range<2> range_2D(width, width);
29+
range<3> range_3D(width, width, width);
30+
31+
queue myQueue;
32+
{
33+
// 1D, 2D, 3D
34+
buffer<int> buf_1D(v1.data(), count);
35+
buffer<int, 2> buf_2D(v2.data(), range_2D);
36+
buffer<int, 3> buf_3D(v3.data(), range_3D);
37+
38+
myQueue.submit([&](handler &cgh) {
39+
auto acc_1D = buf_1D.get_access<r_w>(cgh, {count}, {10});
40+
auto acc_2D = buf_2D.get_access<r_w>(cgh, {2, 2}, {1, 1});
41+
auto acc_3D = buf_3D.get_access<r_w>(cgh, {2, 2, 2}, {1, 1, 1});
42+
cgh.single_task<class task>([=] {
43+
acc_1D.get_pointer()[0] = 5; // s.b. offset 0
44+
acc_1D[0] = 15; // s.b. offset 10
45+
46+
// 2D
47+
acc_2D.get_pointer()[0] = 7; // s.b. offset 0
48+
acc_2D[{0, 0}] = 17; // s.b. offset {1,1} aka 5 if linear.
49+
50+
// 3D
51+
acc_3D.get_pointer()[0] = 9; // s.b. offset 0
52+
acc_3D[{0, 0, 0}] = 19; // s.b. offset {1,1,1} aka 21 if linear.
53+
});
54+
});
55+
myQueue.wait();
56+
// now host access - we offset by one more than the device test
57+
auto acc_1D = buf_1D.get_access<r_w>({count}, {11});
58+
auto acc_2D = buf_2D.get_access<r_w>({2, 2}, {1, 2});
59+
auto acc_3D = buf_3D.get_access<r_w>({2, 2, 2}, {1, 1, 2});
60+
acc_1D.get_pointer()[1] = 4; // s.b. offset 1
61+
acc_1D[0] = 14; // s.b. offset 11
62+
63+
// 2D
64+
acc_2D.get_pointer()[1] = 6; // s.b. offset 1
65+
acc_2D[{0, 0}] = 16; // s.b. offset {1,2} aka 6 if linear.
66+
67+
// 3D
68+
acc_3D.get_pointer()[1] = 8; // s.b. offset 1
69+
acc_3D[{0, 0, 0}] = 18; // s.b. offset {1,1,2} aka 22 if linear.
70+
} //~buffer
71+
// always nice to have some feedback
72+
std::cout << "DEVICE" << std::endl;
73+
std::cout << "1D CHECK: v1[0] should be 5: " << v1[0]
74+
<< ", and v1[10] s.b. 15: " << v1[10] << std::endl;
75+
std::cout << "2D CHECK: v2[0] should be 7: " << v2[0]
76+
<< ", and v2[5] s.b. 17: " << v2[5] << std::endl;
77+
std::cout << "3D CHECK: v3[0] should be 9: " << v3[0]
78+
<< ", and v3[21] s.b. 19: " << v3[21] << std::endl
79+
<< std::endl;
80+
81+
std::cout << "HOST" << std::endl;
82+
std::cout << "1D CHECK: v1[1] should be 4: " << v1[1]
83+
<< ", and v1[11] s.b. 14: " << v1[11] << std::endl;
84+
std::cout << "2D CHECK: v2[1] should be 6: " << v2[1]
85+
<< ", and v2[6] s.b. 16: " << v2[6] << std::endl;
86+
std::cout << "3D CHECK: v3[1] should be 8: " << v3[1]
87+
<< ", and v3[22] s.b. 17: " << v3[22] << std::endl
88+
<< std::endl;
89+
90+
// device
91+
assert(v1[0] == 5);
92+
assert(v1[10] == 15);
93+
assert(v2[0] == 7);
94+
assert(v2[5] == 17); // offset {1,1} in a 4x4 field is linear offset 5
95+
assert(v3[0] == 9);
96+
assert(v3[21] == 19); // offset {1,1,1} in a 4x4x4 field is linear offset 21
97+
98+
// host
99+
assert(v1[1] == 4);
100+
assert(v1[11] == 14);
101+
assert(v2[1] == 6);
102+
assert(v2[6] == 16); // offset {1,2} in a 4x4 field is linear offset 6
103+
assert(v3[1] == 8);
104+
assert(v3[22] == 18); // offset {1,1,2} in a 4x4x4 field is linear offset 22
105+
}
106+
107+
int main() {
108+
test_across_ranges();
109+
110+
std::cout << "OK!" << std::endl;
111+
112+
return 0;
113+
}

0 commit comments

Comments
 (0)