Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Revert "[SYCL] Refactor invalid use of local accessor" #1695

Merged
merged 1 commit into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 16 additions & 54 deletions SYCL/Basic/accessor/accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ template <typename T> void TestAccSizeFuncs(const std::vector<T> &vec) {
q.submit([&](sycl::handler &cgh) {
sycl::accessor accRes(bufRes, cgh);
sycl::local_accessor<T, 1> locAcc(vec.size(), cgh);
cgh.parallel_for(sycl::nd_range<1>{1, 1},
[=](sycl::nd_item<1>) { test(accRes, locAcc); });
cgh.single_task([=]() { test(accRes, locAcc); });
});
q.wait();
}
Expand All @@ -121,7 +120,7 @@ template <typename GlobAcc, typename LocAcc>
void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc,
bool testConstIter) {
if (testConstIter) {
cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
cgh.single_task([=]() {
size_t Idx = 0;
for (auto &It : locAcc) {
It = globAcc[Idx++];
Expand All @@ -134,7 +133,7 @@ void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc,
globAcc[Idx--] += *It;
});
} else {
cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
cgh.single_task([=]() {
size_t Idx = 0;
for (auto It = locAcc.begin(); It != locAcc.end(); It++)
*It = globAcc[Idx++] * 2;
Expand Down Expand Up @@ -992,11 +991,10 @@ int main() {
sycl::accessor acc1(buf1, cgh);
sycl::accessor acc2(buf2, cgh);
acc1.swap(acc2);
cgh.parallel_for<class swap1>(sycl::nd_range<1>{1, 1},
[=](sycl::nd_item<1>) {
acc1[15] = 4;
acc2[7] = 4;
});
cgh.single_task([=]() {
acc1[15] = 4;
acc2[7] = 4;
});
});
}
assert(vec1[7] == 4 && vec2[15] == 4);
Expand All @@ -1014,11 +1012,10 @@ int main() {
sycl::accessor acc2(buf2, cgh);
sycl::local_accessor<int, 1> locAcc1(8, cgh), locAcc2(16, cgh);
locAcc1.swap(locAcc2);
cgh.parallel_for<class swap2>(sycl::nd_range<1>{1, 1},
[=](sycl::nd_item<1>) {
acc1[0] = locAcc1.size();
acc2[0] = locAcc2.size();
});
cgh.single_task([=]() {
acc1[0] = locAcc1.size();
acc2[0] = locAcc2.size();
});
});
}
assert(size1 == 16 && size2 == 8);
Expand Down Expand Up @@ -1085,54 +1082,19 @@ int main() {
// Explicit block to prompt copy-back to Data
{
sycl::buffer<int, 1> DataBuffer(&Data, sycl::range<1>(1));

Queue.submit([&](sycl::handler &CGH) {
sycl::accessor<int, 0> Acc(DataBuffer, CGH);
sycl::local_accessor<int, 0> LocalAcc(CGH);
CGH.parallel_for<class copyblock>(sycl::nd_range<1>{1, 1},
[=](sycl::nd_item<1>) {
LocalAcc = 64;
Acc = LocalAcc;
});
CGH.single_task<class local_acc_0_dim_assignment>([=]() {
LocalAcc = 64;
Acc = LocalAcc;
});
});
}

assert(Data == 64);
}

// Throws exception on local_accessors used in single_task
{
constexpr static int size = 1;
sycl::queue Queue;

try {
Queue.submit([&](sycl::handler &cgh) {
auto local_acc = sycl::local_accessor<int, 1>({size}, cgh);
cgh.single_task<class local_acc_exception>([=]() { (void)local_acc; });
});
assert(0 && "local accessor must not be used in single task.");
} catch (sycl::exception e) {
std::cout << "SYCL exception caught: " << e.what() << std::endl;
}
}

// Throws exception on local_accessors used in parallel_for taking a range
// parameter.
{
constexpr static int size = 1;
sycl::queue Queue;

try {
Queue.submit([&](sycl::handler &cgh) {
auto local_acc = sycl::local_accessor<int, 1>({size}, cgh);
cgh.parallel_for<class parallel_for_exception>(
sycl::range<1>{size}, [=](sycl::id<1> ID) { (void)local_acc; });
});
assert(0 &&
"local accessor must not be used in parallel for with range.");
} catch (sycl::exception e) {
std::cout << "SYCL exception caught: " << e.what() << std::endl;
}
}

std::cout << "Test passed" << std::endl;
}
12 changes: 6 additions & 6 deletions SYCL/Basic/multi_ptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,11 @@ template <typename T, access::decorated IsDecorated> void testMultPtr() {
local_accessor<T, 1> localAccessor(numOfItems, cgh);

cgh.parallel_for<class testMultPtrKernel<
T, IsDecorated>>(nd_range<1>{10, 10}, [=](nd_item<1> wiID) {
T, IsDecorated>>(range<1>{10}, [=](id<1> wiID) {
T private_data[10];
for (size_t i = 0; i < 10; ++i)
private_data[i] = 0;
localAccessor[wiID.get_local_id()] = 0;
localAccessor[wiID] = 0;

auto ptr_1 =
multi_ptr<T, access::address_space::global_space, IsDecorated>(
Expand Down Expand Up @@ -166,8 +166,8 @@ template <typename T, access::decorated IsDecorated> void testMultPtr() {
global_ptr<void, IsDecorated> ptr_12 =
global_ptr<void, IsDecorated>(ptr_11);

innerFunc<T, IsDecorated>(wiID.get_local_id().get(0), ptr_1, ptr_2,
ptr_3, ptr_4, ptr_5, local_ptr, priv_ptr);
innerFunc<T, IsDecorated>(wiID.get(0), ptr_1, ptr_2, ptr_3, ptr_4,
ptr_5, local_ptr, priv_ptr);
});
});
}
Expand Down Expand Up @@ -201,8 +201,8 @@ void testMultPtrArrowOperator() {
access::placeholder::false_t>
accessorData_3(bufferData_3, cgh);

cgh.parallel_for<class testMultPtrArrowOperatorKernel<T, IsDecorated>>(
sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
cgh.single_task<class testMultPtrArrowOperatorKernel<T, IsDecorated>>(
[=]() {
point<T> private_val = 0;

auto ptr_1 =
Expand Down
150 changes: 73 additions & 77 deletions SYCL/Basic/multi_ptr_legacy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,51 +62,50 @@ template <typename T> void testMultPtr() {
accessorData_2(bufferData_2, cgh);
local_accessor<T, 1> localAccessor(numOfItems, cgh);

cgh.parallel_for<class testMultPtrKernel<T>>(
nd_range<1>{10, 10}, [=](nd_item<1> wiID) {
auto ptr_1 = make_ptr<T, access::address_space::global_space,
access::decorated::legacy>(
accessorData_1.get_pointer());
auto ptr_2 = make_ptr<T, access::address_space::global_space,
access::decorated::legacy>(
accessorData_2.get_pointer());
auto local_ptr = make_ptr<T, access::address_space::local_space,
access::decorated::legacy>(
localAccessor.get_pointer());

// Construct extension pointer from accessors.
auto dev_ptr =
multi_ptr<T,
access::address_space::ext_intel_global_device_space>(
accessorData_1);
static_assert(
std::is_same_v<ext::intel::device_ptr<T>, decltype(dev_ptr)>,
"Incorrect type for dev_ptr.");

// General conversions in multi_ptr class
T *RawPtr = nullptr;
global_ptr<T> ptr_4(RawPtr);
ptr_4 = RawPtr;

global_ptr<T> ptr_5(accessorData_1);

global_ptr<void> ptr_6((void *)RawPtr);

ptr_6 = (void *)RawPtr;

// Explicit conversions for device_ptr/host_ptr to global_ptr
ext::intel::device_ptr<void> ptr_7((void *)RawPtr);
global_ptr<void> ptr_8 = global_ptr<void>(ptr_7);
ext::intel::host_ptr<void> ptr_9((void *)RawPtr);
global_ptr<void> ptr_10 = global_ptr<void>(ptr_9);
// TODO: need propagation of a7b763b26 patch to acl tool before
// testing these conversions - otherwise the test would fail on
// accelerator device during reversed translation from SPIR-V to
// LLVM IR device_ptr<T> ptr_11(accessorData_1); global_ptr<T>
// ptr_12 = global_ptr<T>(ptr_11);

innerFunc<T>(wiID.get_local_id().get(0), ptr_1, ptr_2, local_ptr);
});
cgh.parallel_for<class testMultPtrKernel<T>>(range<1>{10}, [=](id<1>
wiID) {
auto ptr_1 =
make_ptr<T, access::address_space::global_space,
access::decorated::legacy>(accessorData_1.get_pointer());
auto ptr_2 =
make_ptr<T, access::address_space::global_space,
access::decorated::legacy>(accessorData_2.get_pointer());
auto local_ptr =
make_ptr<T, access::address_space::local_space,
access::decorated::legacy>(localAccessor.get_pointer());

// Construct extension pointer from accessors.
auto dev_ptr =
multi_ptr<T, access::address_space::ext_intel_global_device_space>(
accessorData_1);
static_assert(
std::is_same_v<ext::intel::device_ptr<T>, decltype(dev_ptr)>,
"Incorrect type for dev_ptr.");

// General conversions in multi_ptr class
T *RawPtr = nullptr;
global_ptr<T> ptr_4(RawPtr);
ptr_4 = RawPtr;

global_ptr<T> ptr_5(accessorData_1);

global_ptr<void> ptr_6((void *)RawPtr);

ptr_6 = (void *)RawPtr;

// Explicit conversions for device_ptr/host_ptr to global_ptr
ext::intel::device_ptr<void> ptr_7((void *)RawPtr);
global_ptr<void> ptr_8 = global_ptr<void>(ptr_7);
ext::intel::host_ptr<void> ptr_9((void *)RawPtr);
global_ptr<void> ptr_10 = global_ptr<void>(ptr_9);
// TODO: need propagation of a7b763b26 patch to acl tool before
// testing these conversions - otherwise the test would fail on
// accelerator device during reversed translation from SPIR-V to
// LLVM IR device_ptr<T> ptr_11(accessorData_1); global_ptr<T>
// ptr_12 = global_ptr<T>(ptr_11);

innerFunc<T>(wiID.get(0), ptr_1, ptr_2, local_ptr);
});
});
}
for (size_t i = 0; i < 10; ++i) {
Expand Down Expand Up @@ -142,38 +141,35 @@ template <typename T> void testMultPtrArrowOperator() {
access::placeholder::false_t>
accessorData_4(bufferData_4, cgh);

cgh.parallel_for<class testMultPtrArrowOperatorKernel<T>>(
sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
auto ptr_1 = make_ptr<point<T>, access::address_space::global_space,
access::decorated::legacy>(
accessorData_1.get_pointer());
auto ptr_2 =
make_ptr<point<T>, access::address_space::constant_space,
access::decorated::legacy>(
accessorData_2.get_pointer());
auto ptr_3 = make_ptr<point<T>, access::address_space::local_space,
access::decorated::legacy>(
accessorData_3.get_pointer());
auto ptr_4 =
make_ptr<point<T>,
access::address_space::ext_intel_global_device_space,
access::decorated::legacy>(
accessorData_4.get_pointer());

auto x1 = ptr_1->x;
auto x2 = ptr_2->x;
auto x3 = ptr_3->x;
auto x4 = ptr_4->x;

static_assert(std::is_same<decltype(x1), T>::value,
"Expected decltype(ptr_1->x) == T");
static_assert(std::is_same<decltype(x2), T>::value,
"Expected decltype(ptr_2->x) == T");
static_assert(std::is_same<decltype(x3), T>::value,
"Expected decltype(ptr_3->x) == T");
static_assert(std::is_same<decltype(x4), T>::value,
"Expected decltype(ptr_4->x) == T");
});
cgh.single_task<class testMultPtrArrowOperatorKernel<T>>([=]() {
auto ptr_1 =
make_ptr<point<T>, access::address_space::global_space,
access::decorated::legacy>(accessorData_1.get_pointer());
auto ptr_2 =
make_ptr<point<T>, access::address_space::constant_space,
access::decorated::legacy>(accessorData_2.get_pointer());
auto ptr_3 =
make_ptr<point<T>, access::address_space::local_space,
access::decorated::legacy>(accessorData_3.get_pointer());
auto ptr_4 =
make_ptr<point<T>,
access::address_space::ext_intel_global_device_space,
access::decorated::legacy>(accessorData_4.get_pointer());

auto x1 = ptr_1->x;
auto x2 = ptr_2->x;
auto x3 = ptr_3->x;
auto x4 = ptr_4->x;

static_assert(std::is_same<decltype(x1), T>::value,
"Expected decltype(ptr_1->x) == T");
static_assert(std::is_same<decltype(x2), T>::value,
"Expected decltype(ptr_2->x) == T");
static_assert(std::is_same<decltype(x3), T>::value,
"Expected decltype(ptr_3->x) == T");
static_assert(std::is_same<decltype(x4), T>::value,
"Expected decltype(ptr_4->x) == T");
});
});
}
}
Expand Down
33 changes: 16 additions & 17 deletions SYCL/DeviceLib/string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,23 +406,22 @@ bool kernel_test_memcpy_addr_space(sycl::queue &deviceQueue) {
sycl::access::target::device,
sycl::access::placeholder::false_t>
dst1_acc(buffer3, cgh);
cgh.parallel_for<class KernelTestMemcpyAddrSpace>(
sycl::nd_range<1>{16, 16}, [=](sycl::nd_item<1>) {
// memcpy from constant buffer to local buffer
memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8);
for (size_t idx = 0; idx < 7; ++idx)
local_acc[idx] += 1;
// memcpy from local buffer to global buffer
memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8);
char device_buf[16];
// memcpy from constant buffer to private memory
memcpy(device_buf, src_acc.get_pointer(), 8);
for (size_t idx = 0; idx < 7; ++idx) {
device_buf[idx] += 2;
// memcpy from private to global buffer
memcpy(dst1_acc.get_pointer(), device_buf, 8);
}
});
cgh.single_task<class KernelTestMemcpyAddrSpace>([=]() {
// memcpy from constant buffer to local buffer
memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8);
for (size_t idx = 0; idx < 7; ++idx)
local_acc[idx] += 1;
// memcpy from local buffer to global buffer
memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8);
char device_buf[16];
// memcpy from constant buffer to private memory
memcpy(device_buf, src_acc.get_pointer(), 8);
for (size_t idx = 0; idx < 7; ++idx) {
device_buf[idx] += 2;
// memcpy from private to global buffer
memcpy(dst1_acc.get_pointer(), device_buf, 8);
}
});
});
}

Expand Down
3 changes: 1 addition & 2 deletions SYCL/DiscardEvents/discard_events_accessors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ int main(int Argc, const char *Argv[]) {
sycl::property::queue::in_order{},
sycl::ext::oneapi::property::queue::discard_events{}};
sycl::queue Q(props);
sycl::nd_range<1> NDRange(BUFFER_SIZE, BUFFER_SIZE);
sycl::range<1> Range(BUFFER_SIZE);

RunKernelHelper(Q, [&](int *Harray) {
Expand All @@ -64,7 +63,7 @@ int main(int Argc, const char *Argv[]) {
sycl::local_accessor<int, 1> LocalAcc(LocalMemSize, CGH);

CGH.parallel_for<class kernel_using_local_memory>(
NDRange, [=](sycl::item<1> itemID) {
Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
int *Ptr = LocalAcc.get_pointer();
Ptr[i] = i + 5;
Expand Down
2 changes: 1 addition & 1 deletion SYCL/Regression/local-arg-align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
// argument first and the float4 argument second. If the two arguments are
// simply laid out consecutively, the float4 argument will not be
// correctly aligned.
h.parallel_for(sycl::nd_range<1>{1, 1}, [a, b, ares](sycl::nd_item<1>) {
h.parallel_for(1, [a, b, ares](sycl::id<1> i) {
// Get the addresses of the two local buffers
ares[0] = (size_t)&a[0];
ares[1] = (size_t)&b[0];
Expand Down
7 changes: 3 additions & 4 deletions SYCL/Regression/local_accessor_3d_subscript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ int main() {
Q.submit([&](sycl::handler &CGH) {
sycl::local_accessor<size_t, 3> LocalMem(sycl::range<3>(1, 1, 1), CGH);
auto Acc = Buf.get_access(CGH);
CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> It) {
LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()] = 42;
Acc[It.get_local_id()] =
LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()];
CGH.parallel_for(1, [=](sycl::item<1> It) {
LocalMem[It][It][It] = 42;
Acc[It] = LocalMem[It][It][It];
});
});
}
Expand Down
Loading