Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit d578db9

Browse files
authored
[SYCL] Refactor invalid use of local accessor (#1646)
According to [local accessors](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:accessor.local) of the SYCL specification, a local accessor must not be used in a SYCL kernel function that is invoked via single_task or via the simple form of parallel_for that takes a range parameter. * Update invalid use of local accessors. * Add test to catch thrown exception
1 parent a592341 commit d578db9

File tree

10 files changed

+178
-134
lines changed

10 files changed

+178
-134
lines changed

SYCL/Basic/accessor/accessor.cpp

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ template <typename T> void TestAccSizeFuncs(const std::vector<T> &vec) {
107107
q.submit([&](sycl::handler &cgh) {
108108
sycl::accessor accRes(bufRes, cgh);
109109
sycl::local_accessor<T, 1> locAcc(vec.size(), cgh);
110-
cgh.single_task([=]() { test(accRes, locAcc); });
110+
cgh.parallel_for(sycl::nd_range<1>{1, 1},
111+
[=](sycl::nd_item<1>) { test(accRes, locAcc); });
111112
});
112113
q.wait();
113114
}
@@ -120,7 +121,7 @@ template <typename GlobAcc, typename LocAcc>
120121
void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc,
121122
bool testConstIter) {
122123
if (testConstIter) {
123-
cgh.single_task([=]() {
124+
cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
124125
size_t Idx = 0;
125126
for (auto &It : locAcc) {
126127
It = globAcc[Idx++];
@@ -133,7 +134,7 @@ void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc,
133134
globAcc[Idx--] += *It;
134135
});
135136
} else {
136-
cgh.single_task([=]() {
137+
cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
137138
size_t Idx = 0;
138139
for (auto It = locAcc.begin(); It != locAcc.end(); It++)
139140
*It = globAcc[Idx++] * 2;
@@ -991,10 +992,11 @@ int main() {
991992
sycl::accessor acc1(buf1, cgh);
992993
sycl::accessor acc2(buf2, cgh);
993994
acc1.swap(acc2);
994-
cgh.single_task([=]() {
995-
acc1[15] = 4;
996-
acc2[7] = 4;
997-
});
995+
cgh.parallel_for<class swap1>(sycl::nd_range<1>{1, 1},
996+
[=](sycl::nd_item<1>) {
997+
acc1[15] = 4;
998+
acc2[7] = 4;
999+
});
9981000
});
9991001
}
10001002
assert(vec1[7] == 4 && vec2[15] == 4);
@@ -1012,10 +1014,11 @@ int main() {
10121014
sycl::accessor acc2(buf2, cgh);
10131015
sycl::local_accessor<int, 1> locAcc1(8, cgh), locAcc2(16, cgh);
10141016
locAcc1.swap(locAcc2);
1015-
cgh.single_task([=]() {
1016-
acc1[0] = locAcc1.size();
1017-
acc2[0] = locAcc2.size();
1018-
});
1017+
cgh.parallel_for<class swap2>(sycl::nd_range<1>{1, 1},
1018+
[=](sycl::nd_item<1>) {
1019+
acc1[0] = locAcc1.size();
1020+
acc2[0] = locAcc2.size();
1021+
});
10191022
});
10201023
}
10211024
assert(size1 == 16 && size2 == 8);
@@ -1082,19 +1085,54 @@ int main() {
10821085
// Explicit block to prompt copy-back to Data
10831086
{
10841087
sycl::buffer<int, 1> DataBuffer(&Data, sycl::range<1>(1));
1085-
10861088
Queue.submit([&](sycl::handler &CGH) {
10871089
sycl::accessor<int, 0> Acc(DataBuffer, CGH);
10881090
sycl::local_accessor<int, 0> LocalAcc(CGH);
1089-
CGH.single_task<class local_acc_0_dim_assignment>([=]() {
1090-
LocalAcc = 64;
1091-
Acc = LocalAcc;
1092-
});
1091+
CGH.parallel_for<class copyblock>(sycl::nd_range<1>{1, 1},
1092+
[=](sycl::nd_item<1>) {
1093+
LocalAcc = 64;
1094+
Acc = LocalAcc;
1095+
});
10931096
});
10941097
}
10951098

10961099
assert(Data == 64);
10971100
}
10981101

1102+
// Throws exception on local_accessors used in single_task
1103+
{
1104+
constexpr static int size = 1;
1105+
sycl::queue Queue;
1106+
1107+
try {
1108+
Queue.submit([&](sycl::handler &cgh) {
1109+
auto local_acc = sycl::local_accessor<int, 1>({size}, cgh);
1110+
cgh.single_task<class local_acc_exception>([=]() { (void)local_acc; });
1111+
});
1112+
assert(0 && "local accessor must not be used in single task.");
1113+
} catch (sycl::exception e) {
1114+
std::cout << "SYCL exception caught: " << e.what() << std::endl;
1115+
}
1116+
}
1117+
1118+
// Throws exception on local_accessors used in parallel_for taking a range
1119+
// parameter.
1120+
{
1121+
constexpr static int size = 1;
1122+
sycl::queue Queue;
1123+
1124+
try {
1125+
Queue.submit([&](sycl::handler &cgh) {
1126+
auto local_acc = sycl::local_accessor<int, 1>({size}, cgh);
1127+
cgh.parallel_for<class parallel_for_exception>(
1128+
sycl::range<1>{size}, [=](sycl::id<1> ID) { (void)local_acc; });
1129+
});
1130+
assert(0 &&
1131+
"local accessor must not be used in parallel for with range.");
1132+
} catch (sycl::exception e) {
1133+
std::cout << "SYCL exception caught: " << e.what() << std::endl;
1134+
}
1135+
}
1136+
10991137
std::cout << "Test passed" << std::endl;
11001138
}

SYCL/Basic/multi_ptr.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,11 @@ template <typename T, access::decorated IsDecorated> void testMultPtr() {
104104
local_accessor<T, 1> localAccessor(numOfItems, cgh);
105105

106106
cgh.parallel_for<class testMultPtrKernel<
107-
T, IsDecorated>>(range<1>{10}, [=](id<1> wiID) {
107+
T, IsDecorated>>(nd_range<1>{10, 10}, [=](nd_item<1> wiID) {
108108
T private_data[10];
109109
for (size_t i = 0; i < 10; ++i)
110110
private_data[i] = 0;
111-
localAccessor[wiID] = 0;
111+
localAccessor[wiID.get_local_id()] = 0;
112112

113113
auto ptr_1 =
114114
multi_ptr<T, access::address_space::global_space, IsDecorated>(
@@ -166,8 +166,8 @@ template <typename T, access::decorated IsDecorated> void testMultPtr() {
166166
global_ptr<void, IsDecorated> ptr_12 =
167167
global_ptr<void, IsDecorated>(ptr_11);
168168

169-
innerFunc<T, IsDecorated>(wiID.get(0), ptr_1, ptr_2, ptr_3, ptr_4,
170-
ptr_5, local_ptr, priv_ptr);
169+
innerFunc<T, IsDecorated>(wiID.get_local_id().get(0), ptr_1, ptr_2,
170+
ptr_3, ptr_4, ptr_5, local_ptr, priv_ptr);
171171
});
172172
});
173173
}
@@ -201,8 +201,8 @@ void testMultPtrArrowOperator() {
201201
access::placeholder::false_t>
202202
accessorData_3(bufferData_3, cgh);
203203

204-
cgh.single_task<class testMultPtrArrowOperatorKernel<T, IsDecorated>>(
205-
[=]() {
204+
cgh.parallel_for<class testMultPtrArrowOperatorKernel<T, IsDecorated>>(
205+
sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
206206
point<T> private_val = 0;
207207

208208
auto ptr_1 =

SYCL/Basic/multi_ptr_legacy.hpp

Lines changed: 77 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -62,50 +62,51 @@ template <typename T> void testMultPtr() {
6262
accessorData_2(bufferData_2, cgh);
6363
local_accessor<T, 1> localAccessor(numOfItems, cgh);
6464

65-
cgh.parallel_for<class testMultPtrKernel<T>>(range<1>{10}, [=](id<1>
66-
wiID) {
67-
auto ptr_1 =
68-
make_ptr<T, access::address_space::global_space,
69-
access::decorated::legacy>(accessorData_1.get_pointer());
70-
auto ptr_2 =
71-
make_ptr<T, access::address_space::global_space,
72-
access::decorated::legacy>(accessorData_2.get_pointer());
73-
auto local_ptr =
74-
make_ptr<T, access::address_space::local_space,
75-
access::decorated::legacy>(localAccessor.get_pointer());
76-
77-
// Construct extension pointer from accessors.
78-
auto dev_ptr =
79-
multi_ptr<T, access::address_space::ext_intel_global_device_space>(
80-
accessorData_1);
81-
static_assert(
82-
std::is_same_v<ext::intel::device_ptr<T>, decltype(dev_ptr)>,
83-
"Incorrect type for dev_ptr.");
84-
85-
// General conversions in multi_ptr class
86-
T *RawPtr = nullptr;
87-
global_ptr<T> ptr_4(RawPtr);
88-
ptr_4 = RawPtr;
89-
90-
global_ptr<T> ptr_5(accessorData_1);
91-
92-
global_ptr<void> ptr_6((void *)RawPtr);
93-
94-
ptr_6 = (void *)RawPtr;
95-
96-
// Explicit conversions for device_ptr/host_ptr to global_ptr
97-
ext::intel::device_ptr<void> ptr_7((void *)RawPtr);
98-
global_ptr<void> ptr_8 = global_ptr<void>(ptr_7);
99-
ext::intel::host_ptr<void> ptr_9((void *)RawPtr);
100-
global_ptr<void> ptr_10 = global_ptr<void>(ptr_9);
101-
// TODO: need propagation of a7b763b26 patch to acl tool before
102-
// testing these conversions - otherwise the test would fail on
103-
// accelerator device during reversed translation from SPIR-V to
104-
// LLVM IR device_ptr<T> ptr_11(accessorData_1); global_ptr<T>
105-
// ptr_12 = global_ptr<T>(ptr_11);
106-
107-
innerFunc<T>(wiID.get(0), ptr_1, ptr_2, local_ptr);
108-
});
65+
cgh.parallel_for<class testMultPtrKernel<T>>(
66+
nd_range<1>{10, 10}, [=](nd_item<1> wiID) {
67+
auto ptr_1 = make_ptr<T, access::address_space::global_space,
68+
access::decorated::legacy>(
69+
accessorData_1.get_pointer());
70+
auto ptr_2 = make_ptr<T, access::address_space::global_space,
71+
access::decorated::legacy>(
72+
accessorData_2.get_pointer());
73+
auto local_ptr = make_ptr<T, access::address_space::local_space,
74+
access::decorated::legacy>(
75+
localAccessor.get_pointer());
76+
77+
// Construct extension pointer from accessors.
78+
auto dev_ptr =
79+
multi_ptr<T,
80+
access::address_space::ext_intel_global_device_space>(
81+
accessorData_1);
82+
static_assert(
83+
std::is_same_v<ext::intel::device_ptr<T>, decltype(dev_ptr)>,
84+
"Incorrect type for dev_ptr.");
85+
86+
// General conversions in multi_ptr class
87+
T *RawPtr = nullptr;
88+
global_ptr<T> ptr_4(RawPtr);
89+
ptr_4 = RawPtr;
90+
91+
global_ptr<T> ptr_5(accessorData_1);
92+
93+
global_ptr<void> ptr_6((void *)RawPtr);
94+
95+
ptr_6 = (void *)RawPtr;
96+
97+
// Explicit conversions for device_ptr/host_ptr to global_ptr
98+
ext::intel::device_ptr<void> ptr_7((void *)RawPtr);
99+
global_ptr<void> ptr_8 = global_ptr<void>(ptr_7);
100+
ext::intel::host_ptr<void> ptr_9((void *)RawPtr);
101+
global_ptr<void> ptr_10 = global_ptr<void>(ptr_9);
102+
// TODO: need propagation of a7b763b26 patch to acl tool before
103+
// testing these conversions - otherwise the test would fail on
104+
// accelerator device during reversed translation from SPIR-V to
105+
// LLVM IR device_ptr<T> ptr_11(accessorData_1); global_ptr<T>
106+
// ptr_12 = global_ptr<T>(ptr_11);
107+
108+
innerFunc<T>(wiID.get_local_id().get(0), ptr_1, ptr_2, local_ptr);
109+
});
109110
});
110111
}
111112
for (size_t i = 0; i < 10; ++i) {
@@ -141,35 +142,38 @@ template <typename T> void testMultPtrArrowOperator() {
141142
access::placeholder::false_t>
142143
accessorData_4(bufferData_4, cgh);
143144

144-
cgh.single_task<class testMultPtrArrowOperatorKernel<T>>([=]() {
145-
auto ptr_1 =
146-
make_ptr<point<T>, access::address_space::global_space,
147-
access::decorated::legacy>(accessorData_1.get_pointer());
148-
auto ptr_2 =
149-
make_ptr<point<T>, access::address_space::constant_space,
150-
access::decorated::legacy>(accessorData_2.get_pointer());
151-
auto ptr_3 =
152-
make_ptr<point<T>, access::address_space::local_space,
153-
access::decorated::legacy>(accessorData_3.get_pointer());
154-
auto ptr_4 =
155-
make_ptr<point<T>,
156-
access::address_space::ext_intel_global_device_space,
157-
access::decorated::legacy>(accessorData_4.get_pointer());
158-
159-
auto x1 = ptr_1->x;
160-
auto x2 = ptr_2->x;
161-
auto x3 = ptr_3->x;
162-
auto x4 = ptr_4->x;
163-
164-
static_assert(std::is_same<decltype(x1), T>::value,
165-
"Expected decltype(ptr_1->x) == T");
166-
static_assert(std::is_same<decltype(x2), T>::value,
167-
"Expected decltype(ptr_2->x) == T");
168-
static_assert(std::is_same<decltype(x3), T>::value,
169-
"Expected decltype(ptr_3->x) == T");
170-
static_assert(std::is_same<decltype(x4), T>::value,
171-
"Expected decltype(ptr_4->x) == T");
172-
});
145+
cgh.parallel_for<class testMultPtrArrowOperatorKernel<T>>(
146+
sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) {
147+
auto ptr_1 = make_ptr<point<T>, access::address_space::global_space,
148+
access::decorated::legacy>(
149+
accessorData_1.get_pointer());
150+
auto ptr_2 =
151+
make_ptr<point<T>, access::address_space::constant_space,
152+
access::decorated::legacy>(
153+
accessorData_2.get_pointer());
154+
auto ptr_3 = make_ptr<point<T>, access::address_space::local_space,
155+
access::decorated::legacy>(
156+
accessorData_3.get_pointer());
157+
auto ptr_4 =
158+
make_ptr<point<T>,
159+
access::address_space::ext_intel_global_device_space,
160+
access::decorated::legacy>(
161+
accessorData_4.get_pointer());
162+
163+
auto x1 = ptr_1->x;
164+
auto x2 = ptr_2->x;
165+
auto x3 = ptr_3->x;
166+
auto x4 = ptr_4->x;
167+
168+
static_assert(std::is_same<decltype(x1), T>::value,
169+
"Expected decltype(ptr_1->x) == T");
170+
static_assert(std::is_same<decltype(x2), T>::value,
171+
"Expected decltype(ptr_2->x) == T");
172+
static_assert(std::is_same<decltype(x3), T>::value,
173+
"Expected decltype(ptr_3->x) == T");
174+
static_assert(std::is_same<decltype(x4), T>::value,
175+
"Expected decltype(ptr_4->x) == T");
176+
});
173177
});
174178
}
175179
}

SYCL/DeviceLib/string_test.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -406,22 +406,23 @@ bool kernel_test_memcpy_addr_space(sycl::queue &deviceQueue) {
406406
sycl::access::target::device,
407407
sycl::access::placeholder::false_t>
408408
dst1_acc(buffer3, cgh);
409-
cgh.single_task<class KernelTestMemcpyAddrSpace>([=]() {
410-
// memcpy from constant buffer to local buffer
411-
memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8);
412-
for (size_t idx = 0; idx < 7; ++idx)
413-
local_acc[idx] += 1;
414-
// memcpy from local buffer to global buffer
415-
memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8);
416-
char device_buf[16];
417-
// memcpy from constant buffer to private memory
418-
memcpy(device_buf, src_acc.get_pointer(), 8);
419-
for (size_t idx = 0; idx < 7; ++idx) {
420-
device_buf[idx] += 2;
421-
// memcpy from private to global buffer
422-
memcpy(dst1_acc.get_pointer(), device_buf, 8);
423-
}
424-
});
409+
cgh.parallel_for<class KernelTestMemcpyAddrSpace>(
410+
sycl::nd_range<1>{16, 16}, [=](sycl::nd_item<1>) {
411+
// memcpy from constant buffer to local buffer
412+
memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8);
413+
for (size_t idx = 0; idx < 7; ++idx)
414+
local_acc[idx] += 1;
415+
// memcpy from local buffer to global buffer
416+
memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8);
417+
char device_buf[16];
418+
// memcpy from constant buffer to private memory
419+
memcpy(device_buf, src_acc.get_pointer(), 8);
420+
for (size_t idx = 0; idx < 7; ++idx) {
421+
device_buf[idx] += 2;
422+
// memcpy from private to global buffer
423+
memcpy(dst1_acc.get_pointer(), device_buf, 8);
424+
}
425+
});
425426
});
426427
}
427428

SYCL/DiscardEvents/discard_events_accessors.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ int main(int Argc, const char *Argv[]) {
5555
sycl::property::queue::in_order{},
5656
sycl::ext::oneapi::property::queue::discard_events{}};
5757
sycl::queue Q(props);
58+
sycl::nd_range<1> NDRange(BUFFER_SIZE, BUFFER_SIZE);
5859
sycl::range<1> Range(BUFFER_SIZE);
5960

6061
RunKernelHelper(Q, [&](int *Harray) {
@@ -63,7 +64,7 @@ int main(int Argc, const char *Argv[]) {
6364
sycl::local_accessor<int, 1> LocalAcc(LocalMemSize, CGH);
6465

6566
CGH.parallel_for<class kernel_using_local_memory>(
66-
Range, [=](sycl::item<1> itemID) {
67+
NDRange, [=](sycl::item<1> itemID) {
6768
size_t i = itemID.get_id(0);
6869
int *Ptr = LocalAcc.get_pointer();
6970
Ptr[i] = i + 5;

SYCL/Regression/local-arg-align.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
3434
// argument first and the float4 argument second. If the two arguments are
3535
// simply laid out consecutively, the float4 argument will not be
3636
// correctly aligned.
37-
h.parallel_for(1, [a, b, ares](sycl::id<1> i) {
37+
h.parallel_for(sycl::nd_range<1>{1, 1}, [a, b, ares](sycl::nd_item<1>) {
3838
// Get the addresses of the two local buffers
3939
ares[0] = (size_t)&a[0];
4040
ares[1] = (size_t)&b[0];

SYCL/Regression/local_accessor_3d_subscript.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ int main() {
2323
Q.submit([&](sycl::handler &CGH) {
2424
sycl::local_accessor<size_t, 3> LocalMem(sycl::range<3>(1, 1, 1), CGH);
2525
auto Acc = Buf.get_access(CGH);
26-
CGH.parallel_for(1, [=](sycl::item<1> It) {
27-
LocalMem[It][It][It] = 42;
28-
Acc[It] = LocalMem[It][It][It];
26+
CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> It) {
27+
LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()] = 42;
28+
Acc[It.get_local_id()] =
29+
LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()];
2930
});
3031
});
3132
}

0 commit comments

Comments
 (0)