Skip to content

Commit 7c73c11

Browse files
authored
[SYCL] Improve parallel_for error handling (#2117)
Various changes that stemmed from addressing my own code review comments for a previous commit: * Clean up OpenCL version checking in "enqueue_kernel.cpp" to avoid referencing uninitiaized memory when backend is not OpenCL. * Add TODO comments to "enqueue_kernel.cpp" for additional error checks that will need to be added for backends other than OpenCL. * Change "basic_tests/parallel_for_range.cpp" to prepare for running with L0, but it's still disabled for now. * Add TODO comments to "basic_tests/parallel_for_range.cpp" to enable some testing for OpenCL versions > 2.0. * Enable "basic_tests/reqd_work_group_size.cpp" for backends other than L0, and rearange its tests to put the positive tests first.
1 parent 71a56e7 commit 7c73c11

File tree

2 files changed

+91
-75
lines changed

2 files changed

+91
-75
lines changed

sycl/source/detail/error_handling/enqueue_kernel.cpp

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,17 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
4949
}
5050
}
5151

52-
size_t VerSize = 0;
53-
Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_VERSION, 0,
54-
nullptr, &VerSize);
52+
// Some of the error handling below is special for particular OpenCL
53+
// versions. If this is an OpenCL backend, get the version.
54+
bool IsOpenCL = false; // Backend is any OpenCL version
55+
bool IsOpenCLV1x = false; // Backend is OpenCL 1.x
56+
bool IsOpenCLV20 = false; // Backend is OpenCL 2.0
5557
if (Platform.get_backend() == cl::sycl::backend::opencl) {
56-
assert(VerSize >= 10 &&
57-
"Unexpected device version string"); // strlen("OpenCL X.Y")
58+
string_class VersionString = DeviceImpl.get_info<info::device::version>();
59+
IsOpenCL = true;
60+
IsOpenCLV1x = (VersionString.find("OpenCL 1.") == 0);
61+
IsOpenCLV20 = (VersionString.find("OpenCL 2.0") == 0);
5862
}
59-
string_class VerStr(VerSize, '\0');
60-
Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_VERSION,
61-
VerSize, &VerStr.front(), nullptr);
62-
const char *Ver = &VerStr[7]; // strlen("OpenCL ")
6363

6464
size_t CompileWGSize[3] = {0};
6565
Plugin.call<PiApiKind::piKernelGetGroupInfo>(
@@ -71,14 +71,12 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
7171
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the
7272
// reqd_work_group_size attribute is used to declare the work-group size
7373
// for kernel in the program source.
74-
if (Platform.get_backend() == cl::sycl::backend::opencl) {
75-
if (!HasLocalSize && (Ver[0] == '1' || (Ver[0] == '2' && Ver[2] == '0')))
76-
throw sycl::nd_range_error(
77-
"OpenCL 1.x and 2.0 requires to pass local size argument even if "
78-
"required work-group size was specified in the program source",
79-
PI_INVALID_WORK_GROUP_SIZE);
74+
if (!HasLocalSize && (IsOpenCLV1x || IsOpenCLV20)) {
75+
throw sycl::nd_range_error(
76+
"OpenCL 1.x and 2.0 requires to pass local size argument even if "
77+
"required work-group size was specified in the program source",
78+
PI_INVALID_WORK_GROUP_SIZE);
8079
}
81-
// Any OpenCL version:
8280
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not
8381
// match the required work-group size for kernel in the program source.
8482
if (NDRDesc.LocalSize[0] != CompileWGSize[0] ||
@@ -89,12 +87,12 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
8987
"specified in the program source",
9088
PI_INVALID_WORK_GROUP_SIZE);
9189
}
92-
if (Platform.get_backend() == cl::sycl::backend::opencl) {
93-
if (Ver[0] == '1') {
90+
if (IsOpenCL) {
91+
if (IsOpenCLV1x) {
9492
// OpenCL 1.x:
9593
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the
9694
// total number of work-items in the work-group computed as
97-
// local_work_size[0] * ... * local_work_size[work_dim 1] is greater
95+
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
9896
// than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
9997
// table 4.3
10098
size_t MaxWGSize = 0;
@@ -109,10 +107,10 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
109107
std::to_string(MaxWGSize),
110108
PI_INVALID_WORK_GROUP_SIZE);
111109
} else {
112-
// RELEVENT // OpenCL 2.x:
110+
// OpenCL 2.x:
113111
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the
114112
// total number of work-items in the work-group computed as
115-
// local_work_size[0] * ... * local_work_size[work_dim 1] is greater
113+
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
116114
// than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
117115
// table 5.21.
118116
size_t KernelWGSize = 0;
@@ -127,6 +125,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
127125
std::to_string(KernelWGSize) + " for this kernel",
128126
PI_INVALID_WORK_GROUP_SIZE);
129127
}
128+
} else {
129+
// TODO: Should probably have something similar for the other backends
130130
}
131131

132132
if (HasLocalSize) {
@@ -140,14 +140,14 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
140140
NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0);
141141
// Is the local size of the workgroup greater than the global range size in
142142
// any dimension?
143-
if (Platform.get_backend() == cl::sycl::backend::opencl) {
143+
if (IsOpenCL) {
144144
const bool LocalExceedsGlobal =
145145
NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] ||
146146
NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] ||
147147
NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]);
148148

149149
if (NonUniformWGs) {
150-
if (Ver[0] == '1') {
150+
if (IsOpenCLV1x) {
151151
// OpenCL 1.x:
152152
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
153153
// number of workitems specified by global_work_size is not evenly
@@ -212,6 +212,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
212212
// else unknown. fallback (below)
213213
}
214214
}
215+
} else {
216+
// TODO: Decide what checks (if any) we need for the other backends
215217
}
216218
throw sycl::nd_range_error(
217219
"Non-uniform work-groups are not supported by the target device",

sycl/test/basic_tests/reqd_work_group_size.cpp

Lines changed: 66 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
// XFAIL: cuda || opencl
1+
// XFAIL: cuda
2+
// The negative test fails on CUDA. It's not clear whether the CUDA backend
3+
// respects the reqd_work_group_size attribute.
4+
25
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
36
// RUN: %CPU_RUN_PLACEHOLDER %t.out
47
// RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -20,59 +23,26 @@ int main() {
2023
queue Q(AsyncHandler);
2124
device D(Q.get_device());
2225

23-
string_class DeviceVendorName = D.get_info<info::device::vendor>();
24-
auto DeviceType = D.get_info<info::device::device_type>();
25-
26-
// parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, 4)
27-
// -> fail
28-
try {
29-
Q.submit([&](handler &CGH) {
30-
CGH.parallel_for<class ReqdWGSizeNegativeA>(
31-
nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)),
32-
[=](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{
26+
bool IsOpenCL = (D.get_platform().get_backend() == backend::opencl);
3327

34-
});
35-
});
36-
Q.wait_and_throw();
37-
std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been "
38-
"thrown\n";
39-
return 1; // We shouldn't be here, exception is expected
40-
} catch (nd_range_error &E) {
41-
if (string_class(E.what()).find(
42-
"Specified local size doesn't match the required work-group size "
43-
"specified in the program source") == string_class::npos) {
44-
std::cerr
45-
<< "Test case ReqdWGSizeNegativeA failed 1: unexpected exception: "
46-
<< E.what() << std::endl;
47-
return 1;
48-
}
49-
} catch (runtime_error &E) {
50-
std::cerr << "Test case ReqdWGSizeNegativeA failed 2: unexpected exception: "
51-
<< E.what() << std::endl;
52-
return 1;
53-
} catch (...) {
54-
std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected "
55-
"has been caught"
56-
<< std::endl;
57-
return 1;
58-
}
59-
60-
// Positive test-cases that should pass on any underlying OpenCL runtime
28+
// Positive test case: Specify local size that matches required size.
6129
// parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) ->
6230
// pass
6331
try {
6432
Q.submit([&](handler &CGH) {
6533
CGH.parallel_for<class ReqdWGSizePositiveA>(
66-
nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)),
67-
[=](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{});
34+
nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), [=
35+
](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{});
6836
});
6937
Q.wait_and_throw();
7038
} catch (nd_range_error &E) {
71-
std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: "
39+
std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected "
40+
"nd_range_error exception: "
7241
<< E.what() << std::endl;
7342
return 1;
7443
} catch (runtime_error &E) {
75-
std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: "
44+
std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected "
45+
"runtime_error exception: "
7646
<< E.what() << std::endl;
7747
return 1;
7848
} catch (...) {
@@ -82,24 +52,68 @@ int main() {
8252
return 1;
8353
}
8454

55+
// Kernel that has a required WG size, but no local size is specified.
56+
//
57+
// TODO: This fails on OpenCL and should be investigated.
58+
if (!IsOpenCL) {
59+
try {
60+
Q.submit([&](handler &CGH) {
61+
CGH.parallel_for<class ReqdWGSizeNoLocalPositive>(
62+
range<3>(16, 16, 16), [=
63+
](item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{});
64+
});
65+
Q.wait_and_throw();
66+
} catch (nd_range_error &E) {
67+
std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected "
68+
"nd_range_error exception: "
69+
<< E.what() << std::endl;
70+
return 1;
71+
} catch (runtime_error &E) {
72+
std::cerr
73+
<< "Test case ReqdWGSizeNoLocalPositive: unexpected runtime_error "
74+
"exception: "
75+
<< E.what() << std::endl;
76+
return 1;
77+
} catch (...) {
78+
std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: something "
79+
"unexpected has been caught"
80+
<< std::endl;
81+
return 1;
82+
}
83+
}
84+
85+
// Negative test case: Specify local size that does not match required size.
86+
// parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, 4)
87+
// -> fail
8588
try {
8689
Q.submit([&](handler &CGH) {
87-
CGH.parallel_for<class ReqdWGSizePositiveB>(
88-
range<3>(16, 16, 16), [=](item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{});
90+
CGH.parallel_for<class ReqdWGSizeNegativeA>(
91+
nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), [=
92+
](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{
93+
94+
});
8995
});
9096
Q.wait_and_throw();
91-
97+
std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been "
98+
"thrown\n";
99+
return 1; // We shouldn't be here, exception is expected
92100
} catch (nd_range_error &E) {
93-
std::cerr << "Test case ReqdWGSizePositiveB failed 1: unexpected exception: "
94-
<< E.what() << std::endl;
95-
return 1;
101+
if (string_class(E.what()).find(
102+
"Specified local size doesn't match the required work-group size "
103+
"specified in the program source") == string_class::npos) {
104+
std::cerr
105+
<< "Test case ReqdWGSizeNegativeA failed: unexpected nd_range_error "
106+
"exception: "
107+
<< E.what() << std::endl;
108+
return 1;
109+
}
96110
} catch (runtime_error &E) {
97-
std::cerr
98-
<< "Test case ReqdWGSizePositiveB failed 2: unexpected exception: "
99-
<< E.what() << std::endl;
111+
std::cerr << "Test case ReqdWGSizeNegativeA failed: unexpected "
112+
"nd_range_error exception: "
113+
<< E.what() << std::endl;
100114
return 1;
101115
} catch (...) {
102-
std::cerr << "Test case ReqdWGSizePositiveB failed: something unexpected "
116+
std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected "
103117
"has been caught"
104118
<< std::endl;
105119
return 1;

0 commit comments

Comments
 (0)