Skip to content

Commit d16a1fd

Browse files
[SYCL] Return the correct sycl::errc in case of invalid sycl::reqd_work_group_size (#10093)
According to SYCL 2020, > If a kernel is decorated with this attribute and then submitted to a device that does not support the work group size, the implementation must throw a synchronous exception with the `errc::kernel_not_supported` error code. This patch fixes sycl::errc in such case: it returned `errc::nd_range`, now it returns `errc::kernel_not_supported`.
1 parent 1d6cff1 commit d16a1fd

File tree

2 files changed

+46
-23
lines changed

2 files changed

+46
-23
lines changed

sycl/source/detail/error_handling/error_handling.cpp

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,8 @@ namespace detail::enqueue_kernel_launch {
2222

2323
void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
2424
const NDRDescT &NDRDesc) {
25-
const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);
26-
27-
const PluginPtr &Plugin = DeviceImpl.getPlugin();
28-
sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef();
2925
sycl::platform Platform = DeviceImpl.get_platform();
3026

31-
if (HasLocalSize) {
32-
size_t MaxThreadsPerBlock[3] = {};
33-
Plugin->call<PiApiKind::piDeviceGetInfo>(
34-
Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock),
35-
MaxThreadsPerBlock, nullptr);
36-
37-
for (size_t I = 0; I < 3; ++I) {
38-
if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
39-
throw sycl::nd_range_error(
40-
"The number of work-items in each dimension of a work-group cannot "
41-
"exceed {" +
42-
std::to_string(MaxThreadsPerBlock[0]) + ", " +
43-
std::to_string(MaxThreadsPerBlock[1]) + ", " +
44-
std::to_string(MaxThreadsPerBlock[2]) + "} for this device",
45-
PI_ERROR_INVALID_WORK_GROUP_SIZE);
46-
}
47-
}
48-
}
49-
5027
// Some of the error handling below is special for particular OpenCL
5128
// versions. If this is an OpenCL backend, get the version.
5229
bool IsOpenCL = false; // Backend is any OpenCL version
@@ -68,6 +45,9 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
6845
IsCuda = true;
6946
}
7047

48+
const PluginPtr &Plugin = DeviceImpl.getPlugin();
49+
sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef();
50+
7151
size_t CompileWGSize[3] = {0};
7252
Plugin->call<PiApiKind::piKernelGetGroupInfo>(
7353
Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE,
@@ -77,6 +57,9 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
7757
Plugin->call<PiApiKind::piDeviceGetInfo>(Device,
7858
PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
7959
sizeof(size_t), &MaxWGSize, nullptr);
60+
61+
const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);
62+
8063
if (CompileWGSize[0] != 0) {
8164
if (CompileWGSize[0] > MaxWGSize || CompileWGSize[1] > MaxWGSize ||
8265
CompileWGSize[2] > MaxWGSize)
@@ -111,6 +94,26 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
11194
std::to_string(CompileWGSize[0]) + "}",
11295
PI_ERROR_INVALID_WORK_GROUP_SIZE);
11396
}
97+
98+
if (HasLocalSize) {
99+
size_t MaxThreadsPerBlock[3] = {};
100+
Plugin->call<PiApiKind::piDeviceGetInfo>(
101+
Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock),
102+
MaxThreadsPerBlock, nullptr);
103+
104+
for (size_t I = 0; I < 3; ++I) {
105+
if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
106+
throw sycl::nd_range_error(
107+
"The number of work-items in each dimension of a work-group cannot "
108+
"exceed {" +
109+
std::to_string(MaxThreadsPerBlock[0]) + ", " +
110+
std::to_string(MaxThreadsPerBlock[1]) + ", " +
111+
std::to_string(MaxThreadsPerBlock[2]) + "} for this device",
112+
PI_ERROR_INVALID_WORK_GROUP_SIZE);
113+
}
114+
}
115+
}
116+
114117
if (IsOpenCLV1x) {
115118
// OpenCL 1.x:
116119
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// UNSUPPORTED: hip
5+
6+
#include <sycl/sycl.hpp>
7+
8+
int main() {
9+
try {
10+
sycl::queue q;
11+
q.submit([&](sycl::handler &cgh) {
12+
cgh.parallel_for(sycl::nd_range<1>({INT_MAX}, {INT_MAX}),
13+
[=](auto item)
14+
[[sycl::reqd_work_group_size(INT_MAX)]] {});
15+
}).wait_and_throw();
16+
} catch (sycl::exception &e) {
17+
assert(sycl::errc::kernel_not_supported == e.code());
18+
}
19+
return 0;
20+
}

0 commit comments

Comments
 (0)