Skip to content

Commit 084c027

Browse files
authored
[SYCL] Throw for invalid reqd_work_group_size (#9237)
Also fix alignment
1 parent 5df3bd4 commit 084c027

File tree

2 files changed

+89
-39
lines changed

2 files changed

+89
-39
lines changed

sycl/source/detail/error_handling/error_handling.cpp

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,18 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
7070
Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE,
7171
sizeof(size_t) * 3, CompileWGSize, nullptr);
7272

73+
size_t MaxWGSize = 0;
74+
Plugin.call<PiApiKind::piDeviceGetInfo>(Device,
75+
PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
76+
sizeof(size_t), &MaxWGSize, nullptr);
7377
if (CompileWGSize[0] != 0) {
78+
if (CompileWGSize[0] > MaxWGSize || CompileWGSize[1] > MaxWGSize ||
79+
CompileWGSize[2] > MaxWGSize)
80+
throw sycl::exception(
81+
make_error_code(errc::kernel_not_supported),
82+
"Submitting a kernel decorated with reqd_work_group_size attribute "
83+
"to a device that does not support this work group size is invalid.");
84+
7485
// OpenCL 1.x && 2.0:
7586
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the
7687
// reqd_work_group_size attribute is used to declare the work-group size
@@ -97,45 +108,41 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
97108
std::to_string(CompileWGSize[0]) + "}",
98109
PI_ERROR_INVALID_WORK_GROUP_SIZE);
99110
}
100-
if (IsOpenCLV1x) {
101-
// OpenCL 1.x:
102-
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
103-
// the total number of work-items in the work-group computed as
104-
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
105-
// than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
106-
// table 4.3
107-
size_t MaxWGSize = 0;
108-
Plugin.call<PiApiKind::piDeviceGetInfo>(
109-
Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(size_t),
110-
&MaxWGSize, nullptr);
111-
const size_t TotalNumberOfWIs =
112-
NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
113-
if (TotalNumberOfWIs > MaxWGSize)
114-
throw sycl::nd_range_error(
115-
"Total number of work-items in a work-group cannot exceed " +
116-
std::to_string(MaxWGSize),
117-
PI_ERROR_INVALID_WORK_GROUP_SIZE);
118-
} else if (IsOpenCLVGE20 || IsLevelZero) {
119-
// OpenCL 2.x or OneAPI Level Zero:
120-
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
121-
// the total number of work-items in the work-group computed as
122-
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
123-
// than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
124-
// table 5.21.
125-
size_t KernelWGSize = 0;
126-
Plugin.call<PiApiKind::piKernelGetGroupInfo>(
127-
Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t),
128-
&KernelWGSize, nullptr);
129-
const size_t TotalNumberOfWIs =
130-
NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
131-
if (TotalNumberOfWIs > KernelWGSize)
132-
throw sycl::nd_range_error(
133-
"Total number of work-items in a work-group cannot exceed " +
134-
std::to_string(KernelWGSize) + " for this kernel",
135-
PI_ERROR_INVALID_WORK_GROUP_SIZE);
136-
} else {
137-
// TODO: Should probably have something similar for the other backends
138-
}
111+
if (IsOpenCLV1x) {
112+
// OpenCL 1.x:
113+
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
114+
// the total number of work-items in the work-group computed as
115+
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
116+
// than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
117+
// table 4.3
118+
const size_t TotalNumberOfWIs =
119+
NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
120+
if (TotalNumberOfWIs > MaxWGSize)
121+
throw sycl::nd_range_error(
122+
"Total number of work-items in a work-group cannot exceed " +
123+
std::to_string(MaxWGSize),
124+
PI_ERROR_INVALID_WORK_GROUP_SIZE);
125+
} else if (IsOpenCLVGE20 || IsLevelZero) {
126+
// OpenCL 2.x or OneAPI Level Zero:
127+
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
128+
// the total number of work-items in the work-group computed as
129+
// local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
130+
// than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
131+
// table 5.21.
132+
size_t KernelWGSize = 0;
133+
Plugin.call<PiApiKind::piKernelGetGroupInfo>(
134+
Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t),
135+
&KernelWGSize, nullptr);
136+
const size_t TotalNumberOfWIs =
137+
NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
138+
if (TotalNumberOfWIs > KernelWGSize)
139+
throw sycl::nd_range_error(
140+
"Total number of work-items in a work-group cannot exceed " +
141+
std::to_string(KernelWGSize) + " for this kernel",
142+
PI_ERROR_INVALID_WORK_GROUP_SIZE);
143+
} else {
144+
// TODO: Should probably have something similar for the other backends
145+
}
139146

140147
if (HasLocalSize) {
141148
// Is the global range size evenly divisible by the local workgroup size?
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
6+
// UNSUPPORTED: hip
7+
8+
#include <sycl/sycl.hpp>
9+
10+
#define CHECK_INVALID_REQD_WORK_GROUP_SIZE(Dim, ...) \
11+
{ \
12+
bool ExceptionThrown = false; \
13+
std::error_code Errc; \
14+
try { \
15+
q.submit([&](sycl::handler &h) { \
16+
h.parallel_for(sycl::range<Dim>(__VA_ARGS__), \
17+
[=](sycl::item<Dim> it) \
18+
[[sycl::reqd_work_group_size(__VA_ARGS__)]] {}); \
19+
}); \
20+
q.wait(); \
21+
} catch (sycl::exception & e) { \
22+
ExceptionThrown = true; \
23+
Errc = e.code(); \
24+
} \
25+
assert(ExceptionThrown && \
26+
"Invalid use of reqd_work_group_size should throw an exception."); \
27+
assert(Errc == sycl::errc::kernel_not_supported); \
28+
}
29+
30+
int main() {
31+
sycl::queue q;
32+
constexpr int N = 1e9;
33+
auto MaxWGSize =
34+
q.get_device().get_info<sycl::info::device::max_work_group_size>();
35+
36+
if (N > MaxWGSize) {
37+
CHECK_INVALID_REQD_WORK_GROUP_SIZE(1, N)
38+
CHECK_INVALID_REQD_WORK_GROUP_SIZE(2, 1, N)
39+
CHECK_INVALID_REQD_WORK_GROUP_SIZE(3, 1, 1, N)
40+
}
41+
42+
return 0;
43+
}

0 commit comments

Comments
 (0)