Skip to content

Commit 894c10d

Browse files
[SYCL] Adjust Messaging for Invalid NDRange WorkGroup Sizes (#1817)
Print more informative message when WG size exceeds dimensions of global range. Signed-off-by: Chris Perkins <[email protected]>
1 parent d32da99 commit 894c10d

File tree

2 files changed

+331
-46
lines changed

2 files changed

+331
-46
lines changed

sycl/source/detail/error_handling/enqueue_kernel.cpp

Lines changed: 61 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -107,55 +107,80 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl,
107107
}
108108

109109
if (HasLocalSize) {
110+
// Is the global range size evenly divisible by the local workgroup size?
110111
const bool NonUniformWGs =
111112
(NDRDesc.LocalSize[0] != 0 &&
112113
NDRDesc.GlobalSize[0] % NDRDesc.LocalSize[0] != 0) ||
113114
(NDRDesc.LocalSize[1] != 0 &&
114115
NDRDesc.GlobalSize[1] % NDRDesc.LocalSize[1] != 0) ||
115116
(NDRDesc.LocalSize[2] != 0 &&
116117
NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0);
117-
118-
if (Ver[0] == '1') {
119-
// OpenCL 1.x:
120-
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
121-
// number of workitems specified by global_work_size is not evenly
122-
// divisible by size of work-group given by local_work_size
123-
124-
if (NonUniformWGs)
125-
throw sycl::nd_range_error(
126-
"Non-uniform work-groups are not supported by the target device",
127-
PI_INVALID_WORK_GROUP_SIZE);
128-
} else {
129-
// OpenCL 2.x:
130-
// PI_INVALID_WORK_GROUP_SIZE if the program was compiled with
131-
// –cl-uniform-work-group-size and the number of work-items specified
132-
// by global_work_size is not evenly divisible by size of work-group
133-
// given by local_work_size
134-
135-
pi_program Program = nullptr;
136-
Plugin.call<PiApiKind::piKernelGetInfo>(
137-
Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(pi_program), &Program, nullptr);
138-
size_t OptsSize = 0;
139-
Plugin.call<PiApiKind::piProgramGetBuildInfo>(
140-
Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr, &OptsSize);
141-
string_class Opts(OptsSize, '\0');
142-
Plugin.call<PiApiKind::piProgramGetBuildInfo>(
143-
Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, &Opts.front(),
144-
nullptr);
145-
if (NonUniformWGs) {
118+
// Is the local size of the workgroup greater than the global range size in
119+
// any dimension? This is a sub-case of NonUniformWGs.
120+
const bool LocalExceedsGlobal =
121+
NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] ||
122+
NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] ||
123+
NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]);
124+
125+
if (NonUniformWGs) {
126+
if (Ver[0] == '1') {
127+
// OpenCL 1.x:
128+
// PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
129+
// number of workitems specified by global_work_size is not evenly
130+
// divisible by size of work-group given by local_work_size
131+
if (LocalExceedsGlobal)
132+
throw sycl::nd_range_error("Local workgroup size cannot be greater "
133+
"than global range in any dimension",
134+
PI_INVALID_WORK_GROUP_SIZE);
135+
else
136+
throw sycl::nd_range_error(
137+
"Global_work_size must be evenly divisible by local_work_size. "
138+
"Non-uniform work-groups are not supported by the target device",
139+
PI_INVALID_WORK_GROUP_SIZE);
140+
} else {
141+
// OpenCL 2.x:
142+
// PI_INVALID_WORK_GROUP_SIZE if the program was compiled with
143+
// –cl-uniform-work-group-size and the number of work-items specified
144+
// by global_work_size is not evenly divisible by size of work-group
145+
// given by local_work_size
146+
147+
pi_program Program = nullptr;
148+
Plugin.call<PiApiKind::piKernelGetInfo>(Kernel, PI_KERNEL_INFO_PROGRAM,
149+
sizeof(pi_program), &Program,
150+
nullptr);
151+
size_t OptsSize = 0;
152+
Plugin.call<PiApiKind::piProgramGetBuildInfo>(
153+
Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr,
154+
&OptsSize);
155+
string_class Opts(OptsSize, '\0');
156+
Plugin.call<PiApiKind::piProgramGetBuildInfo>(
157+
Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize,
158+
&Opts.front(), nullptr);
146159
const bool HasStd20 = Opts.find("-cl-std=CL2.0") != string_class::npos;
160+
const bool RequiresUniformWGSize =
161+
Opts.find("-cl-uniform-work-group-size") != string_class::npos;
162+
std::string message =
163+
LocalExceedsGlobal
164+
? "Local workgroup size greater than global range size. "
165+
: "Global_work_size not evenly divisible by local_work_size. ";
147166
if (!HasStd20)
148167
throw sycl::nd_range_error(
149-
"Non-uniform work-groups are not allowed by default. Underlying "
150-
"OpenCL 2.x implementation supports this feature and to enable "
151-
"it, build device program with -cl-std=CL2.0",
168+
message.append("Non-uniform work-groups are not allowed by "
169+
"default. Underlying "
170+
"OpenCL 2.x implementation supports this feature "
171+
"and to enable "
172+
"it, build device program with -cl-std=CL2.0"),
152173
PI_INVALID_WORK_GROUP_SIZE);
153-
else
174+
else if (RequiresUniformWGSize)
154175
throw sycl::nd_range_error(
155-
"Non-uniform work-groups are not allowed by default. Underlying "
156-
"OpenCL 2.x implementation supports this feature, but it is "
157-
"disabled by -cl-uniform-work-group-size build flag",
176+
message.append(
177+
"Non-uniform work-groups are not allowed by when "
178+
"-cl-uniform-work-group-size flag is used. Underlying "
179+
"OpenCL 2.x implementation supports this feature, but it is "
180+
"being "
181+
"disabled by -cl-uniform-work-group-size build flag"),
158182
PI_INVALID_WORK_GROUP_SIZE);
183+
// else unknown. fallback (below)
159184
}
160185
}
161186
}

0 commit comments

Comments
 (0)