Skip to content

Commit ed877c2

Browse files
authored
[SYCL][CUDA] Improve error message for exceeding CUDA grid limits (#4563)
Improves the error message when the user specifies a number of work groups in certain dimension, which exceeds CUDA's max grid dimension limits. Before this PR the error message is not very informative: ``` PI CUDA ERROR: Value: 1 Name: CUDA_ERROR_INVALID_VALUE Description: invalid argument Function: cuda_piEnqueueKernelLaunch Source Location: /home/tadej/llvm/sycl/plugins/cuda/pi_cuda.cpp:2662 terminate called after throwing an instance of 'cl::sycl::runtime_error' what(): Native API failed. Native API returns: -30 (CL_INVALID_VALUE) -30 (CL_INVALID_VALUE) This is especially confusing when porting code from CUDA, due to the fact that dimensions in SYCL are flipped compared to CUDA and CUDA has different limits for different dimensions. ``` Tested by: intel/llvm-test-suite#952
1 parent 9284fc0 commit ed877c2

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

sycl/source/detail/error_handling/enqueue_kernel.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,31 @@ bool handleInvalidWorkItemSize(const device_impl &DeviceImpl,
268268
return 0;
269269
}
270270

271+
bool handleInvalidValue(const device_impl &DeviceImpl,
272+
const NDRDescT &NDRDesc) {
273+
const plugin &Plugin = DeviceImpl.getPlugin();
274+
RT::PiDevice Device = DeviceImpl.getHandleRef();
275+
276+
size_t MaxNWGs[] = {0, 0, 0};
277+
Plugin.call<PiApiKind::piDeviceGetInfo>(
278+
Device, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, sizeof(MaxNWGs),
279+
&MaxNWGs, nullptr);
280+
for (unsigned int I = 0; I < NDRDesc.Dims; I++) {
281+
size_t NWgs = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
282+
if (NWgs > MaxNWGs[I])
283+
throw sycl::nd_range_error(
284+
"Number of work-groups exceed limit for dimension " +
285+
std::to_string(I) + " : " + std::to_string(NWgs) + " > " +
286+
std::to_string(MaxNWGs[I]),
287+
PI_INVALID_VALUE);
288+
}
289+
290+
// fallback
291+
constexpr pi_result Error = PI_INVALID_VALUE;
292+
throw runtime_error(
293+
"Native API failed. Native API returns: " + codeToString(Error), Error);
294+
}
295+
271296
bool handleError(pi_result Error, const device_impl &DeviceImpl,
272297
pi_kernel Kernel, const NDRDescT &NDRDesc) {
273298
assert(Error != PI_SUCCESS &&
@@ -315,6 +340,9 @@ bool handleError(pi_result Error, const device_impl &DeviceImpl,
315340
"slice pitch) are not supported by device associated with queue",
316341
PI_INVALID_IMAGE_SIZE);
317342

343+
case PI_INVALID_VALUE:
344+
return handleInvalidValue(DeviceImpl, NDRDesc);
345+
318346
// TODO: Handle other error codes
319347

320348
default:

0 commit comments

Comments
 (0)