Skip to content

Commit a395886

Browse files
[SYCL][CUDA][HIP] Support zero range kernel for cuda and hip backends. (#7044)
- Fixes issue [6963](#6963) by allowing zero-range kernels on the CUDA and HIP backends.

Co-authored-by: Romanov Vlad <[email protected]>
1 parent 2117657 commit a395886

File tree

3 files changed: 11 additions (+11) and 1 deletion (-1).

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2988,6 +2988,11 @@ pi_result cuda_piEnqueueKernelLaunch(
   assert(work_dim > 0);
   assert(work_dim < 4);
 
+  if (*global_work_size == 0) {
+    return cuda_piEnqueueEventsWaitWithBarrier(
+        command_queue, num_events_in_wait_list, event_wait_list, event);
+  }
+
   // Set the number of threads per block to the number of threads per warp
   // by default unless user has provided a better number
   size_t threadsPerBlock[3] = {32u, 1u, 1u};

sycl/plugins/hip/pi_hip.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2758,6 +2758,11 @@ pi_result hip_piEnqueueKernelLaunch(
   assert(work_dim > 0);
   assert(work_dim < 4);
 
+  if (*global_work_size == 0) {
+    return hip_piEnqueueEventsWaitWithBarrier(
+        command_queue, num_events_in_wait_list, event_wait_list, event);
+  }
+
   // Set the number of threads per block to the number of threads per warp
   // by default unless user has provided a better number
   size_t threadsPerBlock[3] = {32u, 1u, 1u};

sycl/source/detail/scheduler/commands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1934,7 +1934,7 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel,
   if (NDR.GlobalSize[0] != 0)
     return; // GlobalSize is set - no need to adjust
   // check the prerequisites:
-  assert(NDR.NumWorkGroups[0] != 0 && NDR.LocalSize[0] == 0);
+  assert(NDR.LocalSize[0] == 0);
   // TODO might be good to cache this info together with the kernel info to
   // avoid get_kernel_work_group_info on every kernel run
   range<3> WGSize = get_kernel_device_specific_info<

0 commit comments

Comments
 (0)