@@ -29,26 +29,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
29
29
30
30
(void )pGlobalWorkOffset;
31
31
32
- if (!pLocalWorkSize) {
33
- // TODO: This is not optimal, but it is legal
34
- static size_t DefaultWorkSize[3 ] = {1 , 1 , 1 };
35
- pLocalWorkSize = DefaultWorkSize;
32
+ // TODO: We default to 1, 1, 1 here. In future if pLocalWorkSize is not
33
+ // specified, we should pick the "best" one
34
+ size_t WorkSize[3 ] = {1 , 1 , 1 };
35
+ if (pLocalWorkSize) {
36
+ for (uint32_t I = 0 ; I < workDim; I++) {
37
+ WorkSize[I] = pLocalWorkSize[I];
38
+ }
36
39
}
37
40
38
- if (pLocalWorkSize[0 ] > pGlobalWorkSize[0 ] ||
39
- pLocalWorkSize[1 ] > pGlobalWorkSize[1 ] ||
40
- pLocalWorkSize[2 ] > pGlobalWorkSize[2 ]) {
41
+ if (WorkSize[0 ] > pGlobalWorkSize[0 ] || WorkSize[1 ] > pGlobalWorkSize[1 ] ||
42
+ WorkSize[2 ] > pGlobalWorkSize[2 ]) {
41
43
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
42
44
}
43
45
44
46
ol_kernel_launch_size_args_t LaunchArgs;
45
47
LaunchArgs.Dimensions = workDim;
46
- LaunchArgs.NumGroupsX = pGlobalWorkSize[0 ] / pLocalWorkSize [0 ];
47
- LaunchArgs.NumGroupsY = pGlobalWorkSize[1 ] / pLocalWorkSize [1 ];
48
- LaunchArgs.NumGroupsZ = pGlobalWorkSize[2 ] / pLocalWorkSize [2 ];
49
- LaunchArgs.GroupSizeX = pLocalWorkSize [0 ];
50
- LaunchArgs.GroupSizeY = pLocalWorkSize [1 ];
51
- LaunchArgs.GroupSizeZ = pLocalWorkSize [2 ];
48
+ LaunchArgs.NumGroupsX = pGlobalWorkSize[0 ] / WorkSize [0 ];
49
+ LaunchArgs.NumGroupsY = pGlobalWorkSize[1 ] / WorkSize [1 ];
50
+ LaunchArgs.NumGroupsZ = pGlobalWorkSize[2 ] / WorkSize [2 ];
51
+ LaunchArgs.GroupSizeX = WorkSize [0 ];
52
+ LaunchArgs.GroupSizeY = WorkSize [1 ];
53
+ LaunchArgs.GroupSizeZ = WorkSize [2 ];
52
54
LaunchArgs.DynSharedMemory = 0 ;
53
55
54
56
ol_event_handle_t EventOut;
0 commit comments