@@ -70,7 +70,18 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
70
70
Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE,
71
71
sizeof (size_t ) * 3 , CompileWGSize, nullptr );
72
72
73
+ size_t MaxWGSize = 0 ;
74
+ Plugin.call <PiApiKind::piDeviceGetInfo>(Device,
75
+ PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
76
+ sizeof (size_t ), &MaxWGSize, nullptr );
73
77
if (CompileWGSize[0 ] != 0 ) {
78
+ if (CompileWGSize[0 ] > MaxWGSize || CompileWGSize[1 ] > MaxWGSize ||
79
+ CompileWGSize[2 ] > MaxWGSize)
80
+ throw sycl::exception (
81
+ make_error_code (errc::kernel_not_supported),
82
+ " Submitting a kernel decorated with reqd_work_group_size attribute "
83
+ " to a device that does not support this work group size is invalid." );
84
+
74
85
// OpenCL 1.x && 2.0:
75
86
// PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the
76
87
// reqd_work_group_size attribute is used to declare the work-group size
@@ -97,45 +108,41 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
97
108
std::to_string (CompileWGSize[0 ]) + " }" ,
98
109
PI_ERROR_INVALID_WORK_GROUP_SIZE);
99
110
}
100
- if (IsOpenCLV1x) {
101
- // OpenCL 1.x:
102
- // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
103
- // the total number of work-items in the work-group computed as
104
- // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
105
- // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
106
- // table 4.3
107
- size_t MaxWGSize = 0 ;
108
- Plugin.call <PiApiKind::piDeviceGetInfo>(
109
- Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof (size_t ),
110
- &MaxWGSize, nullptr );
111
- const size_t TotalNumberOfWIs =
112
- NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
113
- if (TotalNumberOfWIs > MaxWGSize)
114
- throw sycl::nd_range_error (
115
- " Total number of work-items in a work-group cannot exceed " +
116
- std::to_string (MaxWGSize),
117
- PI_ERROR_INVALID_WORK_GROUP_SIZE);
118
- } else if (IsOpenCLVGE20 || IsLevelZero) {
119
- // OpenCL 2.x or OneAPI Level Zero:
120
- // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
121
- // the total number of work-items in the work-group computed as
122
- // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
123
- // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
124
- // table 5.21.
125
- size_t KernelWGSize = 0 ;
126
- Plugin.call <PiApiKind::piKernelGetGroupInfo>(
127
- Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof (size_t ),
128
- &KernelWGSize, nullptr );
129
- const size_t TotalNumberOfWIs =
130
- NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
131
- if (TotalNumberOfWIs > KernelWGSize)
132
- throw sycl::nd_range_error (
133
- " Total number of work-items in a work-group cannot exceed " +
134
- std::to_string (KernelWGSize) + " for this kernel" ,
135
- PI_ERROR_INVALID_WORK_GROUP_SIZE);
136
- } else {
137
- // TODO: Should probably have something similar for the other backends
138
- }
111
+ if (IsOpenCLV1x) {
112
+ // OpenCL 1.x:
113
+ // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
114
+ // the total number of work-items in the work-group computed as
115
+ // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
116
+ // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
117
+ // table 4.3
118
+ const size_t TotalNumberOfWIs =
119
+ NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
120
+ if (TotalNumberOfWIs > MaxWGSize)
121
+ throw sycl::nd_range_error (
122
+ " Total number of work-items in a work-group cannot exceed " +
123
+ std::to_string (MaxWGSize),
124
+ PI_ERROR_INVALID_WORK_GROUP_SIZE);
125
+ } else if (IsOpenCLVGE20 || IsLevelZero) {
126
+ // OpenCL 2.x or OneAPI Level Zero:
127
+ // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
128
+ // the total number of work-items in the work-group computed as
129
+ // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
130
+ // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
131
+ // table 5.21.
132
+ size_t KernelWGSize = 0 ;
133
+ Plugin.call <PiApiKind::piKernelGetGroupInfo>(
134
+ Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof (size_t ),
135
+ &KernelWGSize, nullptr );
136
+ const size_t TotalNumberOfWIs =
137
+ NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
138
+ if (TotalNumberOfWIs > KernelWGSize)
139
+ throw sycl::nd_range_error (
140
+ " Total number of work-items in a work-group cannot exceed " +
141
+ std::to_string (KernelWGSize) + " for this kernel" ,
142
+ PI_ERROR_INVALID_WORK_GROUP_SIZE);
143
+ } else {
144
+ // TODO: Should probably have something similar for the other backends
145
+ }
139
146
140
147
if (HasLocalSize) {
141
148
// Is the global range size evenly divisible by the local workgroup size?
0 commit comments