@@ -107,55 +107,80 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl,
107
107
}
108
108
109
109
if (HasLocalSize) {
110
+ // Is the global range size evenly divisible by the local workgroup size?
110
111
const bool NonUniformWGs =
111
112
(NDRDesc.LocalSize [0 ] != 0 &&
112
113
NDRDesc.GlobalSize [0 ] % NDRDesc.LocalSize [0 ] != 0 ) ||
113
114
(NDRDesc.LocalSize [1 ] != 0 &&
114
115
NDRDesc.GlobalSize [1 ] % NDRDesc.LocalSize [1 ] != 0 ) ||
115
116
(NDRDesc.LocalSize [2 ] != 0 &&
116
117
NDRDesc.GlobalSize [2 ] % NDRDesc.LocalSize [2 ] != 0 );
117
-
118
- if (Ver[0 ] == ' 1' ) {
119
- // OpenCL 1.x:
120
- // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
121
- // number of workitems specified by global_work_size is not evenly
122
- // divisible by size of work-group given by local_work_size
123
-
124
- if (NonUniformWGs)
125
- throw sycl::nd_range_error (
126
- " Non-uniform work-groups are not supported by the target device" ,
127
- PI_INVALID_WORK_GROUP_SIZE);
128
- } else {
129
- // OpenCL 2.x:
130
- // PI_INVALID_WORK_GROUP_SIZE if the program was compiled with
131
- // –cl-uniform-work-group-size and the number of work-items specified
132
- // by global_work_size is not evenly divisible by size of work-group
133
- // given by local_work_size
134
-
135
- pi_program Program = nullptr ;
136
- Plugin.call <PiApiKind::piKernelGetInfo>(
137
- Kernel, PI_KERNEL_INFO_PROGRAM, sizeof (pi_program), &Program, nullptr );
138
- size_t OptsSize = 0 ;
139
- Plugin.call <PiApiKind::piProgramGetBuildInfo>(
140
- Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0 , nullptr , &OptsSize);
141
- string_class Opts (OptsSize, ' \0 ' );
142
- Plugin.call <PiApiKind::piProgramGetBuildInfo>(
143
- Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, &Opts.front (),
144
- nullptr );
145
- if (NonUniformWGs) {
118
+ // Is the local size of the workgroup greater than the global range size in
119
+ // any dimension? This is a sub-case of NonUniformWGs.
120
+ const bool LocalExceedsGlobal =
121
+ NonUniformWGs && (NDRDesc.LocalSize [0 ] > NDRDesc.GlobalSize [0 ] ||
122
+ NDRDesc.LocalSize [1 ] > NDRDesc.GlobalSize [1 ] ||
123
+ NDRDesc.LocalSize [2 ] > NDRDesc.GlobalSize [2 ]);
124
+
125
+ if (NonUniformWGs) {
126
+ if (Ver[0 ] == ' 1' ) {
127
+ // OpenCL 1.x:
128
+ // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
129
+ // number of workitems specified by global_work_size is not evenly
130
+ // divisible by size of work-group given by local_work_size
131
+ if (LocalExceedsGlobal)
132
+ throw sycl::nd_range_error (" Local workgroup size cannot be greater "
133
+ " than global range in any dimension" ,
134
+ PI_INVALID_WORK_GROUP_SIZE);
135
+ else
136
+ throw sycl::nd_range_error (
137
+ " Global_work_size must be evenly divisible by local_work_size. "
138
+ " Non-uniform work-groups are not supported by the target device" ,
139
+ PI_INVALID_WORK_GROUP_SIZE);
140
+ } else {
141
+ // OpenCL 2.x:
142
+ // PI_INVALID_WORK_GROUP_SIZE if the program was compiled with
143
+ // –cl-uniform-work-group-size and the number of work-items specified
144
+ // by global_work_size is not evenly divisible by size of work-group
145
+ // given by local_work_size
146
+
147
+ pi_program Program = nullptr ;
148
+ Plugin.call <PiApiKind::piKernelGetInfo>(Kernel, PI_KERNEL_INFO_PROGRAM,
149
+ sizeof (pi_program), &Program,
150
+ nullptr );
151
+ size_t OptsSize = 0 ;
152
+ Plugin.call <PiApiKind::piProgramGetBuildInfo>(
153
+ Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0 , nullptr ,
154
+ &OptsSize);
155
+ string_class Opts (OptsSize, ' \0 ' );
156
+ Plugin.call <PiApiKind::piProgramGetBuildInfo>(
157
+ Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize,
158
+ &Opts.front (), nullptr );
146
159
const bool HasStd20 = Opts.find (" -cl-std=CL2.0" ) != string_class::npos;
160
+ const bool RequiresUniformWGSize =
161
+ Opts.find (" -cl-uniform-work-group-size" ) != string_class::npos;
162
+ std::string message =
163
+ LocalExceedsGlobal
164
+ ? " Local workgroup size greater than global range size. "
165
+ : " Global_work_size not evenly divisible by local_work_size. " ;
147
166
if (!HasStd20)
148
167
throw sycl::nd_range_error (
149
- " Non-uniform work-groups are not allowed by default. Underlying "
150
- " OpenCL 2.x implementation supports this feature and to enable "
151
- " it, build device program with -cl-std=CL2.0" ,
168
+ message.append (" Non-uniform work-groups are not allowed by "
169
+ " default. Underlying "
170
+ " OpenCL 2.x implementation supports this feature "
171
+ " and to enable "
172
+ " it, build device program with -cl-std=CL2.0" ),
152
173
PI_INVALID_WORK_GROUP_SIZE);
153
- else
174
+ else if (RequiresUniformWGSize)
154
175
throw sycl::nd_range_error (
155
- " Non-uniform work-groups are not allowed by default. Underlying "
156
- " OpenCL 2.x implementation supports this feature, but it is "
157
- " disabled by -cl-uniform-work-group-size build flag" ,
176
+ message.append (
177
+ " Non-uniform work-groups are not allowed by when "
178
+ " -cl-uniform-work-group-size flag is used. Underlying "
179
+ " OpenCL 2.x implementation supports this feature, but it is "
180
+ " being "
181
+ " disabled by -cl-uniform-work-group-size build flag" ),
158
182
PI_INVALID_WORK_GROUP_SIZE);
183
+ // else unknown. fallback (below)
159
184
}
160
185
}
161
186
}
0 commit comments