File tree Expand file tree Collapse file tree 2 files changed +7
-5
lines changed
source/adapters/native_cpu Expand file tree Collapse file tree 2 files changed +7
-5
lines changed Original file line number Diff line number Diff line change @@ -138,12 +138,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
138
138
#else
139
139
bool isLocalSizeOne =
140
140
ndr.LocalSize [0 ] == 1 && ndr.LocalSize [1 ] == 1 && ndr.LocalSize [2 ] == 1 ;
141
- if (isLocalSizeOne && ndr.GlobalSize [0 ] > numParallelThreads) {
141
+ if (isLocalSizeOne && ndr.GlobalSize [0 ] > numParallelThreads &&
142
+ !hKernel->hasLocalArgs ()) {
142
143
// If the local size is one, we make the assumption that we are running a
143
144
// parallel_for over a sycl::range.
144
- // Todo: we could add compiler checks and
145
- // kernel properties for this (e.g. check that no barriers are called, no
146
- // local memory args).
145
+ // Todo: we could add more compiler checks and
146
+ // kernel properties for this (e.g. check that no barriers are called).
147
147
148
148
// Todo: this assumes that dim 0 is the best dimension over which we want to
149
149
// parallelize
Original file line number Diff line number Diff line change @@ -142,7 +142,9 @@ struct ur_kernel_handle_t_ : RefCounted {
142
142
_localMemPoolSize = reqSize;
143
143
}
144
144
145
- // To be called before executing a work group
145
+ bool hasLocalArgs () const { return !_localArgInfo.empty (); }
146
+
147
+ // To be called before executing a work group if local args are present
146
148
void handleLocalArgs (size_t numParallelThread, size_t threadId) {
147
149
// For each local argument we have size*numthreads
148
150
size_t offset = 0 ;
You can’t perform that action at this time.
0 commit comments