Skip to content

Commit 744bd72

Browse files
[SYCL] Use per-kernel mutex for interop kernel enqueue (#8165)
Switch from using a single shared mutex to one mutex per kernel when enqueueing interoperability kernels. Compared to the original solution (#8111), this allows different interop SYCL kernels to be enqueued in parallel, but it does not cover the edge case where two SYCL kernels are created with the same native handle.
1 parent 1b973ae commit 744bd72

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

sycl/source/detail/kernel_impl.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ class kernel_impl {
173173

174174
ProgramImplPtr getProgramImpl() const { return MProgramImpl; }
175175

176+
/// Returns a reference to this kernel_impl's own enqueue mutex. The enqueue
/// path uses it for non-cacheable kernels, which have no mutex to fetch from
/// the kernel cache.
std::mutex &getNoncacheableEnqueueMutex() {
177+
return MNoncacheableEnqueueMutex;
178+
}
179+
176180
private:
177181
RT::PiKernel MKernel;
178182
const ContextImplPtr MContext;
@@ -181,6 +185,7 @@ class kernel_impl {
181185
const DeviceImageImplPtr MDeviceImageImpl;
182186
const KernelBundleImplPtr MKernelBundleImpl;
183187
bool MIsInterop = false;
188+
std::mutex MNoncacheableEnqueueMutex;
184189
};
185190

186191
template <typename Param>

sycl/source/detail/scheduler/commands.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2106,12 +2106,7 @@ pi_int32 enqueueImpKernel(
21062106
auto ContextImpl = Queue->getContextImplPtr();
21072107
auto DeviceImpl = Queue->getDeviceImplPtr();
21082108
RT::PiKernel Kernel = nullptr;
2109-
// Cacheable kernels use per-kernel mutexes that will be fetched from the
2110-
// cache, others (e.g. interoperability kernels) share a single mutex.
2111-
// TODO consider adding a PiKernel -> mutex map for allowing to enqueue
2112-
// different PiKernel's in parallel.
2113-
static std::mutex NoncacheableEnqueueMutex;
2114-
std::mutex *KernelMutex = &NoncacheableEnqueueMutex;
2109+
std::mutex *KernelMutex = nullptr;
21152110
RT::PiProgram Program = nullptr;
21162111

21172112
std::shared_ptr<kernel_impl> SyclKernelImpl;
@@ -2152,6 +2147,14 @@ pi_int32 enqueueImpKernel(
21522147
OSModuleHandle, ContextImpl, DeviceImpl, KernelName,
21532148
SyclProg.get());
21542149
assert(FoundKernel == Kernel);
2150+
} else {
2151+
// Non-cacheable kernels use mutexes from kernel_impls.
2152+
// TODO this can still result in a race condition if multiple SYCL
2153+
// kernels are created with the same native handle. To address this,
2154+
// we need to either store and use a pi_native_handle -> mutex map or
2155+
// reuse and return existing SYCL kernels from make_native to avoid
2156+
// their duplication in such cases.
2157+
KernelMutex = &MSyclKernel->getNoncacheableEnqueueMutex();
21552158
}
21562159
} else {
21572160
std::tie(Kernel, KernelMutex, Program) =

0 commit comments

Comments
 (0)