Skip to content

Commit ebace77

Browse files
[SYCL] Fix deadlock in Scheduler on Windows (#1703)
There was a deadlock in Scheduler on Windows only. This deadlock happens in implementation of std::shared_timed_mutex of MSVC's C++ std library. The deadlock on Windows happens because lock ordering is not specified in C++ spec when we do lock and lock_shared simultaneously, and lock and lock_shared both wait. It can be fixed in 2 ways: 1. upgrade DPC++ runtime to C++17 and change std::shared_timed_mutex to std::shared_mutex, there will be no deadlock on Windows out of the box. No regression on Linux. 2. implement spinlock We can't upgrade to C++17 yet, so spinlock was implemented. It fixed the deadlock on Windows. But it creates another deadlock, this time on Linux only in implementation of lock and shared_lock in pthread. To fix this, added #ifdef _WIN32 to separate Windows and Linux parts of code. Signed-off-by: Dmitry Vodopyanov <[email protected]>
1 parent ae3fd5c commit ebace77

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
6666
Command *NewCmd = nullptr;
6767
const bool IsKernel = CommandGroup->getType() == CG::KERNEL;
6868
{
69-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
69+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
70+
lockSharedTimedMutex(Lock);
7071

7172
switch (CommandGroup->getType()) {
7273
case CG::UPDATE_HOST:
@@ -98,7 +99,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
9899
}
99100

100101
EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
101-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
102+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
103+
lockSharedTimedMutex(Lock);
102104
Command *NewCmd = MGraphBuilder.addCopyBack(Req);
103105
// Command was not creted because there were no operations with
104106
// buffer.
@@ -154,7 +156,8 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
154156
}
155157

156158
void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
157-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
159+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
160+
lockSharedTimedMutex(Lock);
158161

159162
MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
160163
if (!Record)
@@ -169,7 +172,8 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
169172

170173
EventImplPtr Scheduler::addHostAccessor(Requirement *Req,
171174
const bool destructor) {
172-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
175+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
176+
lockSharedTimedMutex(Lock);
173177

174178
Command *NewCmd = MGraphBuilder.addHostAccessor(Req, destructor);
175179

@@ -216,6 +220,25 @@ Scheduler::Scheduler() {
216220
QueueOrder::Ordered, /*PropList=*/{}));
217221
}
218222

223+
void Scheduler::lockSharedTimedMutex(
224+
std::unique_lock<std::shared_timed_mutex> &Lock) {
225+
#ifdef _WIN32
226+
// Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification
227+
// does not specify a priority for shared and exclusive accesses. It will be a
228+
// deadlock in MSVC's std::shared_timed_mutex implementation, if exclusive
229+
// access occurs after shared access.
230+
// TODO: after switching to C++17, change std::shared_timed_mutex to
231+
// std::shared_mutex and use std::lock_guard here both for Windows and Linux.
232+
while (!Lock.owns_lock()) {
233+
Lock.try_lock();
234+
}
235+
#else
236+
// It is a deadlock on UNIX in implementation of lock and lock_shared, if
237+
// try_lock in the loop above will be executed, so using a single lock here
238+
Lock.lock();
239+
#endif // _WIN32
240+
}
241+
219242
} // namespace detail
220243
} // namespace sycl
221244
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,13 @@ class Scheduler {
430430
Scheduler();
431431
static Scheduler instance;
432432

433+
/// Provides exclusive access to std::shared_timed_mutex object with deadlock
434+
/// avoidance
435+
///
436+
/// \param Lock is an instance of std::unique_lock<std::shared_timed_mutex>
437+
/// class
438+
void lockSharedTimedMutex(std::unique_lock<std::shared_timed_mutex> &Lock);
439+
433440
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req);
434441

435442
/// Graph builder class.
@@ -687,6 +694,8 @@ class Scheduler {
687694
void waitForRecordToFinish(MemObjRecord *Record);
688695

689696
GraphBuilder MGraphBuilder;
697+
// TODO: after switching to C++17, change std::shared_timed_mutex to
698+
// std::shared_mutex
690699
std::shared_timed_mutex MGraphLock;
691700

692701
QueueImplPtr DefaultHostQueue;

0 commit comments

Comments
 (0)