Skip to content

Commit e7d1a72

Browse files
sergey-semenovAlexeySachkov
authored andcommitted
[SYCL] Fix a thread pool data race during shutdown (#15535)
The flag that kills the threads waiting for work was an atomic bool, but that is not enough: any modification of a condition must be made under the mutex associated with that condition variable. Otherwise, the bool value flip and the call to notify might happen after the condition check in the other thread, but before it starts waiting for notifications.
1 parent d396703 commit e7d1a72

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

sycl/source/detail/thread_pool.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ class ThreadPool {
2929
std::queue<std::function<void()>> MJobQueue;
3030
std::mutex MJobQueueMutex;
3131
std::condition_variable MDoSmthOrStop;
32-
std::atomic_bool MStop;
32+
bool MStop = false;
3333
std::atomic_uint MJobsInPool;
3434

3535
void worker() {
3636
GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false);
3737
std::unique_lock<std::mutex> Lock(MJobQueueMutex);
3838
while (true) {
39-
MDoSmthOrStop.wait(
40-
Lock, [this]() { return !MJobQueue.empty() || MStop.load(); });
39+
MDoSmthOrStop.wait(Lock,
40+
[this]() { return !MJobQueue.empty() || MStop; });
4141

42-
if (MStop.load())
42+
if (MStop)
4343
break;
4444

4545
std::function<void()> Job = std::move(MJobQueue.front());
@@ -57,7 +57,6 @@ class ThreadPool {
5757
void start() {
5858
MLaunchedThreads.reserve(MThreadCount);
5959

60-
MStop.store(false);
6160
MJobsInPool.store(0);
6261

6362
for (size_t Idx = 0; Idx < MThreadCount; ++Idx)
@@ -83,7 +82,10 @@ class ThreadPool {
8382
}
8483

8584
void finishAndWait() {
86-
MStop.store(true);
85+
{
86+
std::lock_guard<std::mutex> Lock(MJobQueueMutex);
87+
MStop = true;
88+
}
8789

8890
MDoSmthOrStop.notify_all();
8991

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include <sycl/detail/core.hpp>
5+
6+
using namespace sycl;
7+
8+
// This test checks that host tasks that haven't been waited for do not cause
9+
// issues during shutdown.
10+
int main(int argc, char *argv[]) {
11+
queue q;
12+
13+
q.submit([&](handler &h) { h.host_task([=]() {}); });
14+
15+
return 0;
16+
}

0 commit comments

Comments
 (0)