Skip to content

Commit e911de7

Browse files
authored
[SYCL] Fix enqueuing only a single host-task (#1937)
The issue results in either a segfault due to a nullptr dereference, or in a deadlock when the buffer gets destroyed while the last access to it was through a host task.
1 parent a7b763b commit e911de7

File tree

2 files changed

+82
-9
lines changed

2 files changed

+82
-9
lines changed

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -156,18 +156,33 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
156156
}
157157

158158
// Tears down all scheduler state associated with a memory object when it is
// destroyed: waits for outstanding commands on its record, then removes the
// record from the graph.
//
// Locking protocol (order matters — do not restructure):
//  1. Exclusive lock only long enough to look up the record.
//  2. Shared lock while waiting for the record's commands to finish —
//     waiting only enqueues/awaits events, and per the commit message,
//     holding the exclusive lock here could deadlock when the last access
//     to the buffer was a host task (TODO confirm exact deadlock path).
//  3. Exclusive lock again to mutate the graph (leaf counters, command
//     cleanup, record removal).
void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
  MemObjRecord *Record = nullptr;
  // Deferred so each scoped section below decides when to acquire.
  std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);

  {
    lockSharedTimedMutex(Lock);

    Record = MGraphBuilder.getMemObjRecord(MemObj);
    if (!Record)
      // No operations were performed on the mem object
      return;

    // Release the exclusive lock before waiting; Record stays valid —
    // presumably nothing else can remove it concurrently (verify against
    // callers).
    Lock.unlock();
  }

  {
    // This only needs a shared mutex as it only involves enqueueing and
    // awaiting for events
    std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
    waitForRecordToFinish(Record);
  }

  {
    // Re-acquire exclusively: the three calls below mutate the graph.
    lockSharedTimedMutex(Lock);
    MGraphBuilder.decrementLeafCountersForRecord(Record);
    MGraphBuilder.cleanupCommandsForRecord(Record);
    MGraphBuilder.removeRecordForMemObj(MemObj);
  }
}
172187

173188
EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
6+
#include <CL/sycl.hpp>
7+
8+
using namespace cl::sycl;
9+
using namespace cl::sycl::access;
10+
11+
static constexpr size_t BUFFER_SIZE = 1024;
12+
13+
template <typename T>
14+
class Modifier;
15+
16+
template <typename T>
17+
class Init;
18+
19+
template <typename DataT>
20+
void copy(buffer<DataT, 1> &Src, buffer<DataT, 1> &Dst, queue &Q) {
21+
Q.submit([&](handler &CGH) {
22+
auto SrcA = Src.template get_access<mode::read>(CGH);
23+
auto DstA = Dst.template get_access<mode::write>(CGH);
24+
25+
CGH.codeplay_host_task([=]() {
26+
for (size_t Idx = 0; Idx < SrcA.get_count(); ++Idx)
27+
DstA[Idx] = SrcA[Idx];
28+
});
29+
});
30+
}
31+
32+
template <typename DataT>
33+
void init(buffer<DataT, 1> &B1, buffer<DataT, 1> &B2, queue &Q) {
34+
Q.submit([&](handler &CGH) {
35+
auto Acc1 = B1.template get_access<mode::write>(CGH);
36+
auto Acc2 = B2.template get_access<mode::write>(CGH);
37+
38+
CGH.parallel_for<Init<DataT>>(BUFFER_SIZE, [=](item<1> Id) {
39+
Acc1[Id] = -1;
40+
Acc2[Id] = -2;
41+
});
42+
});
43+
}
44+
45+
void test() {
46+
queue Q;
47+
buffer<int, 1> Buffer1{BUFFER_SIZE};
48+
buffer<int, 1> Buffer2{BUFFER_SIZE};
49+
50+
init<int>(Buffer1, Buffer2, Q);
51+
52+
copy(Buffer1, Buffer2, Q);
53+
}
54+
55+
// Entry point: run the single regression scenario; success is simply
// returning without crashing or hanging. (main returns 0 implicitly.)
int main() { test(); }

0 commit comments

Comments
 (0)