Skip to content

Commit e578f69

Browse files
[SYCL][Graph] Makes command graph functions thread-safe (bugfix)
Removes the test-e2e dependency on graph_impl.hpp by changing the e2e test into a unit test.
1 parent bfcd551 commit e578f69

File tree

5 files changed

+124
-136
lines changed

5 files changed

+124
-136
lines changed

sycl/test-e2e/Graph/Explicit/multiple_exec_graphs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: level_zero, gpu
2-
// RUN: %{build_pthread_inc} -o %t.out
2+
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44
// Extra run to check for leaks in Level Zero using ZE_DEBUG
55
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}

sycl/test-e2e/Graph/Inputs/multiple_exec_graphs.cpp

Lines changed: 11 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,12 @@
22
// graph.
33

44
#include "../graph_common.hpp"
5-
#include <detail/graph_impl.hpp>
6-
7-
#include <thread>
8-
9-
bool checkExecGraphSchedule(
10-
std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
11-
GraphA,
12-
std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
13-
GraphB) {
14-
auto ScheduleA = GraphA->getSchedule();
15-
auto ScheduleB = GraphB->getSchedule();
16-
if (ScheduleA.size() != ScheduleB.size())
17-
return false;
18-
19-
std::vector<
20-
std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
21-
VScheduleA{std::begin(ScheduleA), std::end(ScheduleA)};
22-
std::vector<
23-
std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
24-
VScheduleB{std::begin(ScheduleB), std::end(ScheduleB)};
25-
26-
for (size_t i = 0; i < VScheduleA.size(); i++) {
27-
if (!VScheduleA[i]->isSimilar(VScheduleB[i]))
28-
return false;
29-
}
30-
return true;
31-
}
325

336
int main() {
347
queue Queue;
358

369
using T = int;
3710

38-
const unsigned NumThreads = std::thread::hardware_concurrency();
3911
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
4012

4113
std::iota(DataA.begin(), DataA.end(), 1);
@@ -62,72 +34,29 @@ int main() {
6234

6335
add_nodes(Graph, Queue, Size, PtrA, PtrB, PtrC);
6436

65-
Barrier SyncPoint{NumThreads};
66-
67-
std::map<int, exp_ext::command_graph<exp_ext::graph_state::executable>>
68-
GraphsExecMap;
69-
auto FinalizeGraph = [&](int ThreadNum) {
70-
SyncPoint.wait();
37+
// Finalize and execute several iterations of the graph
38+
event Event;
39+
for (unsigned n = 0; n < Iterations; n++) {
7140
auto GraphExec = Graph.finalize();
72-
GraphsExecMap.insert(
73-
std::map<int,
74-
exp_ext::command_graph<exp_ext::graph_state::executable>>::
75-
value_type(ThreadNum, GraphExec));
76-
Queue.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
77-
};
78-
79-
std::vector<std::thread> Threads;
80-
Threads.reserve(NumThreads);
81-
82-
for (unsigned i = 0; i < NumThreads; ++i) {
83-
Threads.emplace_back(FinalizeGraph, i);
41+
Event = Queue.submit([&](handler &CGH) {
42+
CGH.depends_on(Event);
43+
CGH.ext_oneapi_graph(GraphExec);
44+
});
8445
}
85-
86-
for (unsigned i = 0; i < NumThreads; ++i) {
87-
Threads[i].join();
88-
}
89-
9046
Queue.wait_and_throw();
9147

9248
Queue.copy(PtrA, DataA.data(), Size);
9349
Queue.copy(PtrB, DataB.data(), Size);
9450
Queue.copy(PtrC, DataC.data(), Size);
9551
Queue.wait_and_throw();
9652

97-
// Ref computation
98-
queue QueueRef{Queue.get_context(), Queue.get_device()};
99-
exp_ext::command_graph GraphRef{Queue.get_context(), Queue.get_device()};
100-
101-
T *PtrARef = malloc_device<T>(Size, QueueRef);
102-
T *PtrBRef = malloc_device<T>(Size, QueueRef);
103-
T *PtrCRef = malloc_device<T>(Size, QueueRef);
104-
105-
QueueRef.copy(DataA.data(), PtrARef, Size);
106-
QueueRef.copy(DataB.data(), PtrBRef, Size);
107-
QueueRef.copy(DataC.data(), PtrCRef, Size);
108-
QueueRef.wait_and_throw();
109-
110-
add_nodes(GraphRef, QueueRef, Size, PtrARef, PtrBRef, PtrCRef);
111-
112-
for (unsigned i = 0; i < NumThreads; ++i) {
113-
auto GraphExecRef = GraphRef.finalize();
114-
QueueRef.submit(
115-
[&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExecRef); });
116-
auto GraphExecImpl =
117-
sycl::detail::getSyclObjImpl(GraphsExecMap.find(i)->second);
118-
auto GraphExecRefImpl = sycl::detail::getSyclObjImpl(GraphExecRef);
119-
assert(checkExecGraphSchedule(GraphExecImpl, GraphExecRefImpl));
120-
}
121-
122-
QueueRef.wait_and_throw();
123-
124-
free(PtrARef, QueueRef);
125-
free(PtrBRef, QueueRef);
126-
free(PtrCRef, QueueRef);
127-
12853
free(PtrA, Queue);
12954
free(PtrB, Queue);
13055
free(PtrC, Queue);
13156

57+
assert(ReferenceA == DataA);
58+
assert(ReferenceB == DataB);
59+
assert(ReferenceC == DataC);
60+
13261
return 0;
13362
}

sycl/test-e2e/Graph/RecordReplay/multiple_exec_graphs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: level_zero, gpu
2-
// RUN: %{build_pthread_inc} -o %t.out
2+
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44
// Extra run to check for leaks in Level Zero using ZE_DEBUG
55
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}

sycl/test-e2e/format.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,8 @@ def execute(self, test, litConfig):
100100
# -that new tests by default would runnable there (unless they have
101101
# -other restrictions).
102102
substitutions.append(('%{build}', '%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s'))
103-
104-
# get GIT root path
105-
stream = os.popen('git rev-parse --show-toplevel')
106-
git_root_path = stream.read()[:-1]
107-
108-
if 'windows' in test.config.available_features:
109-
source_files_path = git_root_path+"\sycl\source"
110-
else:
111-
source_files_path = git_root_path+"/sycl/source"
112103

113-
compilation_cmd_pthread = "%clangxx -I" + source_files_path + " -pthread -fsycl -fsycl-targets=%{sycl_triple} %s"
104+
compilation_cmd_pthread = "%clangxx -pthread -fsycl -fsycl-targets=%{sycl_triple} %s"
114105
substitutions.append(('%{build_pthread_inc}', compilation_cmd_pthread))
115106

116107
def get_extra_env(sycl_devices):

0 commit comments

Comments
 (0)