2
2
// graph.
3
3
4
4
#include " ../graph_common.hpp"
5
- #include < detail/graph_impl.hpp>
6
-
7
- #include < thread>
8
-
9
- bool checkExecGraphSchedule (
10
- std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
11
- GraphA,
12
- std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
13
- GraphB) {
14
- auto ScheduleA = GraphA->getSchedule ();
15
- auto ScheduleB = GraphB->getSchedule ();
16
- if (ScheduleA.size () != ScheduleB.size ())
17
- return false ;
18
-
19
- std::vector<
20
- std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
21
- VScheduleA{std::begin (ScheduleA), std::end (ScheduleA)};
22
- std::vector<
23
- std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
24
- VScheduleB{std::begin (ScheduleB), std::end (ScheduleB)};
25
-
26
- for (size_t i = 0 ; i < VScheduleA.size (); i++) {
27
- if (!VScheduleA[i]->isSimilar (VScheduleB[i]))
28
- return false ;
29
- }
30
- return true ;
31
- }
32
5
33
6
int main () {
34
7
queue Queue;
35
8
36
9
using T = int ;
37
10
38
- const unsigned NumThreads = std::thread::hardware_concurrency ();
39
11
std::vector<T> DataA (Size), DataB (Size), DataC (Size);
40
12
41
13
std::iota (DataA.begin (), DataA.end (), 1 );
@@ -62,72 +34,29 @@ int main() {
62
34
63
35
add_nodes (Graph, Queue, Size, PtrA, PtrB, PtrC);
64
36
65
- Barrier SyncPoint{NumThreads};
66
-
67
- std::map<int , exp_ext::command_graph<exp_ext::graph_state::executable>>
68
- GraphsExecMap;
69
- auto FinalizeGraph = [&](int ThreadNum) {
70
- SyncPoint.wait ();
37
+ // Finalize and execute several iterations of the graph
38
+ event Event;
39
+ for (unsigned n = 0 ; n < Iterations; n++) {
71
40
auto GraphExec = Graph.finalize ();
72
- GraphsExecMap.insert (
73
- std::map<int ,
74
- exp_ext::command_graph<exp_ext::graph_state::executable>>::
75
- value_type (ThreadNum, GraphExec));
76
- Queue.submit ([&](sycl::handler &CGH) { CGH.ext_oneapi_graph (GraphExec); });
77
- };
78
-
79
- std::vector<std::thread> Threads;
80
- Threads.reserve (NumThreads);
81
-
82
- for (unsigned i = 0 ; i < NumThreads; ++i) {
83
- Threads.emplace_back (FinalizeGraph, i);
41
+ Event = Queue.submit ([&](handler &CGH) {
42
+ CGH.depends_on (Event);
43
+ CGH.ext_oneapi_graph (GraphExec);
44
+ });
84
45
}
85
-
86
- for (unsigned i = 0 ; i < NumThreads; ++i) {
87
- Threads[i].join ();
88
- }
89
-
90
46
Queue.wait_and_throw ();
91
47
92
48
Queue.copy (PtrA, DataA.data (), Size);
93
49
Queue.copy (PtrB, DataB.data (), Size);
94
50
Queue.copy (PtrC, DataC.data (), Size);
95
51
Queue.wait_and_throw ();
96
52
97
- // Ref computation
98
- queue QueueRef{Queue.get_context (), Queue.get_device ()};
99
- exp_ext::command_graph GraphRef{Queue.get_context (), Queue.get_device ()};
100
-
101
- T *PtrARef = malloc_device<T>(Size, QueueRef);
102
- T *PtrBRef = malloc_device<T>(Size, QueueRef);
103
- T *PtrCRef = malloc_device<T>(Size, QueueRef);
104
-
105
- QueueRef.copy (DataA.data (), PtrARef, Size);
106
- QueueRef.copy (DataB.data (), PtrBRef, Size);
107
- QueueRef.copy (DataC.data (), PtrCRef, Size);
108
- QueueRef.wait_and_throw ();
109
-
110
- add_nodes (GraphRef, QueueRef, Size, PtrARef, PtrBRef, PtrCRef);
111
-
112
- for (unsigned i = 0 ; i < NumThreads; ++i) {
113
- auto GraphExecRef = GraphRef.finalize ();
114
- QueueRef.submit (
115
- [&](sycl::handler &CGH) { CGH.ext_oneapi_graph (GraphExecRef); });
116
- auto GraphExecImpl =
117
- sycl::detail::getSyclObjImpl (GraphsExecMap.find (i)->second );
118
- auto GraphExecRefImpl = sycl::detail::getSyclObjImpl (GraphExecRef);
119
- assert (checkExecGraphSchedule (GraphExecImpl, GraphExecRefImpl));
120
- }
121
-
122
- QueueRef.wait_and_throw ();
123
-
124
- free (PtrARef, QueueRef);
125
- free (PtrBRef, QueueRef);
126
- free (PtrCRef, QueueRef);
127
-
128
53
free (PtrA, Queue);
129
54
free (PtrB, Queue);
130
55
free (PtrC, Queue);
131
56
57
+ assert (ReferenceA == DataA);
58
+ assert (ReferenceB == DataB);
59
+ assert (ReferenceC == DataC);
60
+
132
61
return 0 ;
133
62
}
0 commit comments