[SYCL][Graph] Makes command graph functions thread-safe (bugfix)

mfrancepillois · mfrancepillois · commit e578f69102c9 · 2023-08-07T17:41:52.000+01:00
Removes the test-e2e dependency on graph_impl.hpp by converting the e2e test
into a unit test.
diff --git a/sycl/test-e2e/Graph/Explicit/multiple_exec_graphs.cpp b/sycl/test-e2e/Graph/Explicit/multiple_exec_graphs.cpp
@@ -1,5 +1,5 @@
 // REQUIRES: level_zero, gpu
-// RUN: %{build_pthread_inc} -o %t.out
+// RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
 // Extra run to check for leaks in Level Zero using ZE_DEBUG
 // RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
diff --git a/sycl/test-e2e/Graph/Inputs/multiple_exec_graphs.cpp b/sycl/test-e2e/Graph/Inputs/multiple_exec_graphs.cpp
@@ -2,40 +2,12 @@
 // graph.
 
 #include "../graph_common.hpp"
-#include <detail/graph_impl.hpp>
-
-#include <thread>
-
-bool checkExecGraphSchedule(
-    std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
-        GraphA,
-    std::shared_ptr<sycl::ext::oneapi::experimental::detail::exec_graph_impl>
-        GraphB) {
-  auto ScheduleA = GraphA->getSchedule();
-  auto ScheduleB = GraphB->getSchedule();
-  if (ScheduleA.size() != ScheduleB.size())
-    return false;
-
-  std::vector<
-      std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
-      VScheduleA{std::begin(ScheduleA), std::end(ScheduleA)};
-  std::vector<
-      std::shared_ptr<sycl::ext::oneapi::experimental::detail::node_impl>>
-      VScheduleB{std::begin(ScheduleB), std::end(ScheduleB)};
-
-  for (size_t i = 0; i < VScheduleA.size(); i++) {
-    if (!VScheduleA[i]->isSimilar(VScheduleB[i]))
-      return false;
-  }
-  return true;
-}
 
 int main() {
   queue Queue;
 
   using T = int;
 
-  const unsigned NumThreads = std::thread::hardware_concurrency();
   std::vector<T> DataA(Size), DataB(Size), DataC(Size);
 
   std::iota(DataA.begin(), DataA.end(), 1);
@@ -62,72 +34,29 @@ int main() {
 
   add_nodes(Graph, Queue, Size, PtrA, PtrB, PtrC);
 
-  Barrier SyncPoint{NumThreads};
-
-  std::map<int, exp_ext::command_graph<exp_ext::graph_state::executable>>
-      GraphsExecMap;
-  auto FinalizeGraph = [&](int ThreadNum) {
-    SyncPoint.wait();
+  // Finalize and execute several iterations of the graph
+  event Event;
+  for (unsigned n = 0; n < Iterations; n++) {
     auto GraphExec = Graph.finalize();
-    GraphsExecMap.insert(
-        std::map<int,
-                 exp_ext::command_graph<exp_ext::graph_state::executable>>::
-            value_type(ThreadNum, GraphExec));
-    Queue.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
-  };
-
-  std::vector<std::thread> Threads;
-  Threads.reserve(NumThreads);
-
-  for (unsigned i = 0; i < NumThreads; ++i) {
-    Threads.emplace_back(FinalizeGraph, i);
+    Event = Queue.submit([&](handler &CGH) {
+      CGH.depends_on(Event);
+      CGH.ext_oneapi_graph(GraphExec);
+    });
   }
-
-  for (unsigned i = 0; i < NumThreads; ++i) {
-    Threads[i].join();
-  }
-
   Queue.wait_and_throw();
 
   Queue.copy(PtrA, DataA.data(), Size);
   Queue.copy(PtrB, DataB.data(), Size);
   Queue.copy(PtrC, DataC.data(), Size);
   Queue.wait_and_throw();
 
-  // Ref computation
-  queue QueueRef{Queue.get_context(), Queue.get_device()};
-  exp_ext::command_graph GraphRef{Queue.get_context(), Queue.get_device()};
-
-  T *PtrARef = malloc_device<T>(Size, QueueRef);
-  T *PtrBRef = malloc_device<T>(Size, QueueRef);
-  T *PtrCRef = malloc_device<T>(Size, QueueRef);
-
-  QueueRef.copy(DataA.data(), PtrARef, Size);
-  QueueRef.copy(DataB.data(), PtrBRef, Size);
-  QueueRef.copy(DataC.data(), PtrCRef, Size);
-  QueueRef.wait_and_throw();
-
-  add_nodes(GraphRef, QueueRef, Size, PtrARef, PtrBRef, PtrCRef);
-
-  for (unsigned i = 0; i < NumThreads; ++i) {
-    auto GraphExecRef = GraphRef.finalize();
-    QueueRef.submit(
-        [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExecRef); });
-    auto GraphExecImpl =
-        sycl::detail::getSyclObjImpl(GraphsExecMap.find(i)->second);
-    auto GraphExecRefImpl = sycl::detail::getSyclObjImpl(GraphExecRef);
-    assert(checkExecGraphSchedule(GraphExecImpl, GraphExecRefImpl));
-  }
-
-  QueueRef.wait_and_throw();
-
-  free(PtrARef, QueueRef);
-  free(PtrBRef, QueueRef);
-  free(PtrCRef, QueueRef);
-
   free(PtrA, Queue);
   free(PtrB, Queue);
   free(PtrC, Queue);
 
+  assert(ReferenceA == DataA);
+  assert(ReferenceB == DataB);
+  assert(ReferenceC == DataC);
+
   return 0;
 }
diff --git a/sycl/test-e2e/Graph/RecordReplay/multiple_exec_graphs.cpp b/sycl/test-e2e/Graph/RecordReplay/multiple_exec_graphs.cpp
@@ -1,5 +1,5 @@
 // REQUIRES: level_zero, gpu
-// RUN: %{build_pthread_inc} -o %t.out
+// RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
 // Extra run to check for leaks in Level Zero using ZE_DEBUG
 // RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py
@@ -100,17 +100,8 @@ def execute(self, test, litConfig):
         # -that new tests by default would runnable there (unless they have
         # -other restrictions).
         substitutions.append(('%{build}', '%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s'))
-
-        # get GIT root path
-        stream = os.popen('git rev-parse --show-toplevel')
-        git_root_path = stream.read()[:-1]
-
-        if 'windows' in test.config.available_features:
-            source_files_path = git_root_path+"\sycl\source" 
-        else:
-            source_files_path = git_root_path+"/sycl/source"
             
-        compilation_cmd_pthread = "%clangxx -I" + source_files_path + " -pthread -fsycl -fsycl-targets=%{sycl_triple} %s"
+        compilation_cmd_pthread = "%clangxx -pthread -fsycl -fsycl-targets=%{sycl_triple} %s"
         substitutions.append(('%{build_pthread_inc}', compilation_cmd_pthread))
         
         def get_extra_env(sycl_devices):
diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp