Skip to content

Commit a8c8291

Browse files
[SYCL][Graph] Modified the adapters such that it is valid to call release on... (#18619)
Command Buffer, while it is still executing.
1 parent 109a796 commit a8c8291

File tree

13 files changed

+121
-32
lines changed

13 files changed

+121
-32
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -765,10 +765,11 @@ _executable_ by the user invoking `command_graph::finalize()` to create a
765765
new executable instance of the graph. An executable graph cannot be converted
766766
to a modifiable graph. After finalizing a graph in the modifiable state, it is
767767
valid for a user to add additional nodes and finalize again to create subsequent
768-
executable graphs. The state of a `command_graph` object is made explicit by
769-
templating on state to make the class strongly typed, with the default template
770-
argument being `graph_state::modifiable` to reduce code verbosity on
771-
construction.
768+
executable graphs. When an executable graph is destroyed, the underlying
769+
resources will be freed only once any enqueued submissions of the graph have
770+
completed. The state of a `command_graph` object is made explicit by templating
771+
on state to make the class strongly typed, with the default template argument
772+
being `graph_state::modifiable` to reduce code verbosity on construction.
772773

773774
.Graph State Diagram
774775
[source, mermaid]

sycl/source/detail/graph_impl.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,11 +1002,6 @@ exec_graph_impl::~exec_graph_impl() {
10021002
const sycl::detail::AdapterPtr &Adapter =
10031003
sycl::detail::getSyclObjImpl(MContext)->getAdapter();
10041004
MSchedule.clear();
1005-
// We need to wait on all command buffer executions before we can release
1006-
// them.
1007-
for (auto &Event : MExecutionEvents) {
1008-
Event->wait(Event);
1009-
}
10101005

10111006
// Clean up any graph-owned allocations that were allocated
10121007
MGraphImpl->getMemPool().deallocateAndUnmapAll();
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../Inputs/release_while_executing.cpp"
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Tests destroying finalized command_graph before it is finished executing,
2+
// relying on the backends to properly synchronize and wait for the submitted
3+
// work to finish.
4+
5+
#include "../graph_common.hpp"
6+
7+
int main() {
8+
queue Queue;
9+
10+
using T = int;
11+
12+
std::vector<T> DataA(Size), ReferenceA(Size);
13+
14+
std::iota(DataA.begin(), DataA.end(), 1);
15+
std::iota(ReferenceA.begin(), ReferenceA.end(), 2);
16+
17+
T *PtrA = malloc_device<T>(Size, Queue);
18+
19+
// Create the command_graph in a seperate scope so that it's destroyed before
20+
// Queue.wait()
21+
{
22+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
23+
24+
Queue.copy(DataA.data(), PtrA, Size);
25+
Queue.wait_and_throw();
26+
27+
auto Node = add_node(Graph, Queue, [&](handler &CGH) {
28+
CGH.parallel_for(Size, [=](item<1> Item) { PtrA[Item.get_id()] += 1; });
29+
});
30+
31+
auto GraphExec = Graph.finalize();
32+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
33+
}
34+
35+
Queue.wait_and_throw();
36+
37+
Queue.copy(PtrA, DataA.data(), Size);
38+
Queue.wait_and_throw();
39+
40+
free(PtrA, Queue);
41+
42+
for (size_t i = 0; i < Size; i++) {
43+
assert(check_value(i, ReferenceA[i], DataA[i], "DataA"));
44+
}
45+
46+
return 0;
47+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_RECORD_REPLAY
9+
10+
#include "../Inputs/release_while_executing.cpp"

unified-runtime/include/ur_api.h

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,4 @@ Contributors
563563
* Maxime France-Pillois `[email protected] <[email protected]>`_
564564
565565
566+

unified-runtime/scripts/core/exp-command-buffer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ returns:
310310
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
311311
--- #--------------------------------------------------------------------------
312312
type: function
313-
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero."
313+
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero. It is legal to call the entry-point while `hCommandBuffer` is still executing, which will block on completion if the reference count of `hCommandBuffer` becomes zero."
314314
class: $xCommandBuffer
315315
name: ReleaseExp
316316
params:

unified-runtime/source/adapters/level_zero/command_buffer.cpp

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,25 @@ ur_result_t createMainCommandList(ur_context_handle_t Context,
671671
return UR_RESULT_SUCCESS;
672672
}
673673

674+
/**
675+
* Waits for any ongoing executions of the command-buffer to finish
676+
* @param CommandBuffer The command-buffer to wait for.
677+
* @return UR_RESULT_SUCCESS or an error code on failure
678+
*/
679+
ur_result_t
680+
waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
681+
682+
if (ur_event_handle_t &CurrentSubmissionEvent =
683+
CommandBuffer->CurrentSubmissionEvent) {
684+
ZE2UR_CALL(zeEventHostSynchronize,
685+
(CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
686+
UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
687+
CurrentSubmissionEvent = nullptr;
688+
}
689+
690+
return UR_RESULT_SUCCESS;
691+
}
692+
674693
/**
675694
* Checks whether the command-buffer can be constructed using in order
676695
* command-lists.
@@ -832,6 +851,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
832851
if (!CommandBuffer->RefCount.decrementAndTest())
833852
return UR_RESULT_SUCCESS;
834853

854+
UR_CALL(waitForOngoingExecution(CommandBuffer));
835855
CommandBuffer->cleanupCommandBufferResources();
836856
delete CommandBuffer;
837857
return UR_RESULT_SUCCESS;
@@ -1453,25 +1473,6 @@ ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine,
14531473
return UR_RESULT_SUCCESS;
14541474
}
14551475

1456-
/**
1457-
* Waits for any ongoing executions of the command-buffer to finish.
1458-
* @param CommandBuffer The command-buffer to wait for.
1459-
* @return UR_RESULT_SUCCESS or an error code on failure
1460-
*/
1461-
ur_result_t
1462-
waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
1463-
1464-
if (ur_event_handle_t &CurrentSubmissionEvent =
1465-
CommandBuffer->CurrentSubmissionEvent) {
1466-
ZE2UR_CALL(zeEventHostSynchronize,
1467-
(CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
1468-
UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
1469-
CurrentSubmissionEvent = nullptr;
1470-
}
1471-
1472-
return UR_RESULT_SUCCESS;
1473-
}
1474-
14751476
/**
14761477
* Waits for the all the dependencies of the command-buffer
14771478
* @param[in] CommandBuffer The command-buffer.

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
263263
if (!hCommandBuffer->RefCount.decrementAndTest())
264264
return UR_RESULT_SUCCESS;
265265

266+
if (auto executionEvent = hCommandBuffer->getExecutionEventUnlocked()) {
267+
ZE2UR_CALL(zeEventHostSynchronize,
268+
(executionEvent->getZeEvent(), UINT64_MAX));
269+
}
266270
delete hCommandBuffer;
267271
return UR_RESULT_SUCCESS;
268272
} catch (...) {

unified-runtime/source/loader/ur_libapi.cpp

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/ur_api.cpp

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,14 @@ TEST_P(urEnqueueCommandBufferExpTest, SerializeInOrOutOfOrderQueue) {
190190
ASSERT_EQ(reference, Output[i]);
191191
}
192192
}
193+
194+
// Tests releasing command-buffer while it is still executing relying
195+
// on synchronization during urCommandBufferReleaseExp call.
196+
TEST_P(urEnqueueCommandBufferExpTest, EnqueueAndRelease) {
197+
ASSERT_SUCCESS(urEnqueueCommandBufferExp(
198+
in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));
199+
200+
// Release the command buffer without explicitly waiting beforehand
201+
EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
202+
cmd_buf_handle = nullptr;
203+
}

0 commit comments

Comments
 (0)