Skip to content

Commit 3967086

Browse files
authored
[UR] [V2] Add wait before enqueue in command buffer (#17709)
According to @MichalMrozek, zeCommandListImmediateAppendCommandListsExp has the same requirements as zeCommandQueueExecuteCommandLists, because of this, the command list must not be referenced by device when it is enqueued. This PR fixes this issue by adding event to synchronize append and execution
1 parent 180f73e commit 3967086

File tree

8 files changed

+72
-23
lines changed

8 files changed

+72
-23
lines changed

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,28 @@ ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() {
4747
isFinalized = true;
4848
return UR_RESULT_SUCCESS;
4949
}
50+
ur_event_handle_t ur_exp_command_buffer_handle_t_::getExecutionEventUnlocked() {
51+
return currentExecution;
52+
}
53+
54+
ur_result_t ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked(
55+
ur_event_handle_t nextExecutionEvent) {
56+
if (currentExecution) {
57+
UR_CALL(currentExecution->release());
58+
currentExecution = nullptr;
59+
}
60+
if (nextExecutionEvent) {
61+
currentExecution = nextExecutionEvent;
62+
UR_CALL(nextExecutionEvent->retain());
63+
}
64+
return UR_RESULT_SUCCESS;
65+
}
5066

67+
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
68+
if (currentExecution) {
69+
currentExecution->release();
70+
}
71+
}
5172
namespace ur::level_zero {
5273

5374
ur_result_t

unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
2323
v2::raii::command_list_unique_handle &&commandList,
2424
const ur_exp_command_buffer_desc_t *desc);
2525

26-
~ur_exp_command_buffer_handle_t_() = default;
26+
~ur_exp_command_buffer_handle_t_();
27+
28+
ur_event_handle_t getExecutionEventUnlocked();
29+
ur_result_t
30+
registerExecutionEventUnlocked(ur_event_handle_t nextExecutionEvent);
2731

2832
lockable<ur_command_list_manager> commandListManager;
2933

@@ -36,6 +40,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
3640
private:
3741
// Indicates if command-buffer was finalized.
3842
bool isFinalized = false;
43+
44+
ur_event_handle_t currentExecution = nullptr;
3945
};
4046

4147
struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,20 @@ ur_result_t ur_command_list_manager::appendRegionCopyUnlocked(
157157
return UR_RESULT_SUCCESS;
158158
}
159159

160-
wait_list_view
161-
ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents,
162-
uint32_t numWaitEvents) {
160+
wait_list_view ur_command_list_manager::getWaitListView(
161+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
162+
ur_event_handle_t additionalWaitEvent) {
163163

164-
waitList.resize(numWaitEvents);
164+
uint32_t totalNumWaitEvents =
165+
numWaitEvents + (additionalWaitEvent != nullptr ? 1 : 0);
166+
waitList.resize(totalNumWaitEvents);
165167
for (uint32_t i = 0; i < numWaitEvents; i++) {
166168
waitList[i] = phWaitEvents[i]->getZeEvent();
167169
}
168-
169-
return {waitList.data(), static_cast<uint32_t>(numWaitEvents)};
170+
if (additionalWaitEvent != nullptr) {
171+
waitList[totalNumWaitEvents - 1] = additionalWaitEvent->getZeEvent();
172+
}
173+
return {waitList.data(), static_cast<uint32_t>(totalNumWaitEvents)};
170174
}
171175

172176
ze_event_handle_t

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,9 @@ struct ur_command_list_manager {
128128

129129
ze_command_list_handle_t getZeCommandList();
130130

131-
wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents,
132-
uint32_t numWaitEvents);
131+
wait_list_view
132+
getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
133+
ur_event_handle_t additionalWaitEvent = nullptr);
133134
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
134135
ur_command_t commandType);
135136

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ namespace v2 {
2323

2424
wait_list_view ur_queue_immediate_in_order_t::getWaitListView(
2525
locked<ur_command_list_manager> &commandList,
26-
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) {
27-
return commandList->getWaitListView(phWaitEvents, numWaitEvents);
26+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
27+
ur_event_handle_t additionalWaitEvent) {
28+
return commandList->getWaitListView(phWaitEvents, numWaitEvents,
29+
additionalWaitEvent);
2830
}
2931

3032
static int32_t getZeOrdinal(ur_device_handle_t hDevice) {
@@ -898,7 +900,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp(
898900
ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp(
899901
uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists,
900902
ur_event_handle_t *phEvent, uint32_t numEventsInWaitList,
901-
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand) {
903+
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand,
904+
ur_event_handle_t additionalWaitEvent) {
902905
TRACK_SCOPE_LATENCY(
903906
"ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp");
904907

@@ -907,7 +910,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp(
907910
getSignalEvent(commandListLocked, phEvent, callerCommand);
908911

909912
auto [pWaitEvents, numWaitEvents] =
910-
getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList);
913+
getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList,
914+
additionalWaitEvent);
911915
// zeCommandListImmediateAppendCommandListsExp is not working with in-order
912916
// immediate lists what causes problems with synchronization
913917
// TODO: remove synchronization when it is not needed
@@ -928,9 +932,21 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp(
928932
auto commandListLocked = hCommandBuffer->commandListManager.lock();
929933
ze_command_list_handle_t commandBufferCommandList =
930934
commandListLocked->getZeCommandList();
931-
return enqueueGenericCommandListsExp(1, &commandBufferCommandList, phEvent,
932-
numEventsInWaitList, phEventWaitList,
933-
UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP);
935+
ur_event_handle_t internalEvent = nullptr;
936+
if (phEvent == nullptr) {
937+
phEvent = &internalEvent;
938+
}
939+
ur_event_handle_t executionEvent =
940+
hCommandBuffer->getExecutionEventUnlocked();
941+
942+
UR_CALL(enqueueGenericCommandListsExp(
943+
1, &commandBufferCommandList, phEvent, numEventsInWaitList,
944+
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, executionEvent));
945+
UR_CALL(hCommandBuffer->registerExecutionEventUnlocked(*phEvent));
946+
if (internalEvent != nullptr) {
947+
internalEvent->release();
948+
}
949+
return UR_RESULT_SUCCESS;
934950
}
935951

936952
ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp(

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
3737
std::vector<ur_event_handle_t> deferredEvents;
3838
std::vector<ur_kernel_handle_t> submittedKernels;
3939

40-
wait_list_view getWaitListView(locked<ur_command_list_manager> &commandList,
41-
const ur_event_handle_t *phWaitEvents,
42-
uint32_t numWaitEvents);
40+
wait_list_view
41+
getWaitListView(locked<ur_command_list_manager> &commandList,
42+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
43+
ur_event_handle_t additionalWaitEvent = nullptr);
4344

4445
ze_event_handle_t getSignalEvent(locked<ur_command_list_manager> &commandList,
4546
ur_event_handle_t *hUserEvent,
@@ -56,7 +57,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
5657
ur_result_t enqueueGenericCommandListsExp(
5758
uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists,
5859
ur_event_handle_t *phEvent, uint32_t numEventsInWaitList,
59-
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand);
60+
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand,
61+
ur_event_handle_t additionalWaitEvent);
6062

6163
ur_result_t
6264
enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList,

unified-runtime/test/conformance/exp_command_buffer/fixtures.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ static void checkCommandBufferUpdateSupport(
4545

4646
struct urCommandBufferExpTest : uur::urContextTest {
4747
void SetUp() override {
48-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
4948

5049
UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp());
5150

@@ -72,7 +71,6 @@ struct urCommandBufferExpTest : uur::urContextTest {
7271
template <class T>
7372
struct urCommandBufferExpTestWithParam : urQueueTestWithParam<T> {
7473
void SetUp() override {
75-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
7674

7775
UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam<T>::SetUp());
7876

@@ -97,7 +95,6 @@ struct urCommandBufferExpTestWithParam : urQueueTestWithParam<T> {
9795

9896
struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
9997
void SetUp() override {
100-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
10198

10299
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());
103100

unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ TEST_P(InvalidUpdateTest, CommandBufferMismatch) {
309309
// that isn't supported.
310310
struct InvalidUpdateCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
311311
void SetUp() override {
312+
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
313+
312314
program_name = "fill_usm";
313315
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());
314316

0 commit comments

Comments
 (0)