Skip to content

Commit b3630eb

Browse files
committed
[SYCL][UR][L0 v2] implement OOO immediate queue
by using multiple in-order queues and a round-robin strategy to dispatch work. With this approach we don't need to worry about events' lifetime. Since we are still using counter-based events, we don't need any special logic to handle cases where an event is released right after being passed as a signal event or as part of a wait list.
1 parent 133fee5 commit b3630eb

File tree

9 files changed

+805
-66
lines changed

9 files changed

+805
-66
lines changed

unified-runtime/source/adapters/level_zero/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
171171
${CMAKE_CURRENT_SOURCE_DIR}/v2/lockable.hpp
172172
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
173173
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
174+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.hpp
174175
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
175176
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
176177
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.cpp
@@ -187,6 +188,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
187188
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
188189
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
189190
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
191+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.cpp
190192
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
191193
)
192194
install_ur_library(ur_adapter_level_zero_v2)

unified-runtime/source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,34 @@ class event_pool {
6161
std::unique_ptr<std::mutex> mutex;
6262
};
6363

64+
// Only create an event when requested by the user.
65+
static inline ur_event_handle_t
66+
createEventIfRequested(event_pool *eventPool, ur_event_handle_t *phEvent,
67+
ur_queue_t_ *queue) {
68+
if (phEvent == nullptr) {
69+
return nullptr;
70+
}
71+
72+
(*phEvent) = eventPool->allocate();
73+
(*phEvent)->setQueue(queue);
74+
return (*phEvent);
75+
}
76+
77+
// Always creates an event (used in functions that need to store the event
78+
// internally). If event was requested by the user, also increase ref count of
79+
// that event to avoid pre-mature release.
80+
static inline ur_event_handle_t createEventAndRetain(event_pool *eventPool,
81+
ur_event_handle_t *phEvent,
82+
ur_queue_t_ *queue) {
83+
auto hEvent = eventPool->allocate();
84+
hEvent->setQueue(queue);
85+
86+
if (phEvent) {
87+
(*phEvent) = hEvent;
88+
hEvent->retain();
89+
}
90+
91+
return hEvent;
92+
}
93+
6494
} // namespace v2

unified-runtime/source/adapters/level_zero/v2/lockable.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ template <typename T> struct locked {
1818
object_ = object;
1919
}
2020
T *operator->() { return object_; }
21+
auto &operator[](size_t index) { return (*object_)[index]; }
2122

2223
private:
2324
std::unique_lock<std::mutex> lock_;

unified-runtime/source/adapters/level_zero/v2/queue_create.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,18 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext,
6969

7070
auto zeIndex = v2::getZeIndex(pProperties);
7171

72-
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
73-
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
74-
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
72+
if ((flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0 &&
73+
!zeIndex.has_value()) {
74+
*phQueue =
75+
ur_queue_handle_t_::create<v2::ur_queue_immediate_out_of_order_t>(
76+
hContext, hDevice, v2::getZeOrdinal(hDevice),
77+
v2::getZePriority(flags), v2::eventFlagsFromQueueFlags(flags),
78+
flags);
79+
} else {
80+
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
81+
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
82+
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
83+
}
7584

7685
return UR_RESULT_SUCCESS;
7786
} catch (...) {

unified-runtime/source/adapters/level_zero/v2/queue_handle.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515

1616
#include "../common.hpp"
1717
#include "queue_immediate_in_order.hpp"
18+
#include "queue_immediate_out_of_order.hpp"
1819
#include <ur_api.h>
1920
#include <variant>
2021

2122
struct ur_queue_handle_t_ : ur::handle_base<ur::level_zero::ddi_getter> {
22-
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t>;
23+
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t,
24+
v2::ur_queue_immediate_out_of_order_t>;
2325
data_variant queue_data;
2426

2527
static constexpr uintptr_t queue_offset =

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
140140
// zeCommandListAppendWaitOnEvents
141141
if ((flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0) {
142142
return commandListManager.lock()->appendEventsWaitWithBarrier(
143-
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
143+
numEventsInWaitList, phEventWaitList,
144+
createEventIfRequested(eventPool.get(), phEvent, this));
144145
} else {
145146
return commandListManager.lock()->appendEventsWait(
146-
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
147+
numEventsInWaitList, phEventWaitList,
148+
createEventIfRequested(eventPool.get(), phEvent, this));
147149
}
148150
}
149151

0 commit comments

Comments
 (0)