Skip to content

Commit 7a275e4

Browse files
authored
[SYCL][UR][L0 v2] implement OOO immediate queue (#18903)
by using multiple in-order queues and a round-robin strategy to dispatch work. With this approach we don't need to worry about events' lifetimes. Since we are still using counter-based events, we don't need any special logic to handle cases where an event is released right after being passed as a signal event or as part of a wait list.
1 parent 65d926c commit 7a275e4

File tree

12 files changed

+825
-66
lines changed

12 files changed

+825
-66
lines changed

sycl/test-e2e/ProfilingTag/profiling_queue.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
// UNSUPPORTED: cuda
2525
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/14053
2626

27+
// UNSUPPORTED: level_zero_v2_adapter
28+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19116
29+
2730
#include "common.hpp"
2831

2932
int main() {

sycl/test-e2e/WorkGroupMemory/basic_usage.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// UNSUPPORTED: hip
22
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17339
3+
// UNSUPPORTED: level_zero_v2_adapter
4+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19116
35
// RUN: %{build} -o %t.out
46
// RUN: %{run} %t.out
57
// XFAIL: spirv-backend

unified-runtime/source/adapters/level_zero/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
164164
${CMAKE_CURRENT_SOURCE_DIR}/v2/lockable.hpp
165165
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
166166
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
167+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.hpp
167168
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
168169
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
169170
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.cpp
@@ -180,6 +181,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
180181
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
181182
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
182183
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
184+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.cpp
183185
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
184186
)
185187
install_ur_library(ur_adapter_level_zero_v2)

unified-runtime/source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,34 @@ class event_pool {
6060
ur_mutex mutex;
6161
};
6262

63+
// Only create an event when requested by the user.
//
// eventPool: pool the new event is allocated from (dereferenced whenever
//            phEvent is non-null).
// phEvent:   optional out-parameter; when null, no event is allocated.
// queue:     queue passed to setQueue() on the newly allocated event.
//
// Returns nullptr when phEvent is null; otherwise returns the newly allocated
// event, which is also stored in *phEvent.
64+
static inline ur_event_handle_t
65+
createEventIfRequested(event_pool *eventPool, ur_event_handle_t *phEvent,
66+
ur_queue_t_ *queue) {
67+
// The caller did not ask for an event, so skip the allocation entirely.
if (phEvent == nullptr) {
68+
return nullptr;
69+
}
70+
71+
(*phEvent) = eventPool->allocate();
72+
(*phEvent)->setQueue(queue);
73+
return (*phEvent);
74+
}
75+
76+
// Always creates an event (used in functions that need to store the event
77+
// internally). If event was requested by the user, also increase ref count of
78+
// that event to avoid premature release.
//
// eventPool: pool the new event is allocated from.
// phEvent:   optional out-parameter; when non-null it receives the same
//            handle that is returned, and retain() is called once on it.
// queue:     queue passed to setQueue() on the newly allocated event.
//
// Returns the newly allocated event (never skipped, unlike
// createEventIfRequested).
79+
static inline ur_event_handle_t createEventAndRetain(event_pool *eventPool,
80+
ur_event_handle_t *phEvent,
81+
ur_queue_t_ *queue) {
82+
auto hEvent = eventPool->allocate();
83+
hEvent->setQueue(queue);
84+
85+
// The handle is handed out twice (to the user and to the internal caller),
// so take one extra reference for the user-visible copy.
if (phEvent) {
86+
(*phEvent) = hEvent;
87+
hEvent->retain();
88+
}
89+
90+
return hEvent;
91+
}
92+
6393
} // namespace v2

unified-runtime/source/adapters/level_zero/v2/lockable.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ template <typename T> struct locked {
1818
object_ = object;
1919
}
2020
T *operator->() { return object_; }
21+
// Forwards indexing to the wrapped object and returns a reference to the
// result of (*object_)[index]; no bounds checking is added here.
auto &operator[](size_t index) { return (*object_)[index]; }
2122

2223
private:
2324
std::unique_lock<ur_mutex> lock_;

unified-runtime/source/adapters/level_zero/v2/queue_create.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,17 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext,
6969

7070
auto zeIndex = v2::getZeIndex(pProperties);
7171

72-
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
73-
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
74-
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
72+
if ((flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0) {
73+
*phQueue =
74+
ur_queue_handle_t_::create<v2::ur_queue_immediate_out_of_order_t>(
75+
hContext, hDevice, v2::getZeOrdinal(hDevice),
76+
v2::getZePriority(flags), zeIndex,
77+
v2::eventFlagsFromQueueFlags(flags), flags);
78+
} else {
79+
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
80+
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
81+
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
82+
}
7583

7684
return UR_RESULT_SUCCESS;
7785
} catch (...) {

unified-runtime/source/adapters/level_zero/v2/queue_handle.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515

1616
#include "../common.hpp"
1717
#include "queue_immediate_in_order.hpp"
18+
#include "queue_immediate_out_of_order.hpp"
1819
#include <ur_api.h>
1920
#include <variant>
2021

2122
struct ur_queue_handle_t_ : ur::handle_base<ur::level_zero::ddi_getter> {
22-
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t>;
23+
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t,
24+
v2::ur_queue_immediate_out_of_order_t>;
2325
data_variant queue_data;
2426

2527
static constexpr uintptr_t queue_offset =

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
140140
// zeCommandListAppendWaitOnEvents
141141
if ((flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0) {
142142
return commandListManager.lock()->appendEventsWaitWithBarrier(
143-
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
143+
numEventsInWaitList, phEventWaitList,
144+
createEventIfRequested(eventPool.get(), phEvent, this));
144145
} else {
145146
return commandListManager.lock()->appendEventsWait(
146-
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
147+
numEventsInWaitList, phEventWaitList,
148+
createEventIfRequested(eventPool.get(), phEvent, this));
147149
}
148150
}
149151

0 commit comments

Comments
 (0)