Skip to content

[SYCL][UR][L0 v2] implement OOO immediate queue #18903

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/test-e2e/ProfilingTag/profiling_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
// UNSUPPORTED: cuda
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/14053

// UNSUPPORTED: level_zero_v2_adapter
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19116

#include "common.hpp"

int main() {
Expand Down
2 changes: 2 additions & 0 deletions sycl/test-e2e/WorkGroupMemory/basic_usage.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: hip
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17339
// UNSUPPORTED: level_zero_v2_adapter
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19116
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// XFAIL: spirv-backend
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/lockable.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.cpp
Expand All @@ -180,6 +181,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_out_of_order.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
)
install_ur_library(ur_adapter_level_zero_v2)
Expand Down
30 changes: 30 additions & 0 deletions unified-runtime/source/adapters/level_zero/v2/event_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,34 @@ class event_pool {
ur_mutex mutex;
};

// Allocates an event from `eventPool` only when the caller supplied an output
// slot. When `phEvent` is null nothing is allocated and nullptr is returned;
// otherwise the new event is stored in `*phEvent`, `setQueue(queue)` is called
// on it, and the same handle is returned.
static inline ur_event_handle_t
createEventIfRequested(event_pool *eventPool, ur_event_handle_t *phEvent,
                       ur_queue_t_ *queue) {
  if (phEvent != nullptr) {
    // Publish the handle to the caller first, then bind it to the queue.
    ur_event_handle_t &userEvent = *phEvent;
    userEvent = eventPool->allocate();
    userEvent->setQueue(queue);
    return userEvent;
  }

  // No event was requested by the caller.
  return nullptr;
}

// Unconditionally allocates an event from `eventPool` and calls
// `setQueue(queue)` on it (used by functions that need to keep the event
// internally). If the caller also asked for the event (`phEvent` is non-null),
// the handle is written to `*phEvent` and `retain()` is called on it so the
// user-visible handle does not trigger a premature release.
static inline ur_event_handle_t createEventAndRetain(event_pool *eventPool,
                                                     ur_event_handle_t *phEvent,
                                                     ur_queue_t_ *queue) {
  auto newEvent = eventPool->allocate();
  newEvent->setQueue(queue);

  // Internal-only use: nothing to hand back through the out-parameter.
  if (phEvent == nullptr) {
    return newEvent;
  }

  // Share the event with the caller and take an extra reference for them.
  *phEvent = newEvent;
  newEvent->retain();
  return newEvent;
}

} // namespace v2
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ template <typename T> struct locked {
object_ = object;
}
T *operator->() { return object_; }
auto &operator[](size_t index) { return (*object_)[index]; }

private:
std::unique_lock<ur_mutex> lock_;
Expand Down
14 changes: 11 additions & 3 deletions unified-runtime/source/adapters/level_zero/v2/queue_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,17 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext,

auto zeIndex = v2::getZeIndex(pProperties);

*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
if ((flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0) {
*phQueue =
ur_queue_handle_t_::create<v2::ur_queue_immediate_out_of_order_t>(
hContext, hDevice, v2::getZeOrdinal(hDevice),
v2::getZePriority(flags), zeIndex,
v2::eventFlagsFromQueueFlags(flags), flags);
} else {
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
}

return UR_RESULT_SUCCESS;
} catch (...) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@

#include "../common.hpp"
#include "queue_immediate_in_order.hpp"
#include "queue_immediate_out_of_order.hpp"
#include <ur_api.h>
#include <variant>

struct ur_queue_handle_t_ : ur::handle_base<ur::level_zero::ddi_getter> {
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t>;
using data_variant = std::variant<v2::ur_queue_immediate_in_order_t,
v2::ur_queue_immediate_out_of_order_t>;
data_variant queue_data;

static constexpr uintptr_t queue_offset =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
// zeCommandListAppendWaitOnEvents
if ((flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0) {
return commandListManager.lock()->appendEventsWaitWithBarrier(
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
numEventsInWaitList, phEventWaitList,
createEventIfRequested(eventPool.get(), phEvent, this));
} else {
return commandListManager.lock()->appendEventsWait(
numEventsInWaitList, phEventWaitList, createEventIfRequested(phEvent));
numEventsInWaitList, phEventWaitList,
createEventIfRequested(eventPool.get(), phEvent, this));
}
}

Expand Down
Loading