Skip to content

Commit 7c3659d

Browse files
committed
fixes
1 parent 88f1f06 commit 7c3659d

File tree

4 files changed

+44
-36
lines changed

4 files changed

+44
-36
lines changed

unified-runtime/source/adapters/level_zero/v2/queue_create.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,12 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext,
6969

7070
auto zeIndex = v2::getZeIndex(pProperties);
7171

72-
if ((flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0 &&
73-
!zeIndex.has_value()) {
72+
if ((flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0) {
7473
*phQueue =
7574
ur_queue_handle_t_::create<v2::ur_queue_immediate_out_of_order_t>(
7675
hContext, hDevice, v2::getZeOrdinal(hDevice),
77-
v2::getZePriority(flags), v2::eventFlagsFromQueueFlags(flags),
78-
flags);
76+
v2::getZePriority(flags), zeIndex,
77+
v2::eventFlagsFromQueueFlags(flags), flags);
7978
} else {
8079
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
8180
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),

unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.cpp

Lines changed: 20 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//===------- queue_immediate_out_of_order.cpp - Level Zero Adapter -------===//
22
//
3-
// Copyright (C) 2024 Intel Corporation
3+
// Copyright (C) 2025 Intel Corporation
44
//
55
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
66
// Exceptions. See LICENSE.TXT
@@ -14,40 +14,29 @@
1414

1515
namespace v2 {
1616

17-
// Helper function to intialize std::array of command list manager.
18-
// This is needed because command list manager does not have a default
19-
// constructor.
20-
template <size_t... Is>
21-
std::array<ur_command_list_manager, sizeof...(Is)> createCommandListManagers(
22-
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint32_t ordinal,
23-
ze_command_queue_priority_t priority, std::index_sequence<Is...>) {
24-
return {
25-
((void)Is, ur_command_list_manager(
26-
hContext, hDevice,
27-
hContext->getCommandListCache().getImmediateCommandList(
28-
hDevice->ZeDevice,
29-
{true, ordinal, true /* always enable copy offload */},
30-
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, priority)))...};
31-
}
32-
3317
template <size_t N>
34-
std::array<ur_command_list_manager, N>
35-
createCommandListManagers(ur_context_handle_t hContext,
36-
ur_device_handle_t hDevice, uint32_t ordinal,
37-
ze_command_queue_priority_t priority) {
38-
return createCommandListManagers(hContext, hDevice, ordinal, priority,
39-
std::make_index_sequence<N>{});
18+
std::array<ur_command_list_manager, N> createCommandListManagers(
19+
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint32_t ordinal,
20+
ze_command_queue_priority_t priority, std::optional<int32_t> index) {
21+
return createArrayOf<ur_command_list_manager, numCommandLists>([&](size_t) {
22+
return ur_command_list_manager(
23+
hContext, hDevice,
24+
hContext->getCommandListCache().getImmediateCommandList(
25+
hDevice->ZeDevice,
26+
{true, ordinal, true /* always enable copy offload */},
27+
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, priority, index));
28+
});
4029
}
4130

4231
ur_queue_immediate_out_of_order_t::ur_queue_immediate_out_of_order_t(
4332
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint32_t ordinal,
44-
ze_command_queue_priority_t priority, event_flags_t eventFlags,
45-
ur_queue_flags_t flags)
33+
ze_command_queue_priority_t priority, std::optional<int32_t> index,
34+
event_flags_t eventFlags, ur_queue_flags_t flags)
4635
: hContext(hContext), hDevice(hDevice),
4736
eventPool(hContext->getEventPoolCache(PoolCacheType::Immediate)
4837
.borrow(hDevice->Id.value(), eventFlags)),
4938
commandListManagers(createCommandListManagers<numCommandLists>(
50-
hContext, hDevice, ordinal, priority)),
39+
hContext, hDevice, ordinal, priority, index)),
5140
flags(flags) {
5241
for (size_t i = 0; i < numCommandLists; i++) {
5342
barrierEvents[i] = eventPool->allocate();
@@ -153,11 +142,11 @@ ur_result_t ur_queue_immediate_out_of_order_t::enqueueEventsWaitWithBarrier(
153142
ur_event_handle_t *phEvent) {
154143
TRACK_SCOPE_LATENCY(
155144
"ur_queue_immediate_out_of_order_t::enqueueEventsWaitWithBarrier");
156-
// For in-order queue we don't need a real L0 barrier, just wait for
157-
// requested events in potentially different queues and add a "barrier"
158-
// event signal because it is already guaranteed that previous commands
159-
// in this queue are completed when the signal is started. However, we do
160-
// need to use barrier if profiling is enabled: see
145+
// Since we use L0 in-order command lists, we don't need a real L0 barrier,
146+
// just wait for requested events in potentially different queues and add a
147+
// "barrier" event signal because it is already guaranteed that previous
148+
// commands in this queue are completed when the signal is started. However,
149+
// we do need to use barrier if profiling is enabled: see
161150
// zeCommandListAppendWaitOnEvents
162151
bool needsRealBarrier = (flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
163152
auto appendEventsWaitFn =

unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//===------- queue_immediate_out_of_order.hpp - Level Zero Adapter -------===//
22
//
3-
// Copyright (C) 2024 Intel Corporation
3+
// Copyright (C) 2025 Intel Corporation
44
//
55
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
66
// Exceptions. See LICENSE.TXT
@@ -25,6 +25,10 @@ namespace v2 {
2525

2626
struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ {
2727
private:
28+
// Number of command lists was chosen experimentally as a compromise
29+
// between number of allowed concurrent launches and overhead of
30+
// iterating over the command lists to synchronize them.
31+
// This might need to be changed for future hardware.
2832
static constexpr size_t numCommandLists = 4;
2933

3034
ur_context_handle_t hContext;
@@ -49,6 +53,7 @@ struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ {
4953
ur_queue_immediate_out_of_order_t(ur_context_handle_t, ur_device_handle_t,
5054
uint32_t ordinal,
5155
ze_command_queue_priority_t priority,
56+
std::optional<int32_t> index,
5257
event_flags_t eventFlags,
5358
ur_queue_flags_t flags);
5459

unified-runtime/source/common/ur_util.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,4 +553,19 @@ inline bool isPointerAlignedTo(uint32_t Alignment, void *Ptr) {
553553
reinterpret_cast<std::uintptr_t>(Ptr) % Alignment == 0;
554554
}
555555

556+
// Implementation detail of createArrayOf. Expands the index pack Is... and
// calls `f` once per index, using each result to initialize the array element
// at that position. The calls appear inside a braced initializer list, so
// they are evaluated in increasing index order.
template <typename T, typename F, size_t... Is>
std::array<T, sizeof...(Is)> createArrayOfHelper(F &&f,
                                                 std::index_sequence<Is...>) {
  return {(f(Is))...};
}

// Helper function to initialize std::array of non-default constructible
// types. Calls provided ctor function (passing index to the array) to create
// each element of the array.
//
// T    - element type of the resulting array.
// N    - number of elements to create.
// ctor - callable invoked as ctor(size_t index); its result initializes the
//        element at `index`.
template <typename T, size_t N, typename F>
std::array<T, N> createArrayOf(F &&ctor) {
  return createArrayOfHelper<T, F>(std::forward<F>(ctor),
                                   std::make_index_sequence<N>{});
}
570+
556571
#endif /* UR_UTIL_H */

0 commit comments

Comments
 (0)