Skip to content

Commit f0c1db6

Browse files
authored
Merge pull request #2270 from igchor/deferred_event_deallocation
[L0 v2] Use single command list for all operations and implement deferred event deallocation
2 parents 308b1ea + e3b09e2 commit f0c1db6

15 files changed

+336
-276
lines changed

scripts/templates/queue_api.hpp.mako

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ from templates import helper as th
2525

2626
struct ur_queue_handle_t_ {
2727
virtual ~ur_queue_handle_t_();
28+
29+
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
30+
2831
%for obj in th.get_queue_related_functions(specs, n, tags):
2932
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;
3033
%endfor

source/adapters/level_zero/v2/command_list_cache.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,21 @@
1313

1414
#include "../device.hpp"
1515

16+
// Extension descriptor carrying the copy-offload hint. In this file it is
// filled in by createCommandList (copyOffloadEnabled is taken from the
// command list descriptor) and attached through pNext to both the immediate
// queue descriptor and the regular command list descriptor.
// NOTE(review): declared locally here, presumably because the driver
// extension header is not available to this build - confirm.
typedef struct _zex_intel_queue_copy_operations_offload_hint_exp_desc_t {
17+
ze_structure_type_t stype;
18+
const void *pNext;
19+
ze_bool_t copyOffloadEnabled;
20+
} zex_intel_queue_copy_operations_offload_hint_exp_desc_t;
21+
22+
// Structure-type tag (0x0003001B, cast to ze_structure_type_t) returned by
// the getZeStructureType specialization for the copy-offload hint descriptor.
#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES \
23+
(ze_structure_type_t)0x0003001B
24+
25+
// Specialization that maps the copy-offload hint descriptor type to its
// structure-type tag.
// NOTE(review): presumably consulted by ZeStruct<> to initialize `stype` -
// confirm against the ZeStruct definition.
template <>
26+
ze_structure_type_t
27+
getZeStructureType<zex_intel_queue_copy_operations_offload_hint_exp_desc_t>() {
28+
return ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES;
29+
}
30+
1631
bool v2::immediate_command_list_descriptor_t::operator==(
1732
const immediate_command_list_descriptor_t &rhs) const {
1833
return ZeDevice == rhs.ZeDevice && IsInOrder == rhs.IsInOrder &&
@@ -45,6 +60,10 @@ command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext)
4560

4661
raii::ze_command_list_handle_t
4762
command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
63+
ZeStruct<zex_intel_queue_copy_operations_offload_hint_exp_desc_t> offloadDesc;
64+
offloadDesc.copyOffloadEnabled =
65+
std::visit([](auto &&arg) { return arg.CopyOffloadEnabled; }, desc);
66+
4867
if (auto ImmCmdDesc =
4968
std::get_if<immediate_command_list_descriptor_t>(&desc)) {
5069
ze_command_list_handle_t ZeCommandList;
@@ -58,6 +77,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
5877
QueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
5978
QueueDesc.index = ImmCmdDesc->Index.value();
6079
}
80+
QueueDesc.pNext = &offloadDesc;
6181
ZE2UR_CALL_THROWS(
6282
zeCommandListCreateImmediate,
6383
(ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList));
@@ -68,6 +88,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
6888
CmdListDesc.flags =
6989
RegCmdDesc.IsInOrder ? ZE_COMMAND_LIST_FLAG_IN_ORDER : 0;
7090
CmdListDesc.commandQueueGroupOrdinal = RegCmdDesc.Ordinal;
91+
CmdListDesc.pNext = &offloadDesc;
7192

7293
ze_command_list_handle_t ZeCommandList;
7394
ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice,
@@ -78,13 +99,14 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
7899

79100
raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList(
80101
ze_device_handle_t ZeDevice, bool IsInOrder, uint32_t Ordinal,
81-
ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority,
82-
std::optional<uint32_t> Index) {
102+
bool CopyOffloadEnable, ze_command_queue_mode_t Mode,
103+
ze_command_queue_priority_t Priority, std::optional<uint32_t> Index) {
83104
TRACK_SCOPE_LATENCY("command_list_cache_t::getImmediateCommandList");
84105

85106
immediate_command_list_descriptor_t Desc;
86107
Desc.ZeDevice = ZeDevice;
87108
Desc.Ordinal = Ordinal;
109+
Desc.CopyOffloadEnabled = CopyOffloadEnable;
88110
Desc.IsInOrder = IsInOrder;
89111
Desc.Mode = Mode;
90112
Desc.Priority = Priority;
@@ -99,13 +121,15 @@ raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList(
99121

100122
raii::command_list_unique_handle
101123
command_list_cache_t::getRegularCommandList(ze_device_handle_t ZeDevice,
102-
bool IsInOrder, uint32_t Ordinal) {
124+
bool IsInOrder, uint32_t Ordinal,
125+
bool CopyOffloadEnable) {
103126
TRACK_SCOPE_LATENCY("command_list_cache_t::getRegularCommandList");
104127

105128
regular_command_list_descriptor_t Desc;
106129
Desc.ZeDevice = ZeDevice;
107130
Desc.IsInOrder = IsInOrder;
108131
Desc.Ordinal = Ordinal;
132+
Desc.CopyOffloadEnabled = CopyOffloadEnable;
109133

110134
auto [CommandList, _] = getCommandList(Desc).release();
111135

source/adapters/level_zero/v2/command_list_cache.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct immediate_command_list_descriptor_t {
3030
ze_device_handle_t ZeDevice;
3131
bool IsInOrder;
3232
uint32_t Ordinal;
33+
bool CopyOffloadEnabled;
3334
ze_command_queue_mode_t Mode;
3435
ze_command_queue_priority_t Priority;
3536
std::optional<uint32_t> Index;
@@ -40,6 +41,7 @@ struct regular_command_list_descriptor_t {
4041
ze_device_handle_t ZeDevice;
4142
bool IsInOrder;
4243
uint32_t Ordinal;
44+
bool CopyOffloadEnabled;
4345
bool operator==(const regular_command_list_descriptor_t &rhs) const;
4446
};
4547

@@ -56,12 +58,13 @@ struct command_list_cache_t {
5658

5759
raii::command_list_unique_handle
5860
getImmediateCommandList(ze_device_handle_t ZeDevice, bool IsInOrder,
59-
uint32_t Ordinal, ze_command_queue_mode_t Mode,
61+
uint32_t Ordinal, bool CopyOffloadEnable,
62+
ze_command_queue_mode_t Mode,
6063
ze_command_queue_priority_t Priority,
6164
std::optional<uint32_t> Index = std::nullopt);
6265
raii::command_list_unique_handle
6366
getRegularCommandList(ze_device_handle_t ZeDevice, bool IsInOrder,
64-
uint32_t Ordinal);
67+
uint32_t Ordinal, bool CopyOffloadEnable);
6568

6669
// For testing purposes
6770
size_t getNumImmediateCommandLists();

source/adapters/level_zero/v2/event.cpp

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "event.hpp"
1414
#include "event_pool.hpp"
1515
#include "event_provider.hpp"
16+
#include "queue_api.hpp"
1617

1718
#include "../ur_interface_loader.hpp"
1819

@@ -24,6 +25,12 @@ ur_event_handle_t_::ur_event_handle_t_(
2425
zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution),
2526
timestampMaxValue(getDevice()->getTimestampMask()) {}
2627

28+
// Records the queue and the command type this event is associated with,
// overwriting any previously stored values. event_pool::allocate calls this
// on each event it hands out.
void ur_event_handle_t_::resetQueueAndCommand(ur_queue_handle_t hQueue,
29+
ur_command_t commandType) {
30+
this->hQueue = hQueue;
31+
this->commandType = commandType;
32+
}
33+
2734
void ur_event_handle_t_::reset() {
2835
// consider making an abstraction for regular/counter-based
2936
// events if there are more conditions of this type
@@ -33,6 +40,8 @@ void ur_event_handle_t_::reset() {
3340
}
3441

3542
// Returns the underlying Level Zero event handle.
// The asserts check that a queue has been set and that the command type is
// no longer UR_COMMAND_FORCE_UINT32, the value the member is initialized to
// in event.hpp; both are assigned by resetQueueAndCommand.
ze_event_handle_t ur_event_handle_t_::getZeEvent() const {
43+
assert(hQueue);
44+
assert(commandType != UR_COMMAND_FORCE_UINT32);
3645
return zeEvent.get();
3746
}
3847

@@ -41,14 +50,27 @@ ur_result_t ur_event_handle_t_::retain() {
4150
return UR_RESULT_SUCCESS;
4251
}
4352

53+
// Hands this event back to its pool via pool->free(this) and returns
// UR_RESULT_SUCCESS unconditionally.
// The asserts state the expected preconditions: zeEventQueryStatus reports
// ZE_RESULT_SUCCESS for the ze event and the reference count is already zero.
// They are only assertions - nothing here checks these conditions when
// asserts are compiled out.
ur_result_t ur_event_handle_t_::releaseDeferred() {
54+
assert(zeEventQueryStatus(zeEvent.get()) == ZE_RESULT_SUCCESS);
55+
assert(RefCount.load() == 0);
56+
57+
pool->free(this);
58+
return UR_RESULT_SUCCESS;
59+
}
60+
4461
ur_result_t ur_event_handle_t_::release() {
4562
if (!RefCount.decrementAndTest())
4663
return UR_RESULT_SUCCESS;
4764

65+
// Need to take a lock before checking if the event is timestamped.
66+
std::unique_lock<ur_shared_mutex> lock(Mutex);
67+
4868
if (isTimestamped() && adjustedEventEndTimestamp == 0) {
4969
// L0 will write end timestamp to this event some time in the future,
5070
// so we can't release it yet.
51-
// TODO: delay releasing until the end timestamp is written.
71+
72+
assert(hQueue);
73+
hQueue->deferEventFree(this);
5274
return UR_RESULT_SUCCESS;
5375
}
5476

@@ -99,17 +121,16 @@ uint64_t ur_event_handle_t_::getEventEndTimestamp() {
99121
if (adjustedEventEndTimestamp)
100122
return adjustedEventEndTimestamp;
101123

102-
// If the result is 0, we have not yet gotten results back and so we just
103-
// return it.
104-
if (recordEventEndTimestamp == 0)
105-
return recordEventEndTimestamp;
124+
auto status = zeEventQueryStatus(zeEvent.get());
125+
if (status != ZE_RESULT_SUCCESS) {
126+
// profiling info not ready
127+
return 0;
128+
}
106129

107-
// Now that we have the result, there is no need to keep it in the queue
108-
// anymore, so we cache it on the event and evict the record from the
109-
// queue.
110130
adjustedEventEndTimestamp =
111131
adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp,
112132
timestampMaxValue, zeTimerResolution);
133+
113134
return adjustedEventEndTimestamp;
114135
}
115136

@@ -118,13 +139,19 @@ void ur_event_handle_t_::recordStartTimestamp() {
118139
UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps(
119140
getDevice(), &deviceStartTimestamp, nullptr));
120141

142+
assert(adjustedEventStartTimestamp == 0);
121143
adjustedEventStartTimestamp = deviceStartTimestamp;
122144
}
123145

124-
uint64_t *ur_event_handle_t_::getEventEndTimestampPtr() {
125-
return &recordEventEndTimestamp;
146+
// Returns a pointer to this event's recordEventEndTimestamp member paired
// with the raw ze event handle. Per the declaration comment in event.hpp,
// the caller is responsible for signaling the event once the timestamp is
// ready.
std::pair<uint64_t *, ze_event_handle_t>
147+
ur_event_handle_t_::getEventEndTimestampAndHandle() {
148+
return {&recordEventEndTimestamp, zeEvent.get()};
126149
}
127150

151+
// Returns the queue stored by resetQueueAndCommand; the member is
// initialized to nullptr in event.hpp.
ur_queue_handle_t ur_event_handle_t_::getQueue() const { return hQueue; }
152+
153+
// Returns the command type stored by resetQueueAndCommand; the member is
// initialized to UR_COMMAND_FORCE_UINT32 in event.hpp.
ur_command_t ur_event_handle_t_::getCommandType() const { return commandType; }
154+
128155
namespace ur::level_zero {
129156
ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); }
130157

@@ -159,6 +186,19 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
159186
case UR_EVENT_INFO_REFERENCE_COUNT: {
160187
return returnValue(hEvent->RefCount.load());
161188
}
189+
case UR_EVENT_INFO_COMMAND_QUEUE: {
190+
return returnValue(ur_queue_handle_t{hEvent->getQueue()});
191+
}
192+
case UR_EVENT_INFO_CONTEXT: {
193+
ur_context_handle_t hContext;
194+
UR_CALL(::ur::level_zero::urQueueGetInfo(
195+
hEvent->getQueue(), UR_QUEUE_INFO_CONTEXT, sizeof(hContext),
196+
reinterpret_cast<void *>(&hContext), nullptr));
197+
return returnValue(hContext);
198+
}
199+
case UR_EVENT_INFO_COMMAND_TYPE: {
200+
return returnValue(hEvent->getCommandType());
201+
}
162202
default:
163203
logger::error(
164204
"Unsupported ParamName in urEventGetInfo: ParamName=ParamName={}(0x{})",

source/adapters/level_zero/v2/event.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,19 @@ struct ur_event_handle_t_ : _ur_object {
2727
ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation,
2828
v2::event_pool *pool);
2929

30+
// Set the queue and command that this event is associated with
31+
void resetQueueAndCommand(ur_queue_handle_t hQueue, ur_command_t commandType);
32+
3033
void reset();
3134
ze_event_handle_t getZeEvent() const;
3235

3336
ur_result_t retain();
3437
ur_result_t release();
3538

39+
// releases a signaled and no longer in-use event, that's on the
40+
// deferred events list in the queue
41+
ur_result_t releaseDeferred();
42+
3643
// Tells if this event was created as a timestamp event, allowing profiling
3744
// info even if profiling is not enabled.
3845
bool isTimestamped() const;
@@ -43,13 +50,24 @@ struct ur_event_handle_t_ : _ur_object {
4350
// Device associated with this event
4451
ur_device_handle_t getDevice() const;
4552

53+
// Queue associated with this event
54+
ur_queue_handle_t getQueue() const;
55+
56+
// Get the type of the command that this event is associated with
57+
ur_command_t getCommandType() const;
58+
4659
void recordStartTimestamp();
47-
uint64_t *getEventEndTimestampPtr();
60+
61+
// Get pointer to the end timestamp, and ze event handle.
62+
// Caller is responsible for signaling the event once the timestamp is ready.
63+
std::pair<uint64_t *, ze_event_handle_t> getEventEndTimestampAndHandle();
4864

4965
uint64_t getEventStartTimestmap() const;
5066
uint64_t getEventEndTimestamp();
5167

5268
private:
69+
ur_queue_handle_t hQueue = nullptr;
70+
ur_command_t commandType = UR_COMMAND_FORCE_UINT32;
5371
v2::raii::cache_borrowed_event zeEvent;
5472
v2::event_pool *pool;
5573

source/adapters/level_zero/v2/event_pool.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ namespace v2 {
1515

1616
static constexpr size_t EVENTS_BURST = 64;
1717

18-
ur_event_handle_t_ *event_pool::allocate() {
18+
ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue,
19+
ur_command_t commandType) {
1920
TRACK_SCOPE_LATENCY("event_pool::allocate");
2021

2122
std::unique_lock<std::mutex> lock(*mutex);
@@ -32,6 +33,8 @@ ur_event_handle_t_ *event_pool::allocate() {
3233
auto event = freelist.back();
3334
freelist.pop_back();
3435

36+
event->resetQueueAndCommand(hQueue, commandType);
37+
3538
return event;
3639
}
3740

source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class event_pool {
4141
DeviceId Id() { return provider->device()->Id.value(); };
4242

4343
// Allocate an event from the pool. Thread safe.
44-
ur_event_handle_t_ *allocate();
44+
ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue,
45+
ur_command_t commandType);
4546

4647
// Free an event back to the pool. Thread safe.
4748
void free(ur_event_handle_t_ *event);

source/adapters/level_zero/v2/memory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ static ur_result_t synchronousZeCopy(ur_context_handle_t hContext,
157157
hDevice
158158
->QueueGroup[ur_device_handle_t_::queue_group_info_t::type::Compute]
159159
.ZeOrdinal,
160-
ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
160+
true, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
161161
std::nullopt);
162162

163163
ZE2UR_CALL(zeCommandListAppendMemoryCopy,

source/adapters/level_zero/v2/queue_api.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
struct ur_queue_handle_t_ {
1818
virtual ~ur_queue_handle_t_();
19+
20+
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
21+
1922
virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *,
2023
size_t *) = 0;
2124
virtual ur_result_t queueRetain() = 0;

0 commit comments

Comments
 (0)