Skip to content

Commit fdeef32

Browse files
committed
[L0 v2] Implement deferred event deallocation
and fix timestamp checking in events. Until now, recordEventEndTimestamp was checked directly to see whether the WriteGlobalTimestamp had completed. This is not safe, as no atomics were used. To avoid any races, recordEventEndTimestamp is now read only once the associated zeEvent has completed.
1 parent 4e5c21e commit fdeef32

File tree

10 files changed

+82
-23
lines changed

10 files changed

+82
-23
lines changed

scripts/templates/queue_api.hpp.mako

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ from templates import helper as th
2525

2626
struct ur_queue_handle_t_ {
2727
virtual ~ur_queue_handle_t_();
28+
29+
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
30+
2831
%for obj in th.get_queue_related_functions(specs, n, tags):
2932
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;
3033
%endfor

source/adapters/level_zero/v2/event.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "event.hpp"
1414
#include "event_pool.hpp"
1515
#include "event_provider.hpp"
16+
#include "queue_api.hpp"
1617

1718
#include "../ur_interface_loader.hpp"
1819

@@ -24,6 +25,10 @@ ur_event_handle_t_::ur_event_handle_t_(
2425
zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution),
2526
timestampMaxValue(getDevice()->getTimestampMask()) {}
2627

28+
// Associates this event with a queue by storing hQueue in the hQueue member,
// overwriting whatever value was stored before. No validation is performed
// on the handle here.
void ur_event_handle_t_::resetQueue(ur_queue_handle_t hQueue) {
29+
this->hQueue = hQueue;
30+
}
31+
2732
void ur_event_handle_t_::reset() {
2833
// consider making an abstraction for regular/counter based
2934
// events if there's more of this type of conditions
@@ -33,6 +38,7 @@ void ur_event_handle_t_::reset() {
3338
}
3439

3540
// Returns the raw Level Zero event handle held by the zeEvent member.
// Asserts that a queue has been associated with this event (hQueue is
// non-null) before handing the handle out.
ze_event_handle_t ur_event_handle_t_::getZeEvent() const {
41+
assert(hQueue);
3642
return zeEvent.get();
3743
}
3844

@@ -41,14 +47,27 @@ ur_result_t ur_event_handle_t_::retain() {
4147
return UR_RESULT_SUCCESS;
4248
}
4349

50+
// Returns this event to its pool via pool->free(this).
// Preconditions, checked only through assert:
//  - zeEventQueryStatus on the underlying ze event reports ZE_RESULT_SUCCESS
//  - RefCount is 0
// Always returns UR_RESULT_SUCCESS.
ur_result_t ur_event_handle_t_::releaseDeferred() {
51+
assert(zeEventQueryStatus(zeEvent.get()) == ZE_RESULT_SUCCESS);
52+
assert(RefCount.load() == 0);
53+
54+
pool->free(this);
55+
return UR_RESULT_SUCCESS;
56+
}
57+
4458
ur_result_t ur_event_handle_t_::release() {
4559
if (!RefCount.decrementAndTest())
4660
return UR_RESULT_SUCCESS;
4761

62+
// Need to take a lock before checking if the event is timestamped.
63+
std::unique_lock<ur_shared_mutex> lock(Mutex);
64+
4865
if (isTimestamped() && adjustedEventEndTimestamp == 0) {
4966
// L0 will write end timestamp to this event some time in the future,
5067
// so we can't release it yet.
51-
// TODO: delay releasing until the end timestamp is written.
68+
69+
assert(hQueue);
70+
hQueue->deferEventFree(this);
5271
return UR_RESULT_SUCCESS;
5372
}
5473

@@ -99,17 +118,16 @@ uint64_t ur_event_handle_t_::getEventEndTimestamp() {
99118
if (adjustedEventEndTimestamp)
100119
return adjustedEventEndTimestamp;
101120

102-
// If the result is 0, we have not yet gotten results back and so we just
103-
// return it.
104-
if (recordEventEndTimestamp == 0)
105-
return recordEventEndTimestamp;
121+
auto status = zeEventQueryStatus(zeEvent.get());
122+
if (status != ZE_RESULT_SUCCESS) {
123+
// profiling info not ready
124+
return 0;
125+
}
106126

107-
// Now that we have the result, there is no need to keep it in the queue
108-
// anymore, so we cache it on the event and evict the record from the
109-
// queue.
110127
adjustedEventEndTimestamp =
111128
adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp,
112129
timestampMaxValue, zeTimerResolution);
130+
113131
return adjustedEventEndTimestamp;
114132
}
115133

@@ -118,11 +136,13 @@ void ur_event_handle_t_::recordStartTimestamp() {
118136
UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps(
119137
getDevice(), &deviceStartTimestamp, nullptr));
120138

139+
assert(adjustedEventStartTimestamp == 0);
121140
adjustedEventStartTimestamp = deviceStartTimestamp;
122141
}
123142

124-
uint64_t *ur_event_handle_t_::getEventEndTimestampPtr() {
125-
return &recordEventEndTimestamp;
143+
// Returns a pair of:
//  - a pointer to this event's recordEventEndTimestamp member, and
//  - the raw ze event handle held by zeEvent.
// The handle is read directly from zeEvent rather than through getZeEvent(),
// so the hQueue assertion in getZeEvent() is not applied on this path.
std::pair<uint64_t *, ze_event_handle_t>
144+
ur_event_handle_t_::getEventEndTimestampAndHandle() {
145+
return {&recordEventEndTimestamp, zeEvent.get()};
126146
}
127147

128148
namespace ur::level_zero {

source/adapters/level_zero/v2/event.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,19 @@ struct ur_event_handle_t_ : _ur_object {
2727
ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation,
2828
v2::event_pool *pool);
2929

30+
// Set the queue that this event is associated with
31+
void resetQueue(ur_queue_handle_t hQueue);
32+
3033
void reset();
3134
ze_event_handle_t getZeEvent() const;
3235

3336
ur_result_t retain();
3437
ur_result_t release();
3538

39+
// releases a signaled and no longer in-use event, that's on the
40+
// deferred events list in the queue
41+
ur_result_t releaseDeferred();
42+
3643
// Tells if this event was created as a timestamp event, allowing profiling
3744
// info even if profiling is not enabled.
3845
bool isTimestamped() const;
@@ -44,12 +51,16 @@ struct ur_event_handle_t_ : _ur_object {
4451
ur_device_handle_t getDevice() const;
4552

4653
void recordStartTimestamp();
47-
uint64_t *getEventEndTimestampPtr();
54+
55+
// Get pointer to the end timestamp, and ze event handle.
56+
// Caller is responsible for signaling the event once the timestamp is ready.
57+
std::pair<uint64_t *, ze_event_handle_t> getEventEndTimestampAndHandle();
4858

4959
uint64_t getEventStartTimestmap() const;
5060
uint64_t getEventEndTimestamp();
5161

5262
private:
63+
ur_queue_handle_t hQueue = nullptr;
5364
v2::raii::cache_borrowed_event zeEvent;
5465
v2::event_pool *pool;
5566

source/adapters/level_zero/v2/event_pool.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace v2 {
1515

1616
static constexpr size_t EVENTS_BURST = 64;
1717

18-
ur_event_handle_t_ *event_pool::allocate() {
18+
ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue) {
1919
TRACK_SCOPE_LATENCY("event_pool::allocate");
2020

2121
std::unique_lock<std::mutex> lock(*mutex);
@@ -32,6 +32,8 @@ ur_event_handle_t_ *event_pool::allocate() {
3232
auto event = freelist.back();
3333
freelist.pop_back();
3434

35+
event->resetQueue(hQueue);
36+
3537
return event;
3638
}
3739

source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class event_pool {
4141
DeviceId Id() { return provider->device()->Id.value(); };
4242

4343
// Allocate an event from the pool. Thread safe.
44-
ur_event_handle_t_ *allocate();
44+
ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue);
4545

4646
// Free an event back to the pool. Thread safe.
4747
void free(ur_event_handle_t_ *event);

source/adapters/level_zero/v2/queue_api.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
struct ur_queue_handle_t_ {
1818
virtual ~ur_queue_handle_t_();
19+
20+
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
21+
1922
virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *,
2023
size_t *) = 0;
2124
virtual ur_result_t queueRetain() = 0;

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
104104
ur_event_handle_t
105105
ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent) {
106106
if (hUserEvent) {
107-
*hUserEvent = eventPool->allocate();
107+
*hUserEvent = eventPool->allocate(this);
108108
return *hUserEvent;
109109
} else {
110110
return nullptr;
@@ -156,6 +156,11 @@ ur_result_t ur_queue_immediate_in_order_t::queueRelease() {
156156
return UR_RESULT_SUCCESS;
157157
}
158158

159+
// Appends hEvent to the queue's deferredEvents list while holding an
// exclusive lock on this queue's Mutex. The event is not released here;
// queueFinish() later calls releaseDeferred() on every entry of the list
// and clears it.
void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) {
160+
std::unique_lock<ur_shared_mutex> lock(this->Mutex);
161+
deferredEvents.push_back(hEvent);
162+
}
163+
159164
ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle(
160165
ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) {
161166
std::ignore = pDesc;
@@ -175,6 +180,12 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() {
175180
ZE2UR_CALL(zeCommandListHostSynchronize,
176181
(handler.commandList.get(), UINT64_MAX));
177182

183+
// Free deferred events
184+
for (auto &hEvent : deferredEvents) {
185+
hEvent->releaseDeferred();
186+
}
187+
deferredEvents.clear();
188+
178189
return UR_RESULT_SUCCESS;
179190
}
180191

@@ -983,9 +994,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp(
983994

984995
signalEvent->recordStartTimestamp();
985996

997+
auto [timestampPtr, zeSignalEvent] =
998+
signalEvent->getEventEndTimestampAndHandle();
999+
9861000
ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp,
987-
(handler.commandList.get(), signalEvent->getEventEndTimestampPtr(),
988-
signalEvent->getZeEvent(), numWaitEvents, pWaitEvents));
1001+
(handler.commandList.get(), timestampPtr, zeSignalEvent,
1002+
numWaitEvents, pWaitEvents));
9891003

9901004
if (blocking) {
9911005
ZE2UR_CALL(zeCommandListHostSynchronize,

source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,16 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
4646

4747
std::vector<ze_event_handle_t> waitList;
4848

49+
std::vector<ur_event_handle_t> deferredEvents;
50+
4951
std::pair<ze_event_handle_t *, uint32_t>
5052
getWaitListView(const ur_event_handle_t *phWaitEvents,
5153
uint32_t numWaitEvents);
5254

5355
ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent);
5456

57+
void deferEventFree(ur_event_handle_t hEvent) override;
58+
5559
ur_result_t enqueueRegionCopyUnlocked(
5660
ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking,
5761
ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,

test/adapters/level_zero/v2/event_pool_test.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ TEST_P(EventPoolTest, Basic) {
150150
{
151151
auto pool = cache->borrow(device->Id.value(), getParam().flags);
152152

153-
first = pool->allocate();
153+
first = pool->allocate(reinterpret_cast<ur_queue_handle_t>(0x1));
154154
zeFirst = first->getZeEvent();
155155

156156
urEventRelease(first);
@@ -160,7 +160,7 @@ TEST_P(EventPoolTest, Basic) {
160160
{
161161
auto pool = cache->borrow(device->Id.value(), getParam().flags);
162162

163-
second = pool->allocate();
163+
second = pool->allocate(reinterpret_cast<ur_queue_handle_t>(0x1));
164164
zeSecond = second->getZeEvent();
165165

166166
urEventRelease(second);
@@ -179,7 +179,8 @@ TEST_P(EventPoolTest, Threaded) {
179179
auto pool = cache->borrow(device->Id.value(), getParam().flags);
180180
std::vector<ur_event_handle_t> events;
181181
for (int i = 0; i < 100; ++i) {
182-
events.push_back(pool->allocate());
182+
events.push_back(pool->allocate(
183+
reinterpret_cast<ur_queue_handle_t>(0x1)));
183184
}
184185
for (int i = 0; i < 100; ++i) {
185186
urEventRelease(events[i]);
@@ -197,15 +198,16 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) {
197198
auto pool = cache->borrow(device->Id.value(), getParam().flags);
198199
std::list<ur_event_handle_t> events;
199200
for (int i = 0; i < 128; ++i) {
200-
events.push_back(pool->allocate());
201+
events.push_back(
202+
pool->allocate(reinterpret_cast<ur_queue_handle_t>(0x1)));
201203
}
202204
auto frontZeHandle = events.front()->getZeEvent();
203205
for (int i = 0; i < 8; ++i) {
204206
urEventRelease(events.front());
205207
events.pop_front();
206208
}
207209
for (int i = 0; i < 8; ++i) {
208-
auto e = pool->allocate();
210+
auto e = pool->allocate(reinterpret_cast<ur_queue_handle_t>(0x1));
209211
events.push_back(e);
210212
}
211213

test/conformance/event/event_adapter_level_zero_v2.match

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.
55
urEventGetInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
66
urEventGetInfoNegativeTest.InvalidSizePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
77
urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
8-
{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED
9-
{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT
8+
urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED
9+
urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT
1010
urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
1111
urEventSetCallbackTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
1212
urEventSetCallbackTest.ValidateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_

0 commit comments

Comments
 (0)