Skip to content

Commit 9a1c965

Browse files
committed
[SYCL][UR] fix use after free on queue in eventRelease
There was a potential race: if getEventEndTimestamp returns false and just after that the queue finishes and is destroyed, we would call deferEventRelease on invalid queue. This patch reworks the logic for deferring event release. Instead of registering the events to be released in the queue, we just clear adjustedEventStartTimestamp and adjustedEventEndTimestamp to mark the event as not timestamped. Since events are never actually destroyed, only returned to the pool, any pending timestamp write will always succeed. Since we are only dealing with in-order queues, the timestamp can only increase.
1 parent 0a42cf7 commit 9a1c965

File tree

10 files changed

+119
-79
lines changed

10 files changed

+119
-79
lines changed

unified-runtime/scripts/templates/queue_api.hpp.mako

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ from templates import helper as th
2929
struct ur_queue_t_ {
3030
virtual ~ur_queue_t_();
3131

32-
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
33-
3432
%for obj in th.get_queue_related_functions(specs, n, tags):
3533
%if not 'Release' in obj['name'] and not 'Retain' in obj['name']:
3634
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;

unified-runtime/source/adapters/level_zero/v2/event.cpp

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,21 @@ uint64_t event_profiling_data_t::getEventEndTimestamp() {
6060
return adjustedEventEndTimestamp;
6161
}
6262

63+
void event_profiling_data_t::reset() {
64+
// This ensures that the event is consider as not timestamped.
65+
// We can't touch the recordEventEndTimestamp
66+
// as it may still be overwritten by the driver.
67+
// In case event is resued and recordStartTimestamp
68+
// is called again, adjustedEventEndTimestamp will always be updated correctly
69+
// to the new value as we wait for the event to be signaled.
70+
// If the event is reused on another queue, this means that the original
71+
// queue must have been destroyed (and the even pool released back to the
72+
// context) and the timstamp is already wrriten, so there's no race-condition
73+
// possible.
74+
adjustedEventStartTimestamp = 0;
75+
adjustedEventEndTimestamp = 0;
76+
}
77+
6378
void event_profiling_data_t::recordStartTimestamp(ur_device_handle_t hDevice) {
6479
zeTimerResolution = hDevice->ZeDeviceProperties->timerResolution;
6580
timestampMaxValue = hDevice->getTimestampMask();
@@ -98,7 +113,7 @@ void ur_event_handle_t_::resetQueueAndCommand(ur_queue_t_ *hQueue,
98113
ur_command_t commandType) {
99114
this->hQueue = hQueue;
100115
this->commandType = commandType;
101-
profilingData = event_profiling_data_t(getZeEvent());
116+
profilingData.reset();
102117
}
103118

104119
void ur_event_handle_t_::recordStartTimestamp() {
@@ -141,33 +156,17 @@ ur_result_t ur_event_handle_t_::retain() {
141156
return UR_RESULT_SUCCESS;
142157
}
143158

144-
ur_result_t ur_event_handle_t_::releaseDeferred() {
145-
assert(zeEventQueryStatus(getZeEvent()) == ZE_RESULT_SUCCESS);
146-
assert(RefCount.load() == 0);
147-
148-
return this->forceRelease();
149-
}
150-
151159
ur_result_t ur_event_handle_t_::release() {
152160
if (!RefCount.decrementAndTest())
153161
return UR_RESULT_SUCCESS;
154162

155-
// Need to take a lock before checking if the event is timestamped.
156-
std::unique_lock<ur_shared_mutex> lock(Mutex);
157-
158-
if (isTimestamped() && !getEventEndTimestamp()) {
159-
// L0 will write end timestamp to this event some time in the future,
160-
// so we can't release it yet.
161-
assert(hQueue);
162-
hQueue->deferEventFree(this);
163-
return UR_RESULT_SUCCESS;
163+
if (event_pool) {
164+
event_pool->free(this);
165+
} else {
166+
std::get<v2::raii::ze_event_handle_t>(hZeEvent).release();
167+
delete this;
164168
}
165-
166-
// Need to unlock now, as forceRelease might deallocate memory backing
167-
// the Mutex.
168-
lock.unlock();
169-
170-
return this->forceRelease();
169+
return UR_RESULT_SUCCESS;
171170
}
172171

173172
bool ur_event_handle_t_::isTimestamped() const {
@@ -209,16 +208,6 @@ ur_event_handle_t_::ur_event_handle_t_(
209208
,
210209
nullptr) {}
211210

212-
ur_result_t ur_event_handle_t_::forceRelease() {
213-
if (event_pool) {
214-
event_pool->free(this);
215-
} else {
216-
std::get<v2::raii::ze_event_handle_t>(hZeEvent).release();
217-
delete this;
218-
}
219-
return UR_RESULT_SUCCESS;
220-
}
221-
222211
namespace ur::level_zero {
223212
ur_result_t urEventRetain(ur_event_handle_t hEvent) try {
224213
return hEvent->retain();

unified-runtime/source/adapters/level_zero/v2/event.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ struct event_profiling_data_t {
3535
bool recordingStarted() const;
3636
bool recordingEnded() const;
3737

38+
// clear the profiling data, allowing the event to be reused
39+
// for a new command
40+
void reset();
41+
3842
private:
3943
ze_event_handle_t hZeEvent;
4044

@@ -64,9 +68,6 @@ struct ur_event_handle_t_ : _ur_object {
6468
// Set the queue and command that this event is associated with
6569
void resetQueueAndCommand(ur_queue_t_ *hQueue, ur_command_t commandType);
6670

67-
// releases event immediately
68-
ur_result_t forceRelease();
69-
7071
void reset();
7172
ze_event_handle_t getZeEvent() const;
7273

unified-runtime/source/adapters/level_zero/v2/queue_api.hpp

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,6 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
138138
return UR_RESULT_SUCCESS;
139139
}
140140

141-
void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) {
142-
auto commandListLocked = commandListManager.lock();
143-
deferredEvents.push_back(hEvent);
144-
}
145-
146141
ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle(
147142
ur_queue_native_desc_t * /*pDesc*/, ur_native_handle_t *phNativeQueue) {
148143
*phNativeQueue = reinterpret_cast<ur_native_handle_t>(
@@ -160,12 +155,6 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() {
160155
ZE2UR_CALL(zeCommandListHostSynchronize,
161156
(commandListLocked->getZeCommandList(), UINT64_MAX));
162157

163-
// Free deferred events
164-
for (auto &hEvent : deferredEvents) {
165-
UR_CALL(hEvent->releaseDeferred());
166-
}
167-
deferredEvents.clear();
168-
169158
// Free deferred kernels
170159
for (auto &hKernel : submittedKernels) {
171160
UR_CALL(hKernel->release());

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
3434
ur_queue_flags_t flags;
3535

3636
lockable<ur_command_list_manager> commandListManager;
37-
std::vector<ur_event_handle_t> deferredEvents;
3837
std::vector<ur_kernel_handle_t> submittedKernels;
3938

4039
wait_list_view
@@ -46,8 +45,6 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
4645
ur_event_handle_t *hUserEvent,
4746
ur_command_t commandType);
4847

49-
void deferEventFree(ur_event_handle_t hEvent) override;
50-
5148
ur_result_t enqueueGenericFillUnlocked(
5249
ur_mem_buffer_t *hBuffer, size_t offset, size_t patternSize,
5350
const void *pPattern, size_t size, uint32_t numEventsInWaitList,

unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -272,39 +272,18 @@ TEST_P(EventPoolTestWithQueue, WithTimestamp) {
272272
&hDevice, nullptr));
273273

274274
ur_event_handle_t first;
275-
ze_event_handle_t zeFirst;
276275
{
277276
ASSERT_SUCCESS(
278277
urEnqueueTimestampRecordingExp(queue, false, 1, &hEvent, &first));
279-
zeFirst = first->getZeEvent();
280-
281278
urEventRelease(first); // should not actually release the event until
282279
// recording is completed
283280
}
284281
ur_event_handle_t second;
285-
ze_event_handle_t zeSecond;
286-
{
287-
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &second));
288-
zeSecond = second->getZeEvent();
289-
ASSERT_SUCCESS(urEventRelease(second));
290-
}
291-
ASSERT_NE(first, second);
292-
ASSERT_NE(zeFirst, zeSecond);
282+
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &second));
283+
// even if the event is reused, it should not be timestamped anymore
284+
ASSERT_FALSE(second->isTimestamped());
285+
ASSERT_SUCCESS(urEventRelease(second));
293286

294287
ASSERT_EQ(zeEventHostSignal(zeEvent.get()), ZE_RESULT_SUCCESS);
295-
296288
ASSERT_SUCCESS(urQueueFinish(queue));
297-
298-
// Now, the first event should be avilable for reuse
299-
ur_event_handle_t third;
300-
ze_event_handle_t zeThird;
301-
{
302-
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &third));
303-
zeThird = third->getZeEvent();
304-
ASSERT_SUCCESS(urEventRelease(third));
305-
306-
ASSERT_FALSE(third->isTimestamped());
307-
}
308-
ASSERT_EQ(first, third);
309-
ASSERT_EQ(zeFirst, zeThird);
310289
}

unified-runtime/test/adapters/level_zero/ze_helpers.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,18 @@
1010
#include <ur_api.h>
1111
#include <uur/fixtures.h>
1212

13+
static bool ze_initialized = false;
14+
1315
std::unique_ptr<_ze_event_handle_t, std::function<void(ze_event_handle_t)>>
1416
createZeEvent(ur_context_handle_t hContext, ur_device_handle_t hDevice) {
17+
if (!ze_initialized) {
18+
ze_result_t result = zeInit(ZE_INIT_FLAG_GPU_ONLY);
19+
if (result != ZE_RESULT_SUCCESS) {
20+
return nullptr;
21+
}
22+
ze_initialized = true;
23+
}
24+
1525
ze_event_pool_desc_t desc;
1626
desc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
1727
desc.pNext = nullptr;

unified-runtime/test/conformance/enqueue/urEnqueueTimestampRecording.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,61 @@ TEST_P(urEnqueueTimestampRecordingExpTest, SuccessBlocking) {
6666
ASSERT_SUCCESS(urEventRelease(event));
6767
}
6868

69+
TEST_P(urEnqueueTimestampRecordingExpTest,
70+
ReleaseEventWhileTimestampWritePending) {
71+
void *ptr;
72+
ASSERT_SUCCESS(
73+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
74+
75+
// Enqueue an operation to keep the device busy
76+
uint8_t pattern = 0xFF;
77+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
78+
1024 * 1024, 0, nullptr, nullptr));
79+
80+
ur_event_handle_t event1 = nullptr;
81+
ASSERT_SUCCESS(
82+
urEnqueueTimestampRecordingExp(queue, false, 0, nullptr, &event1));
83+
ASSERT_SUCCESS(urEventRelease(event1));
84+
85+
ur_event_handle_t event2 = nullptr;
86+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
87+
1024 * 1024, 0, nullptr, &event2));
88+
89+
// Make sure the new event does not contain profiling info (in case it's reused
90+
// by the adapter)
91+
ASSERT_EQ(urEventGetProfilingInfo(event2, UR_PROFILING_INFO_COMMAND_QUEUED,
92+
sizeof(uint64_t), nullptr, nullptr),
93+
UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE);
94+
ASSERT_SUCCESS(urEventRelease(event2));
95+
ASSERT_SUCCESS(urUSMFree(context, ptr));
96+
}
97+
98+
TEST_P(urEnqueueTimestampRecordingExpTest, ReleaseEventAfterQueueRelease) {
99+
void *ptr;
100+
ASSERT_SUCCESS(
101+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
102+
103+
// Enqueue an operation to keep the device busy
104+
uint8_t pattern = 0xFF;
105+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
106+
1024 * 1024, 0, nullptr, nullptr));
107+
108+
ur_event_handle_t event1 = nullptr;
109+
ASSERT_SUCCESS(
110+
urEnqueueTimestampRecordingExp(queue, false, 0, nullptr, &event1));
111+
112+
ASSERT_SUCCESS(urQueueRelease(queue));
113+
queue = nullptr;
114+
115+
uint64_t queuedTime = 0;
116+
ASSERT_SUCCESS(
117+
urEventGetProfilingInfo(event1, UR_PROFILING_INFO_COMMAND_QUEUED,
118+
sizeof(uint64_t), &queuedTime, nullptr));
119+
120+
ASSERT_SUCCESS(urEventRelease(event1));
121+
ASSERT_SUCCESS(urUSMFree(context, ptr));
122+
}
123+
69124
TEST_P(urEnqueueTimestampRecordingExpTest, InvalidNullHandleQueue) {
70125
ur_event_handle_t event = nullptr;
71126
ASSERT_EQ_RESULT(

unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,30 @@ TEST_P(urEventGetProfilingInfoTest, Success) {
128128
UR_PROFILING_INFO_COMMAND_COMPLETE);
129129
}
130130

131+
TEST_P(urEventGetProfilingInfoTest, ReleaseEventAfterQueueRelease) {
132+
void *ptr;
133+
ASSERT_SUCCESS(
134+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
135+
136+
// Enqueue an operation to keep the device busy
137+
uint8_t pattern = 0xFF;
138+
ur_event_handle_t event1;
139+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
140+
1024 * 1024, 0, nullptr, &event1));
141+
142+
ASSERT_SUCCESS(urQueueRelease(queue));
143+
queue = nullptr;
144+
145+
uint64_t queuedTime = 0;
146+
auto ret = urEventGetProfilingInfo(event1, UR_PROFILING_INFO_COMMAND_QUEUED,
147+
sizeof(uint64_t), &queuedTime, nullptr);
148+
ASSERT_TRUE(ret == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION ||
149+
ret == UR_RESULT_SUCCESS);
150+
151+
ASSERT_SUCCESS(urEventRelease(event1));
152+
ASSERT_SUCCESS(urUSMFree(context, ptr));
153+
}
154+
131155
TEST_P(urEventGetProfilingInfoTest, InvalidNullHandle) {
132156
UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});
133157

0 commit comments

Comments
 (0)