Skip to content

Commit 051e140

Browse files
authored
[SYCL][L0] Fix memory leak in pi Queue (#2905)
Retain Queue whenever a pi_event associates a pi_queue. Release the associated pi_queue when a pi_event is released. Signed-off-by: Byoungro So <[email protected]>
1 parent d4e51db commit 051e140

File tree

1 file changed

+100
-84
lines changed

1 file changed

+100
-84
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 100 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,40 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr,
406406
return mapError(Result);
407407
#define ZE_CALL_NOCHECK(Call) ZeCall().doCall(Call, #Call, false)
408408

409+
// This helper function increments the reference counter of the Queue
410+
// without guarding with a lock.
411+
// It is the caller's responsibility to make sure the lock is acquired
412+
// on the Queue that is passed in.
413+
inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; }
414+
415+
// This helper function creates a pi_event and associate a pi_queue.
416+
// Note that the caller of this function must have acquired lock on the Queue
417+
// that is passed in.
418+
// \param Queue pi_queue to associate with a new event.
419+
// \param Event a pointer to hold the newly created pi_event
420+
// \param CommandType various command type determined by the caller
421+
// \param ZeCommandList the handle to associate with the newly created event
422+
inline static pi_result
423+
createEventAndAssociateQueue(pi_queue Queue, pi_event *Event,
424+
pi_command_type CommandType,
425+
ze_command_list_handle_t ZeCommandList) {
426+
pi_result Res = piEventCreate(Queue->Context, Event);
427+
if (Res != PI_SUCCESS)
428+
return Res;
429+
430+
(*Event)->Queue = Queue;
431+
(*Event)->CommandType = CommandType;
432+
(*Event)->ZeCommandList = ZeCommandList;
433+
434+
// We need to increment the reference counter here to avoid pi_queue
435+
// being released before the associated pi_event is released because
436+
// piEventRelease requires access to the associated pi_queue.
437+
// In piEventRelease, the reference counter of the Queue is decremented
438+
// to release it.
439+
piQueueRetainNoLock(Queue);
440+
return PI_SUCCESS;
441+
}
442+
409443
pi_result _pi_device::initialize() {
410444
uint32_t numQueueGroups = 0;
411445
ZE_CALL(zeDeviceGetCommandQueueGroupProperties(ZeDevice, &numQueueGroups,
@@ -1988,34 +2022,44 @@ pi_result piQueueRetain(pi_queue Queue) {
19882022
// Lock automatically releases when this goes out of scope.
19892023
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
19902024

1991-
++(Queue->RefCount);
2025+
piQueueRetainNoLock(Queue);
19922026
return PI_SUCCESS;
19932027
}
19942028

19952029
pi_result piQueueRelease(pi_queue Queue) {
19962030
PI_ASSERT(Queue, PI_INVALID_QUEUE);
2031+
// We need to use a bool variable here to check the condition that
2032+
// RefCount becomes zero atomically with PiQueueMutex lock.
2033+
// Then, we can release the lock before we remove the Queue below.
2034+
bool RefCountZero = false;
2035+
{
2036+
std::lock_guard<std::mutex> Lock(Queue->PiQueueMutex);
2037+
Queue->RefCount--;
2038+
if (Queue->RefCount == 0)
2039+
RefCountZero = true;
2040+
2041+
if (RefCountZero) {
2042+
// It is possible to get to here and still have an open command list
2043+
// if no wait or finish ever occurred for this queue. But still need
2044+
// // TODO: o make sure commands get executed.
2045+
if (auto Res = Queue->executeOpenCommandList())
2046+
return Res;
19972047

1998-
// Lock automatically releases when this goes out of scope.
1999-
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
2000-
2001-
if (--(Queue->RefCount) == 0) {
2002-
// It is possible to get to here and still have an open command list
2003-
// if no wait or finish ever occurred for this queue. But still need
2004-
// to make sure commands get executed.
2005-
if (auto Res = Queue->executeOpenCommandList())
2006-
return Res;
2048+
// Destroy all the fences created associated with this queue.
2049+
for (const auto &MapEntry : Queue->ZeCommandListFenceMap) {
2050+
ZE_CALL(zeFenceDestroy(MapEntry.second));
2051+
}
2052+
Queue->ZeCommandListFenceMap.clear();
2053+
ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue));
2054+
Queue->ZeCommandQueue = nullptr;
20072055

2008-
// Destroy all the fences created associated with this queue.
2009-
for (const auto &MapEntry : Queue->ZeCommandListFenceMap) {
2010-
ZE_CALL(zeFenceDestroy(MapEntry.second));
2056+
zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n",
2057+
Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly);
20112058
}
2012-
Queue->ZeCommandListFenceMap.clear();
2013-
ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue));
2014-
Queue->ZeCommandQueue = nullptr;
2015-
2016-
zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n",
2017-
Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly);
20182059
}
2060+
2061+
if (RefCountZero)
2062+
delete Queue;
20192063
return PI_SUCCESS;
20202064
}
20212065

@@ -3411,13 +3455,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
34113455
return Res;
34123456

34133457
ze_event_handle_t ZeEvent = nullptr;
3414-
auto Res = piEventCreate(Kernel->Program->Context, Event);
3458+
pi_result Res = createEventAndAssociateQueue(
3459+
Queue, Event, PI_COMMAND_TYPE_NDRANGE_KERNEL, ZeCommandList);
34153460
if (Res != PI_SUCCESS)
34163461
return Res;
3417-
3418-
(*Event)->Queue = Queue;
3419-
(*Event)->CommandType = PI_COMMAND_TYPE_NDRANGE_KERNEL;
3420-
(*Event)->ZeCommandList = ZeCommandList;
3462+
ZeEvent = (*Event)->ZeEvent;
34213463

34223464
// Save the kernel in the event, so that when the event is signalled
34233465
// the code can do a piKernelRelease on this kernel.
@@ -3430,8 +3472,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
34303472
// in CommandData.
34313473
piKernelRetain(Kernel);
34323474

3433-
ZeEvent = (*Event)->ZeEvent;
3434-
34353475
ze_event_handle_t *ZeEventWaitList =
34363476
_pi_event::createZeEventList(NumEventsInWaitList, EventWaitList);
34373477
if (!ZeEventWaitList)
@@ -3696,6 +3736,11 @@ pi_result piEventRelease(pi_event Event) {
36963736
auto Context = Event->Context;
36973737
ZE_CALL(Context->decrementAliveEventsInPool(Event->ZeEventPool));
36983738

3739+
// We intentionally incremented the reference counter when an event is
3740+
// created so that we can avoid pi_queue is released before the associated
3741+
// pi_event is released. Here we have to decrement it so pi_queue
3742+
// can be released successfully.
3743+
piQueueRelease(Event->Queue);
36993744
delete Event;
37003745
}
37013746
return PI_SUCCESS;
@@ -3885,14 +3930,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
38853930

38863931
ze_event_handle_t ZeEvent = nullptr;
38873932
if (Event) {
3888-
auto Res = piEventCreate(Queue->Context, Event);
3933+
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
3934+
ZeCommandList);
38893935
if (Res != PI_SUCCESS)
38903936
return Res;
3891-
3892-
(*Event)->Queue = Queue;
3893-
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
3894-
(*Event)->ZeCommandList = ZeCommandList;
3895-
38963937
ZeEvent = (*Event)->ZeEvent;
38973938
}
38983939

@@ -3959,7 +4000,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
39594000
pi_bool BlockingWrite, size_t Size, const void *Src,
39604001
pi_uint32 NumEventsInWaitList,
39614002
const pi_event *EventWaitList, pi_event *Event) {
3962-
4003+
PI_ASSERT(Queue, PI_INVALID_QUEUE);
39634004
// Get a new command list to be used on this call
39644005
ze_command_list_handle_t ZeCommandList = nullptr;
39654006
ze_fence_handle_t ZeFence = nullptr;
@@ -3969,14 +4010,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
39694010

39704011
ze_event_handle_t ZeEvent = nullptr;
39714012
if (Event) {
3972-
auto Res = piEventCreate(Queue->Context, Event);
4013+
auto Res =
4014+
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
39734015
if (Res != PI_SUCCESS)
39744016
return Res;
3975-
3976-
(*Event)->Queue = Queue;
3977-
(*Event)->CommandType = CommandType;
3978-
(*Event)->ZeCommandList = ZeCommandList;
3979-
39804017
ZeEvent = (*Event)->ZeEvent;
39814018
}
39824019

@@ -4019,7 +4056,7 @@ static pi_result enqueueMemCopyRectHelper(
40194056
size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList,
40204057
const pi_event *EventWaitList, pi_event *Event) {
40214058

4022-
PI_ASSERT(Region && SrcOrigin && DstOrigin, PI_INVALID_VALUE);
4059+
PI_ASSERT(Region && SrcOrigin && DstOrigin && Queue, PI_INVALID_VALUE);
40234060

40244061
// Get a new command list to be used on this call
40254062
ze_command_list_handle_t ZeCommandList = nullptr;
@@ -4030,14 +4067,10 @@ static pi_result enqueueMemCopyRectHelper(
40304067

40314068
ze_event_handle_t ZeEvent = nullptr;
40324069
if (Event) {
4033-
auto Res = piEventCreate(Queue->Context, Event);
4070+
auto Res =
4071+
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
40344072
if (Res != PI_SUCCESS)
40354073
return Res;
4036-
4037-
(*Event)->Queue = Queue;
4038-
(*Event)->CommandType = CommandType;
4039-
(*Event)->ZeCommandList = ZeCommandList;
4040-
40414074
ZeEvent = (*Event)->ZeEvent;
40424075
}
40434076

@@ -4202,7 +4235,7 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
42024235
const void *Pattern, size_t PatternSize, size_t Size,
42034236
pi_uint32 NumEventsInWaitList,
42044237
const pi_event *EventWaitList, pi_event *Event) {
4205-
4238+
PI_ASSERT(Queue, PI_INVALID_QUEUE);
42064239
// Get a new command list to be used on this call
42074240
ze_command_list_handle_t ZeCommandList = nullptr;
42084241
ze_fence_handle_t ZeFence = nullptr;
@@ -4212,14 +4245,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
42124245

42134246
ze_event_handle_t ZeEvent = nullptr;
42144247
if (Event) {
4215-
auto Res = piEventCreate(Queue->Context, Event);
4248+
auto Res =
4249+
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
42164250
if (Res != PI_SUCCESS)
42174251
return Res;
4218-
4219-
(*Event)->Queue = Queue;
4220-
(*Event)->CommandType = CommandType;
4221-
(*Event)->ZeCommandList = ZeCommandList;
4222-
42234252
ZeEvent = (*Event)->ZeEvent;
42244253
}
42254254

@@ -4297,14 +4326,13 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
42974326
ze_event_handle_t ZeEvent = nullptr;
42984327

42994328
if (Event) {
4300-
auto Res = piEventCreate(Queue->Context, Event);
4329+
// Lock automatically releases when this goes out of scope.
4330+
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
4331+
4332+
auto Res = createEventAndAssociateQueue(
4333+
Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_MAP, ZeCommandList);
43014334
if (Res != PI_SUCCESS)
43024335
return Res;
4303-
4304-
(*Event)->Queue = Queue;
4305-
(*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_MAP;
4306-
(*Event)->ZeCommandList = ZeCommandList;
4307-
43084336
ZeEvent = (*Event)->ZeEvent;
43094337
}
43104338

@@ -4395,15 +4423,15 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
43954423
PI_ASSERT(Event, PI_INVALID_EVENT);
43964424

43974425
ze_event_handle_t ZeEvent = nullptr;
4426+
43984427
if (Event) {
4399-
auto Res = piEventCreate(Queue->Context, Event);
4428+
// Lock automatically releases when this goes out of scope.
4429+
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
4430+
4431+
auto Res = createEventAndAssociateQueue(
4432+
Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, ZeCommandList);
44004433
if (Res != PI_SUCCESS)
44014434
return Res;
4402-
4403-
(*Event)->Queue = Queue;
4404-
(*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_UNMAP;
4405-
(*Event)->ZeCommandList = ZeCommandList;
4406-
44074435
ZeEvent = (*Event)->ZeEvent;
44084436
}
44094437

@@ -4537,7 +4565,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
45374565
size_t RowPitch, size_t SlicePitch,
45384566
pi_uint32 NumEventsInWaitList,
45394567
const pi_event *EventWaitList, pi_event *Event) {
4540-
4568+
PI_ASSERT(Queue, PI_INVALID_QUEUE);
45414569
// Get a new command list to be used on this call
45424570
ze_command_list_handle_t ZeCommandList = nullptr;
45434571
ze_fence_handle_t ZeFence = nullptr;
@@ -4547,14 +4575,10 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
45474575

45484576
ze_event_handle_t ZeEvent = nullptr;
45494577
if (Event) {
4550-
auto Res = piEventCreate(Queue->Context, Event);
4578+
auto Res =
4579+
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
45514580
if (Res != PI_SUCCESS)
45524581
return Res;
4553-
4554-
(*Event)->Queue = Queue;
4555-
(*Event)->CommandType = CommandType;
4556-
(*Event)->ZeCommandList = ZeCommandList;
4557-
45584582
ZeEvent = (*Event)->ZeEvent;
45594583
}
45604584

@@ -5154,14 +5178,10 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
51545178
// TODO: do we need to create a unique command type for this?
51555179
ze_event_handle_t ZeEvent = nullptr;
51565180
if (Event) {
5157-
auto Res = piEventCreate(Queue->Context, Event);
5181+
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
5182+
ZeCommandList);
51585183
if (Res != PI_SUCCESS)
51595184
return Res;
5160-
5161-
(*Event)->Queue = Queue;
5162-
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
5163-
(*Event)->ZeCommandList = ZeCommandList;
5164-
51655185
ZeEvent = (*Event)->ZeEvent;
51665186
}
51675187

@@ -5214,14 +5234,10 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
52145234
// TODO: do we need to create a unique command type for this?
52155235
ze_event_handle_t ZeEvent = nullptr;
52165236
if (Event) {
5217-
auto Res = piEventCreate(Queue->Context, Event);
5237+
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
5238+
ZeCommandList);
52185239
if (Res != PI_SUCCESS)
52195240
return Res;
5220-
5221-
(*Event)->Queue = Queue;
5222-
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
5223-
(*Event)->ZeCommandList = ZeCommandList;
5224-
52255241
ZeEvent = (*Event)->ZeEvent;
52265242
}
52275243

0 commit comments

Comments
 (0)