Skip to content

[SYCL][L0] Fix memory leak in pi Queue #2905

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 8, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 100 additions & 84 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,40 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr,
return mapError(Result);
#define ZE_CALL_NOCHECK(Call) ZeCall().doCall(Call, #Call, false)

// This helper function increments the reference counter of the Queue
// without guarding with a lock.
// It is the caller's responsibility to make sure the lock is acquired
// on the Queue that is passed in.
inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; }

// This helper function creates a pi_event and associate a pi_queue.
// Note that the caller of this function must have acquired lock on the Queue
// that is passed in.
// \param Queue pi_queue to associate with a new event.
// \param Event a pointer to hold the newly created pi_event
// \param CommandType various command type determined by the caller
// \param ZeCommandList the handle to associate with the newly created event
inline static pi_result
createEventAndAssociateQueue(pi_queue Queue, pi_event *Event,
pi_command_type CommandType,
ze_command_list_handle_t ZeCommandList) {
pi_result Res = piEventCreate(Queue->Context, Event);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = CommandType;
(*Event)->ZeCommandList = ZeCommandList;

// We need to increment the reference counter here to avoid pi_queue
// being released before the associated pi_event is released because
// piEventRelease requires access to the associated pi_queue.
// In piEventRelease, the reference counter of the Queue is decremented
// to release it.
piQueueRetainNoLock(Queue);
return PI_SUCCESS;
}

pi_result _pi_device::initialize() {
uint32_t numQueueGroups = 0;
ZE_CALL(zeDeviceGetCommandQueueGroupProperties(ZeDevice, &numQueueGroups,
Expand Down Expand Up @@ -1988,34 +2022,44 @@ pi_result piQueueRetain(pi_queue Queue) {
// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

++(Queue->RefCount);
piQueueRetainNoLock(Queue);
return PI_SUCCESS;
}

pi_result piQueueRelease(pi_queue Queue) {
PI_ASSERT(Queue, PI_INVALID_QUEUE);
// We need to use a bool variable here to check the condition that
// RefCount becomes zero atomically with PiQueueMutex lock.
// Then, we can release the lock before we remove the Queue below.
bool RefCountZero = false;
{
std::lock_guard<std::mutex> Lock(Queue->PiQueueMutex);
Queue->RefCount--;
if (Queue->RefCount == 0)
RefCountZero = true;

if (RefCountZero) {
// It is possible to get to here and still have an open command list
// if no wait or finish ever occurred for this queue. But still need
// // TODO: o make sure commands get executed.
if (auto Res = Queue->executeOpenCommandList())
return Res;

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

if (--(Queue->RefCount) == 0) {
// It is possible to get to here and still have an open command list
// if no wait or finish ever occurred for this queue. But still need
// to make sure commands get executed.
if (auto Res = Queue->executeOpenCommandList())
return Res;
// Destroy all the fences created associated with this queue.
for (const auto &MapEntry : Queue->ZeCommandListFenceMap) {
ZE_CALL(zeFenceDestroy(MapEntry.second));
}
Queue->ZeCommandListFenceMap.clear();
ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue));
Queue->ZeCommandQueue = nullptr;

// Destroy all the fences created associated with this queue.
for (const auto &MapEntry : Queue->ZeCommandListFenceMap) {
ZE_CALL(zeFenceDestroy(MapEntry.second));
zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n",
Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly);
}
Queue->ZeCommandListFenceMap.clear();
ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue));
Queue->ZeCommandQueue = nullptr;

zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n",
Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly);
}

if (RefCountZero)
delete Queue;
return PI_SUCCESS;
}

Expand Down Expand Up @@ -3397,13 +3441,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
return Res;

ze_event_handle_t ZeEvent = nullptr;
auto Res = piEventCreate(Kernel->Program->Context, Event);
pi_result Res = createEventAndAssociateQueue(
Queue, Event, PI_COMMAND_TYPE_NDRANGE_KERNEL, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_NDRANGE_KERNEL;
(*Event)->ZeCommandList = ZeCommandList;
ZeEvent = (*Event)->ZeEvent;

// Save the kernel in the event, so that when the event is signalled
// the code can do a piKernelRelease on this kernel.
Expand All @@ -3416,8 +3458,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
// in CommandData.
piKernelRetain(Kernel);

ZeEvent = (*Event)->ZeEvent;

ze_event_handle_t *ZeEventWaitList =
_pi_event::createZeEventList(NumEventsInWaitList, EventWaitList);
if (!ZeEventWaitList)
Expand Down Expand Up @@ -3682,6 +3722,11 @@ pi_result piEventRelease(pi_event Event) {
auto Context = Event->Context;
ZE_CALL(Context->decrementAliveEventsInPool(Event->ZeEventPool));

// We intentionally incremented the reference counter when an event is
// created so that we can avoid pi_queue is released before the associated
// pi_event is released. Here we have to decrement it so pi_queue
// can be released successfully.
piQueueRelease(Event->Queue);
delete Event;
}
return PI_SUCCESS;
Expand Down Expand Up @@ -3871,14 +3916,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -3945,7 +3986,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
pi_bool BlockingWrite, size_t Size, const void *Src,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {

PI_ASSERT(Queue, PI_INVALID_QUEUE);
// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
ze_fence_handle_t ZeFence = nullptr;
Expand All @@ -3955,14 +3996,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res =
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = CommandType;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -4005,7 +4042,7 @@ static pi_result enqueueMemCopyRectHelper(
size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {

PI_ASSERT(Region && SrcOrigin && DstOrigin, PI_INVALID_VALUE);
PI_ASSERT(Region && SrcOrigin && DstOrigin && Queue, PI_INVALID_VALUE);

// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
Expand All @@ -4016,14 +4053,10 @@ static pi_result enqueueMemCopyRectHelper(

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res =
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = CommandType;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -4188,7 +4221,7 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
const void *Pattern, size_t PatternSize, size_t Size,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {

PI_ASSERT(Queue, PI_INVALID_QUEUE);
// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
ze_fence_handle_t ZeFence = nullptr;
Expand All @@ -4198,14 +4231,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res =
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = CommandType;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -4283,14 +4312,13 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
ze_event_handle_t ZeEvent = nullptr;

if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

auto Res = createEventAndAssociateQueue(
Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_MAP, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_MAP;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -4380,15 +4408,15 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
PI_ASSERT(Event, PI_INVALID_EVENT);

ze_event_handle_t ZeEvent = nullptr;

if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

auto Res = createEventAndAssociateQueue(
Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_UNMAP;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -4521,7 +4549,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
size_t RowPitch, size_t SlicePitch,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {

PI_ASSERT(Queue, PI_INVALID_QUEUE);
// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
ze_fence_handle_t ZeFence = nullptr;
Expand All @@ -4531,14 +4559,10 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res =
createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = CommandType;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -5138,14 +5162,10 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
// TODO: do we need to create a unique command type for this?
ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down Expand Up @@ -5198,14 +5218,10 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
// TODO: do we need to create a unique command type for this?
ze_event_handle_t ZeEvent = nullptr;
if (Event) {
auto Res = piEventCreate(Queue->Context, Event);
auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER,
ZeCommandList);
if (Res != PI_SUCCESS)
return Res;

(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_USER;
(*Event)->ZeCommandList = ZeCommandList;

ZeEvent = (*Event)->ZeEvent;
}

Expand Down