Skip to content

Commit 1b79491

Browse files
[SYCL][L0] Fix memory leak when tracking indirect accesses (#7105)
On the plugin API boundaries, extra piKernelRetain/piKernelRelease calls can be made at arbitrary moments (from other threads), including *before* the kernel was submitted to the actual GPU HW. As such, piKernelRelease has nothing to do with submissions, and indirect memory tracking updates should be initiated from the points where the kernel actually finishes: CleanupCompletedEvent.
1 parent 5d233ee commit 1b79491

File tree

1 file changed

+31
-24
lines changed

1 file changed

+31
-24
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5253,27 +5253,6 @@ pi_result piKernelRetain(pi_kernel Kernel) {
52535253
pi_result piKernelRelease(pi_kernel Kernel) {
52545254
PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL);
52555255

5256-
if (IndirectAccessTrackingEnabled) {
5257-
// piKernelRelease is called by CleanupCompletedEvent(Event) as soon as
5258-
// kernel execution has finished. This is the place where we need to release
5259-
// memory allocations. If kernel is not in use (not submitted by some
5260-
// other thread) then release referenced memory allocations. As a result,
5261-
// memory can be deallocated and context can be removed from container in
5262-
// the platform. That's why we need to lock a mutex here.
5263-
pi_platform Plt = Kernel->Program->Context->getPlatform();
5264-
std::scoped_lock<pi_shared_mutex> ContextsLock(Plt->ContextsMutex);
5265-
5266-
if (--Kernel->SubmissionsCount == 0) {
5267-
// Kernel is not submitted for execution, release referenced memory
5268-
// allocations.
5269-
for (auto &MemAlloc : Kernel->MemAllocs) {
5270-
USMFreeHelper(MemAlloc->second.Context, MemAlloc->first,
5271-
MemAlloc->second.OwnZeMemHandle);
5272-
}
5273-
Kernel->MemAllocs.clear();
5274-
}
5275-
}
5276-
52775256
if (!Kernel->RefCount.decrementAndTest())
52785257
return PI_SUCCESS;
52795258

@@ -5821,9 +5800,35 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
58215800
Event->CleanedUp = true;
58225801
}
58235802

5803+
auto ReleaseIndirectMem = [](pi_kernel Kernel) {
5804+
if (IndirectAccessTrackingEnabled) {
5805+
// This helper is invoked from CleanupCompletedEvent(Event), right before
5806+
// piKernelRelease, i.e. at the point where kernel execution has finished.
5807+
// It decrements the kernel's SubmissionsCount and, if the kernel is no
5808+
// longer in use (not submitted by some other thread), releases the memory
5809+
// allocations it references. As a result, memory can be deallocated and
5810+
// context can be removed from container in the platform. That's why we
5811+
// need to lock the platform's ContextsMutex here.
pi_platform Plt = Kernel->Program->Context->getPlatform();
5812+
std::scoped_lock<pi_shared_mutex> ContextsLock(Plt->ContextsMutex);
5813+
5814+
if (--Kernel->SubmissionsCount == 0) {
5815+
// No outstanding submissions remain for this kernel, so release the
5816+
// memory allocations it references.
5817+
for (auto &MemAlloc : Kernel->MemAllocs) {
5818+
// NOTE(review): each element is presumably a std::pair<void *const, MemAllocRecord> * (hence the -> access) - confirm.
5819+
USMFreeHelper(MemAlloc->second.Context, MemAlloc->first,
5820+
MemAlloc->second.OwnZeMemHandle);
5821+
}
5822+
Kernel->MemAllocs.clear();
5823+
}
5824+
}
5825+
};
5826+
58245827
// We've reset event data members above, now cleanup resources.
5825-
if (AssociatedKernel)
5828+
if (AssociatedKernel) {
5829+
ReleaseIndirectMem(AssociatedKernel);
58265830
PI_CALL(piKernelRelease(AssociatedKernel));
5831+
}
58275832

58285833
if (AssociatedQueue) {
58295834
{
@@ -5877,8 +5882,10 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
58775882
}
58785883
}
58795884
}
5880-
if (DepEventKernel)
5881-
PI_CALL(piKernelRelease(pi_cast<pi_kernel>(DepEvent->CommandData)));
5885+
if (DepEventKernel) {
5886+
ReleaseIndirectMem(DepEventKernel);
5887+
PI_CALL(piKernelRelease(DepEventKernel));
5888+
}
58825889
PI_CALL(piEventReleaseInternal(DepEvent));
58835890
}
58845891

0 commit comments

Comments
 (0)