@@ -5253,27 +5253,6 @@ pi_result piKernelRetain(pi_kernel Kernel) {
5253
5253
pi_result piKernelRelease (pi_kernel Kernel) {
5254
5254
PI_ASSERT (Kernel, PI_ERROR_INVALID_KERNEL);
5255
5255
5256
- if (IndirectAccessTrackingEnabled) {
5257
- // piKernelRelease is called by CleanupCompletedEvent(Event) as soon as
5258
- // kernel execution has finished. This is the place where we need to release
5259
- // memory allocations. If kernel is not in use (not submitted by some
5260
- // other thread) then release referenced memory allocations. As a result,
5261
- // memory can be deallocated and context can be removed from container in
5262
- // the platform. That's why we need to lock a mutex here.
5263
- pi_platform Plt = Kernel->Program ->Context ->getPlatform ();
5264
- std::scoped_lock<pi_shared_mutex> ContextsLock (Plt->ContextsMutex );
5265
-
5266
- if (--Kernel->SubmissionsCount == 0 ) {
5267
- // Kernel is not submitted for execution, release referenced memory
5268
- // allocations.
5269
- for (auto &MemAlloc : Kernel->MemAllocs ) {
5270
- USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
5271
- MemAlloc->second .OwnZeMemHandle );
5272
- }
5273
- Kernel->MemAllocs .clear ();
5274
- }
5275
- }
5276
-
5277
5256
if (!Kernel->RefCount .decrementAndTest ())
5278
5257
return PI_SUCCESS;
5279
5258
@@ -5821,9 +5800,35 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
5821
5800
Event->CleanedUp = true ;
5822
5801
}
5823
5802
5803
+ auto ReleaseIndirectMem = [](pi_kernel Kernel) {
5804
+ if (IndirectAccessTrackingEnabled) {
5805
+ // Invoked by CleanupCompletedEvent(Event), right before piKernelRelease, as
5806
+ // soon as kernel execution has finished. This is the place where we need to
5807
+ // release memory allocations. If kernel is not in use (not submitted by
5808
+ // some other thread) then release referenced memory allocations. As a
5809
+ // result, memory can be deallocated and context can be removed from
5810
+ // container in the platform. That's why we need to lock a mutex here.
5811
+ pi_platform Plt = Kernel->Program ->Context ->getPlatform ();
5812
+ std::scoped_lock<pi_shared_mutex> ContextsLock (Plt->ContextsMutex );
5813
+
5814
+ if (--Kernel->SubmissionsCount == 0 ) {
5815
+ // Kernel is not submitted for execution, release referenced memory
5816
+ // allocations.
5817
+ for (auto &MemAlloc : Kernel->MemAllocs ) {
5818
+ // MemAlloc is a std::pair<void *const, MemAllocRecord> *; free each entry.
5819
+ USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
5820
+ MemAlloc->second .OwnZeMemHandle );
5821
+ }
5822
+ Kernel->MemAllocs .clear ();
5823
+ }
5824
+ }
5825
+ };
5826
+
5824
5827
// We've reset event data members above, now cleanup resources.
5825
- if (AssociatedKernel)
5828
+ if (AssociatedKernel) {
5829
+ ReleaseIndirectMem (AssociatedKernel);
5826
5830
PI_CALL (piKernelRelease (AssociatedKernel));
5831
+ }
5827
5832
5828
5833
if (AssociatedQueue) {
5829
5834
{
@@ -5877,8 +5882,10 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
5877
5882
}
5878
5883
}
5879
5884
}
5880
- if (DepEventKernel)
5881
- PI_CALL (piKernelRelease (pi_cast<pi_kernel>(DepEvent->CommandData )));
5885
+ if (DepEventKernel) {
5886
+ ReleaseIndirectMem (DepEventKernel);
5887
+ PI_CALL (piKernelRelease (DepEventKernel));
5888
+ }
5882
5889
PI_CALL (piEventReleaseInternal (DepEvent));
5883
5890
}
5884
5891
0 commit comments