@@ -4557,11 +4557,18 @@ pi_result piProgramRetain(pi_program Program) {
4557
4557
4558
4558
// Drops one reference to Program and deletes the object once its reference
// count reaches zero.
//
// Program - program handle to release; PI_ASSERT checks it is non-null and
//           names PI_INVALID_PROGRAM as the error code.
// Returns PI_SUCCESS on the normal path. PI_INVALID_VALUE is the error code
// given to PI_ASSERT for a program whose reference count is already zero.
pi_result piProgramRelease(pi_program Program) {
  PI_ASSERT(Program, PI_INVALID_PROGRAM);

  bool LastReferenceDropped = false;
  {
    // The guard lives in its own scope so that the mutex reached through
    // Program is unlocked again before Program itself is deleted below.
    std::scoped_lock ProgramLock(Program->Mutex);

    // Releasing a program that is already fully released is an error.
    PI_ASSERT(Program->RefCount > 0, PI_INVALID_VALUE);
    LastReferenceDropped = (--(Program->RefCount) == 0);
  }

  if (LastReferenceDropped) {
    delete Program;
  }

  return PI_SUCCESS;
}
4567
4574
@@ -4731,6 +4738,7 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
4731
4738
4732
4739
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
4733
4740
4741
+ std::scoped_lock Guard (Kernel->Mutex );
4734
4742
ZE_CALL (zeKernelSetArgumentValue,
4735
4743
(pi_cast<ze_kernel_handle_t >(Kernel->ZeKernel ),
4736
4744
pi_cast<uint32_t >(ArgIndex), pi_cast<size_t >(ArgSize),
@@ -4758,8 +4766,10 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
4758
4766
// Improve that by passing SYCL buffer accessor type into
4759
4767
// piextKernelSetArgMemObj.
4760
4768
//
4769
+ std::scoped_lock Guard (Kernel->Mutex );
4761
4770
Kernel->PendingArguments .push_back (
4762
4771
{ArgIndex, sizeof (void *), *ArgValue, _pi_mem::read_write});
4772
+
4763
4773
return PI_SUCCESS;
4764
4774
}
4765
4775
@@ -4768,6 +4778,7 @@ pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex,
4768
4778
const pi_sampler *ArgValue) {
4769
4779
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
4770
4780
4781
+ std::scoped_lock Guard (Kernel->Mutex );
4771
4782
ZE_CALL (zeKernelSetArgumentValue,
4772
4783
(pi_cast<ze_kernel_handle_t >(Kernel->ZeKernel ),
4773
4784
pi_cast<uint32_t >(ArgIndex), sizeof (void *),
@@ -4782,6 +4793,8 @@ pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName,
4782
4793
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
4783
4794
4784
4795
ReturnHelper ReturnValue (ParamValueSize, ParamValue, ParamValueSizeRet);
4796
+
4797
+ std::shared_lock Guard (Kernel->Mutex );
4785
4798
switch (ParamName) {
4786
4799
case PI_KERNEL_INFO_CONTEXT:
4787
4800
return ReturnValue (pi_context{Kernel->Program ->Context });
@@ -4832,6 +4845,8 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device,
4832
4845
PI_ASSERT (Device, PI_INVALID_DEVICE);
4833
4846
4834
4847
ReturnHelper ReturnValue (ParamValueSize, ParamValue, ParamValueSizeRet);
4848
+
4849
+ std::shared_lock Guard (Kernel->Mutex );
4835
4850
switch (ParamName) {
4836
4851
case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
4837
4852
// TODO: To revisit after level_zero/issues/262 is resolved
@@ -4887,6 +4902,7 @@ pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device,
4887
4902
4888
4903
ReturnHelper ReturnValue (ParamValueSize, ParamValue, ParamValueSizeRet);
4889
4904
4905
+ std::shared_lock Guard (Kernel->Mutex );
4890
4906
if (ParamName == PI_KERNEL_MAX_SUB_GROUP_SIZE) {
4891
4907
ReturnValue (uint32_t {Kernel->ZeKernelProperties ->maxSubgroupSize });
4892
4908
} else if (ParamName == PI_KERNEL_MAX_NUM_SUB_GROUPS) {
@@ -4906,49 +4922,58 @@ pi_result piKernelRetain(pi_kernel Kernel) {
4906
4922
4907
4923
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
4908
4924
4909
- ++(Kernel->RefCount );
4910
- // When retaining a kernel, you are also retaining the program it is part of.
4911
- PI_CALL (piProgramRetain (Kernel->Program ));
4925
+ // When retaining a kernel, you are also retaining the program it is part
4926
+ // of.
4927
+ std::scoped_lock Lock (Kernel->Mutex , Kernel->Program ->Mutex );
4928
+ Kernel->retain ();
4912
4929
return PI_SUCCESS;
4913
4930
}
4914
4931
4915
4932
// Drops one reference to Kernel, deletes the kernel object when its reference
// count reaches zero, and then releases the program the kernel refers to.
//
// Kernel - kernel handle to release; PI_ASSERT checks it is non-null and
//          names PI_INVALID_KERNEL as the error code.
// Returns PI_SUCCESS on the normal path.
// NOTE(review): ZE_CALL and PI_CALL are macros defined outside this view; they
// presumably return early with an error code on failure - confirm.
pi_result piKernelRelease(pi_kernel Kernel) {
  PI_ASSERT(Kernel, PI_INVALID_KERNEL);

  pi_program OwningProgram = nullptr;
  bool LastReferenceDropped = false;

  {
    // All reads and updates of the kernel happen under its mutex. The guard is
    // scoped so the mutex is unlocked before the kernel is deleted below.
    std::scoped_lock KernelLock(Kernel->Mutex);

    // Remember the program now: it is still needed after the kernel object
    // may have been deleted.
    OwningProgram = Kernel->Program;

    if (IndirectAccessTrackingEnabled) {
      // Event->cleanup() calls piKernelRelease as soon as kernel execution has
      // finished, so this is where referenced memory allocations get
      // released. When the kernel is no longer in use (not submitted by some
      // other thread) its allocations are freed; that can deallocate memory
      // and remove the context from the platform's container, hence the
      // platform-wide contexts mutex is taken here.
      pi_platform Platform = OwningProgram->Context->getPlatform();
      std::lock_guard<std::mutex> ContextsLock(Platform->ContextsMutex);

      if (--Kernel->SubmissionsCount == 0) {
        // No outstanding submissions: free every tracked allocation and then
        // forget them.
        for (auto &Alloc : Kernel->MemAllocs) {
          USMFreeHelper(Alloc->second.Context, Alloc->first,
                        Alloc->second.OwnZeMemHandle);
        }
        Kernel->MemAllocs.clear();
      }
    }

    if (--(Kernel->RefCount) == 0) {
      // Only destroy the native kernel handle when this object owns it.
      if (Kernel->OwnZeKernel)
        ZE_CALL(zeKernelDestroy, (Kernel->ZeKernel));
      if (IndirectAccessTrackingEnabled) {
        PI_CALL(piContextRelease(OwningProgram->Context));
      }
      LastReferenceDropped = true;
    }
  }

  if (LastReferenceDropped)
    delete Kernel;

  // Do a release on the program this kernel was part of.
  if (OwningProgram)
    PI_CALL(piProgramRelease(OwningProgram));

  return PI_SUCCESS;
}
@@ -4964,6 +4989,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
4964
4989
PI_ASSERT (Event, PI_INVALID_EVENT);
4965
4990
PI_ASSERT ((WorkDim > 0 ) && (WorkDim < 4 ), PI_INVALID_WORK_DIMENSION);
4966
4991
4992
+ // Lock automatically releases when this goes out of scope.
4993
+ std::scoped_lock Lock (Queue->Mutex , Kernel->Mutex , Kernel->Program ->Mutex );
4967
4994
if (GlobalWorkOffset != NULL ) {
4968
4995
if (!PiDriverGlobalOffsetExtensionFound) {
4969
4996
zePrint (" No global offset extension found on this driver\n " );
@@ -5049,9 +5076,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
5049
5076
5050
5077
ZE_CALL (zeKernelSetGroupSize, (Kernel->ZeKernel , WG[0 ], WG[1 ], WG[2 ]));
5051
5078
5052
- // Lock automatically releases when this goes out of scope.
5053
- std::scoped_lock QueueLock (Queue->Mutex );
5054
-
5055
5079
_pi_ze_event_list_t TmpWaitList;
5056
5080
5057
5081
if (auto Res = TmpWaitList.createAndRetainPiZeEventList (NumEventsInWaitList,
@@ -5077,12 +5101,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
5077
5101
// the code can do a piKernelRelease on this kernel.
5078
5102
(*Event)->CommandData = (void *)Kernel;
5079
5103
5080
- // Use piKernelRetain to increment the reference count and indicate
5081
- // that the Kernel is in use. Once the event has been signalled, the
5082
- // code in Event.cleanup() will do a piReleaseKernel to update
5083
- // the reference count on the kernel, using the kernel saved
5084
- // in CommandData.
5085
- PI_CALL (piKernelRetain (Kernel));
5104
+ // Increment the reference count of the Kernel and indicate that the Kernel is
5105
+ // in use. Once the event has been signalled, the code in Event.cleanup() will
5106
+ // do a piKernelRelease to update the reference count on the kernel, using the
5107
+ // kernel saved in CommandData.
5108
+ Kernel->retain ();
5086
5109
5087
5110
// Add to list of kernels to be submitted
5088
5111
if (IndirectAccessTrackingEnabled)
@@ -5152,6 +5175,7 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
5152
5175
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
5153
5176
PI_ASSERT (NativeHandle, PI_INVALID_VALUE);
5154
5177
5178
+ std::shared_lock Guard (Kernel->Mutex );
5155
5179
auto *ZeKernel = pi_cast<ze_kernel_handle_t *>(NativeHandle);
5156
5180
*ZeKernel = Kernel->ZeKernel ;
5157
5181
return PI_SUCCESS;
@@ -5815,17 +5839,26 @@ pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName,
5815
5839
// Adds one reference to Sampler.
//
// Sampler - sampler handle to retain; PI_ASSERT checks it is non-null and
//           names PI_INVALID_SAMPLER as the error code.
// Returns PI_SUCCESS on the normal path.
pi_result piSamplerRetain(pi_sampler Sampler) {
  PI_ASSERT(Sampler, PI_INVALID_SAMPLER);

  {
    // The reference count is only modified while holding the sampler's mutex.
    std::scoped_lock SamplerLock(Sampler->Mutex);
    ++(Sampler->RefCount);
  }

  return PI_SUCCESS;
}
5821
5846
5822
5847
// Drops one reference to Sampler. When the reference count reaches zero the
// native sampler is destroyed via zeSamplerDestroy and the object is deleted.
//
// Sampler - sampler handle to release; PI_ASSERT checks it is non-null and
//           names PI_INVALID_SAMPLER as the error code.
// Returns PI_SUCCESS on the normal path. PI_INVALID_VALUE is the error code
// given to PI_ASSERT for a sampler whose reference count is already zero,
// matching the check piProgramRelease performs.
pi_result piSamplerRelease(pi_sampler Sampler) {
  PI_ASSERT(Sampler, PI_INVALID_SAMPLER);

  bool RefCountZero = false;
  {
    // The guard is scoped so the mutex reached through Sampler is unlocked
    // before Sampler itself is deleted below.
    std::scoped_lock Guard(Sampler->Mutex);

    // Check if the sampler is already released; decrementing a zero count
    // would wrap the counter instead of reaching zero again.
    PI_ASSERT(Sampler->RefCount > 0, PI_INVALID_VALUE);

    if (--(Sampler->RefCount) == 0) {
      // NOTE(review): ZE_CALL is defined outside this view; if it returns
      // early on failure, the object is left alive with a zero count -
      // confirm whether that path needs cleanup.
      ZE_CALL(zeSamplerDestroy, (Sampler->ZeSampler));
      RefCountZero = true;
    }
  }

  if (RefCountZero)
    delete Sampler;

  return PI_SUCCESS;
}
5831
5864
@@ -7947,6 +7980,7 @@ pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName,
7947
7980
PI_ASSERT (Kernel, PI_INVALID_KERNEL);
7948
7981
PI_ASSERT (ParamValue, PI_INVALID_VALUE);
7949
7982
7983
+ std::scoped_lock Guard (Kernel->Mutex );
7950
7984
if (ParamName == PI_USM_INDIRECT_ACCESS &&
7951
7985
*(static_cast <const pi_bool *>(ParamValue)) == PI_TRUE) {
7952
7986
// The whole point for users really was to not need to know anything
0 commit comments