Skip to content

Commit a37c10b

Browse files
authored
[SYCL] Protect access to the kernel and sampler objects with a mutex (#5232)
Protect access to all the non-const member variables of the sampler and kernel classes with a mutex to improve thread-safety.
1 parent 0cec3c6 commit a37c10b

File tree

2 files changed

+89
-46
lines changed

2 files changed

+89
-46
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 80 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4557,11 +4557,18 @@ pi_result piProgramRetain(pi_program Program) {
45574557

45584558
pi_result piProgramRelease(pi_program Program) {
45594559
PI_ASSERT(Program, PI_INVALID_PROGRAM);
4560-
// Check if the program is already released
4561-
PI_ASSERT(Program->RefCount > 0, PI_INVALID_VALUE);
4562-
if (--(Program->RefCount) == 0) {
4563-
delete Program;
4560+
bool RefCountZero = false;
4561+
{
4562+
std::scoped_lock Guard(Program->Mutex);
4563+
// Check if the program is already released
4564+
PI_ASSERT(Program->RefCount > 0, PI_INVALID_VALUE);
4565+
if (--(Program->RefCount) == 0) {
4566+
RefCountZero = true;
4567+
}
45644568
}
4569+
if (RefCountZero)
4570+
delete Program;
4571+
45654572
return PI_SUCCESS;
45664573
}
45674574

@@ -4731,6 +4738,7 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
47314738

47324739
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
47334740

4741+
std::scoped_lock Guard(Kernel->Mutex);
47344742
ZE_CALL(zeKernelSetArgumentValue,
47354743
(pi_cast<ze_kernel_handle_t>(Kernel->ZeKernel),
47364744
pi_cast<uint32_t>(ArgIndex), pi_cast<size_t>(ArgSize),
@@ -4758,8 +4766,10 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
47584766
// Improve that by passing SYCL buffer accessor type into
47594767
// piextKernelSetArgMemObj.
47604768
//
4769+
std::scoped_lock Guard(Kernel->Mutex);
47614770
Kernel->PendingArguments.push_back(
47624771
{ArgIndex, sizeof(void *), *ArgValue, _pi_mem::read_write});
4772+
47634773
return PI_SUCCESS;
47644774
}
47654775

@@ -4768,6 +4778,7 @@ pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex,
47684778
const pi_sampler *ArgValue) {
47694779
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
47704780

4781+
std::scoped_lock Guard(Kernel->Mutex);
47714782
ZE_CALL(zeKernelSetArgumentValue,
47724783
(pi_cast<ze_kernel_handle_t>(Kernel->ZeKernel),
47734784
pi_cast<uint32_t>(ArgIndex), sizeof(void *),
@@ -4782,6 +4793,8 @@ pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName,
47824793
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
47834794

47844795
ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
4796+
4797+
std::shared_lock Guard(Kernel->Mutex);
47854798
switch (ParamName) {
47864799
case PI_KERNEL_INFO_CONTEXT:
47874800
return ReturnValue(pi_context{Kernel->Program->Context});
@@ -4832,6 +4845,8 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device,
48324845
PI_ASSERT(Device, PI_INVALID_DEVICE);
48334846

48344847
ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
4848+
4849+
std::shared_lock Guard(Kernel->Mutex);
48354850
switch (ParamName) {
48364851
case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
48374852
// TODO: To revisit after level_zero/issues/262 is resolved
@@ -4887,6 +4902,7 @@ pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device,
48874902

48884903
ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
48894904

4905+
std::shared_lock Guard(Kernel->Mutex);
48904906
if (ParamName == PI_KERNEL_MAX_SUB_GROUP_SIZE) {
48914907
ReturnValue(uint32_t{Kernel->ZeKernelProperties->maxSubgroupSize});
48924908
} else if (ParamName == PI_KERNEL_MAX_NUM_SUB_GROUPS) {
@@ -4906,49 +4922,58 @@ pi_result piKernelRetain(pi_kernel Kernel) {
49064922

49074923
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
49084924

4909-
++(Kernel->RefCount);
4910-
// When retaining a kernel, you are also retaining the program it is part of.
4911-
PI_CALL(piProgramRetain(Kernel->Program));
4925+
// When retaining a kernel, you are also retaining the program it is part
4926+
// of.
4927+
std::scoped_lock Lock(Kernel->Mutex, Kernel->Program->Mutex);
4928+
Kernel->retain();
49124929
return PI_SUCCESS;
49134930
}
49144931

49154932
pi_result piKernelRelease(pi_kernel Kernel) {
49164933

49174934
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
4935+
pi_program KernelProgram = nullptr;
4936+
bool RefCountZero = false;
4937+
{
4938+
std::scoped_lock Guard(Kernel->Mutex);
4939+
KernelProgram = Kernel->Program;
4940+
if (IndirectAccessTrackingEnabled) {
4941+
// piKernelRelease is called by Event->cleanup() as soon as kernel
4942+
// execution has finished. This is the place where we need to release
4943+
// memory allocations. If kernel is not in use (not submitted by some
4944+
// other thread) then release referenced memory allocations. As a result,
4945+
// memory can be deallocated and context can be removed from container in
4946+
// the platform. That's why we need to lock a mutex here.
4947+
pi_platform Plt = KernelProgram->Context->getPlatform();
4948+
std::lock_guard<std::mutex> ContextsLock(Plt->ContextsMutex);
4949+
4950+
if (--Kernel->SubmissionsCount == 0) {
4951+
// Kernel is not submitted for execution, release referenced memory
4952+
// allocations.
4953+
for (auto &MemAlloc : Kernel->MemAllocs) {
4954+
USMFreeHelper(MemAlloc->second.Context, MemAlloc->first,
4955+
MemAlloc->second.OwnZeMemHandle);
4956+
}
4957+
Kernel->MemAllocs.clear();
4958+
}
4959+
}
49184960

4919-
if (IndirectAccessTrackingEnabled) {
4920-
// piKernelRelease is called by Event->cleanup() as soon as kernel
4921-
// execution has finished. This is the place where we need to release memory
4922-
// allocations. If kernel is not in use (not submitted by some other thread)
4923-
// then release referenced memory allocations. As a result, memory can be
4924-
// deallocated and context can be removed from container in the platform.
4925-
// That's why we need to lock a mutex here.
4926-
pi_platform Plt = Kernel->Program->Context->getPlatform();
4927-
std::lock_guard<std::mutex> ContextsLock(Plt->ContextsMutex);
4928-
4929-
if (--Kernel->SubmissionsCount == 0) {
4930-
// Kernel is not submitted for execution, release referenced memory
4931-
// allocations.
4932-
for (auto &MemAlloc : Kernel->MemAllocs) {
4933-
USMFreeHelper(MemAlloc->second.Context, MemAlloc->first,
4934-
MemAlloc->second.OwnZeMemHandle);
4961+
if (--(Kernel->RefCount) == 0) {
4962+
if (Kernel->OwnZeKernel)
4963+
ZE_CALL(zeKernelDestroy, (Kernel->ZeKernel));
4964+
if (IndirectAccessTrackingEnabled) {
4965+
PI_CALL(piContextRelease(KernelProgram->Context));
49354966
}
4936-
Kernel->MemAllocs.clear();
4967+
RefCountZero = true;
49374968
}
49384969
}
49394970

4940-
auto KernelProgram = Kernel->Program;
4941-
if (--(Kernel->RefCount) == 0) {
4942-
if (Kernel->OwnZeKernel)
4943-
ZE_CALL(zeKernelDestroy, (Kernel->ZeKernel));
4944-
if (IndirectAccessTrackingEnabled) {
4945-
PI_CALL(piContextRelease(KernelProgram->Context));
4946-
}
4971+
if (RefCountZero)
49474972
delete Kernel;
4948-
}
49494973

4950-
// do a release on the program this kernel was part of
4951-
PI_CALL(piProgramRelease(KernelProgram));
4974+
if (KernelProgram)
4975+
// do a release on the program this kernel was part of
4976+
PI_CALL(piProgramRelease(KernelProgram));
49524977

49534978
return PI_SUCCESS;
49544979
}
@@ -4964,6 +4989,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
49644989
PI_ASSERT(Event, PI_INVALID_EVENT);
49654990
PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_INVALID_WORK_DIMENSION);
49664991

4992+
// Lock automatically releases when this goes out of scope.
4993+
std::scoped_lock Lock(Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex);
49674994
if (GlobalWorkOffset != NULL) {
49684995
if (!PiDriverGlobalOffsetExtensionFound) {
49694996
zePrint("No global offset extension found on this driver\n");
@@ -5049,9 +5076,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
50495076

50505077
ZE_CALL(zeKernelSetGroupSize, (Kernel->ZeKernel, WG[0], WG[1], WG[2]));
50515078

5052-
// Lock automatically releases when this goes out of scope.
5053-
std::scoped_lock QueueLock(Queue->Mutex);
5054-
50555079
_pi_ze_event_list_t TmpWaitList;
50565080

50575081
if (auto Res = TmpWaitList.createAndRetainPiZeEventList(NumEventsInWaitList,
@@ -5077,12 +5101,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
50775101
// the code can do a piKernelRelease on this kernel.
50785102
(*Event)->CommandData = (void *)Kernel;
50795103

5080-
// Use piKernelRetain to increment the reference count and indicate
5081-
// that the Kernel is in use. Once the event has been signalled, the
5082-
// code in Event.cleanup() will do a piReleaseKernel to update
5083-
// the reference count on the kernel, using the kernel saved
5084-
// in CommandData.
5085-
PI_CALL(piKernelRetain(Kernel));
5104+
// Increment the reference count of the Kernel and indicate that the Kernel is
5105+
// in use. Once the event has been signalled, the code in Event.cleanup() will
5106+
// do a piReleaseKernel to update the reference count on the kernel, using the
5107+
// kernel saved in CommandData.
5108+
Kernel->retain();
50865109

50875110
// Add to list of kernels to be submitted
50885111
if (IndirectAccessTrackingEnabled)
@@ -5152,6 +5175,7 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
51525175
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
51535176
PI_ASSERT(NativeHandle, PI_INVALID_VALUE);
51545177

5178+
std::shared_lock Guard(Kernel->Mutex);
51555179
auto *ZeKernel = pi_cast<ze_kernel_handle_t *>(NativeHandle);
51565180
*ZeKernel = Kernel->ZeKernel;
51575181
return PI_SUCCESS;
@@ -5815,17 +5839,26 @@ pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName,
58155839
pi_result piSamplerRetain(pi_sampler Sampler) {
58165840
PI_ASSERT(Sampler, PI_INVALID_SAMPLER);
58175841

5842+
std::scoped_lock Guard(Sampler->Mutex);
58185843
++(Sampler->RefCount);
58195844
return PI_SUCCESS;
58205845
}
58215846

58225847
pi_result piSamplerRelease(pi_sampler Sampler) {
58235848
PI_ASSERT(Sampler, PI_INVALID_SAMPLER);
58245849

5825-
if (--(Sampler->RefCount) == 0) {
5826-
ZE_CALL(zeSamplerDestroy, (Sampler->ZeSampler));
5827-
delete Sampler;
5850+
bool RefCountZero = false;
5851+
{
5852+
std::scoped_lock Guard(Sampler->Mutex);
5853+
if (--(Sampler->RefCount) == 0) {
5854+
ZE_CALL(zeSamplerDestroy, (Sampler->ZeSampler));
5855+
RefCountZero = true;
5856+
}
58285857
}
5858+
5859+
if (RefCountZero)
5860+
delete Sampler;
5861+
58295862
return PI_SUCCESS;
58305863
}
58315864

@@ -7947,6 +7980,7 @@ pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName,
79477980
PI_ASSERT(Kernel, PI_INVALID_KERNEL);
79487981
PI_ASSERT(ParamValue, PI_INVALID_VALUE);
79497982

7983+
std::scoped_lock Guard(Kernel->Mutex);
79507984
if (ParamName == PI_USM_INDIRECT_ACCESS &&
79517985
*(static_cast<const pi_bool *>(ParamValue)) == PI_TRUE) {
79527986
// The whole point for users really was to not need to know anything

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,8 @@ struct _pi_object {
261261
// std::shared_lock Obj3Lock(Obj3->Mutex, std::defer_lock);
262262
// std::scoped_lock LockAll(Obj1->Mutex, Obj2->Mutex, Obj3Lock);
263263
pi_shared_mutex Mutex;
264+
265+
void retain() { ++RefCount; }
264266
};
265267

266268
// Record for a memory allocation. This structure is used to keep information
@@ -1393,6 +1395,13 @@ struct _pi_kernel : _pi_object {
13931395
return true;
13941396
}
13951397

1398+
// The caller must lock access to the kernel and program.
1399+
void retain() {
1400+
++RefCount;
1401+
// When retaining a kernel, you are also retaining the program it is part
1402+
// of.
1403+
Program->retain();
1404+
}
13961405
// Level Zero function handle.
13971406
ze_kernel_handle_t ZeKernel;
13981407

0 commit comments

Comments
 (0)