Skip to content

Commit 2ce2ca6

Browse files
authored
[SYCL] Store command lists cache in the context class (#3214)
Currently the command list cache is stored in the device class, and command lists are destroyed only in piTearDown, when the device objects are destructed. That is too late, because the contexts associated with those command lists are no longer alive by then. It is more reasonable to store the cache in the context class and destroy the command lists associated with a context during piContextRelease, since command lists are not usable after their context is destroyed.
1 parent b1371f5 commit 2ce2ca6

File tree

2 files changed

+63
-69
lines changed

2 files changed

+63
-69
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -415,20 +415,6 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr,
415415
if (!(condition)) \
416416
return error;
417417

418-
// Destroy all the command lists associated with this device.
419-
// This is required when destructing the _pi_device object.
420-
// During the piTearDown process, platforms and root devices are
421-
// destroyed automatically regardless of their reference counts.
422-
// So, this destructor should explicitly call zeCommandListDestroy
423-
// to avoid memory leaks.
424-
_pi_device::~_pi_device() {
425-
std::lock_guard<std::mutex> Lock(ZeCommandListCacheMutex);
426-
for (ze_command_list_handle_t &ZeCommandList : ZeCommandListCache) {
427-
if (ZeCommandList)
428-
ZE_CALL_NOCHECK(zeCommandListDestroy(ZeCommandList));
429-
}
430-
}
431-
432418
// This helper function increments the reference counter of the Queue
433419
// without guarding with a lock.
434420
// It is the caller's responsibility to make sure the lock is acquired
@@ -522,6 +508,12 @@ pi_result _pi_context::finalize() {
522508

523509
// Destroy the command list used for initializations
524510
ZE_CALL(zeCommandListDestroy(ZeCommandListInit));
511+
512+
std::lock_guard<std::mutex> Lock(ZeCommandListCacheMutex);
513+
for (ze_command_list_handle_t &ZeCommandList : ZeCommandListCache) {
514+
if (ZeCommandList)
515+
ZE_CALL(zeCommandListDestroy(ZeCommandList));
516+
}
525517
return PI_SUCCESS;
526518
}
527519

@@ -534,8 +526,8 @@ _pi_queue::resetCommandListFenceEntry(ze_command_list_handle_t ZeCommandList,
534526
ZE_CALL(zeFenceReset(this->ZeCommandListFenceMap[ZeCommandList]));
535527
ZE_CALL(zeCommandListReset(ZeCommandList));
536528
if (MakeAvailable) {
537-
std::lock_guard<std::mutex> lock(this->Device->ZeCommandListCacheMutex);
538-
this->Device->ZeCommandListCache.push_back(ZeCommandList);
529+
std::lock_guard<std::mutex> lock(this->Context->ZeCommandListCacheMutex);
530+
this->Context->ZeCommandListCache.push_back(ZeCommandList);
539531
}
540532

541533
return PI_SUCCESS;
@@ -559,7 +551,7 @@ static const pi_uint32 ZeCommandListBatchSize = [] {
559551

560552
// Retrieve an available command list to be used in a PI call
561553
// Caller must hold a lock on the Queue passed in.
562-
pi_result _pi_device::getAvailableCommandList(
554+
pi_result _pi_context::getAvailableCommandList(
563555
pi_queue Queue, ze_command_list_handle_t *ZeCommandList,
564556
ze_fence_handle_t *ZeFence, bool AllowBatching) {
565557
// First see if there is a command-list open for batching commands
@@ -595,10 +587,10 @@ pi_result _pi_device::getAvailableCommandList(
595587
{
596588
// Make sure to acquire the lock before checking the size, or there
597589
// will be a race condition.
598-
std::lock_guard<std::mutex> lock(Queue->Device->ZeCommandListCacheMutex);
590+
std::lock_guard<std::mutex> lock(Queue->Context->ZeCommandListCacheMutex);
599591

600-
if (Queue->Device->ZeCommandListCache.size() > 0) {
601-
*ZeCommandList = Queue->Device->ZeCommandListCache.front();
592+
if (Queue->Context->ZeCommandListCache.size() > 0) {
593+
*ZeCommandList = Queue->Context->ZeCommandListCache.front();
602594
auto it = Queue->ZeCommandListFenceMap.find(*ZeCommandList);
603595
if (it != Queue->ZeCommandListFenceMap.end()) {
604596
*ZeFence = it->second;
@@ -610,7 +602,7 @@ pi_result _pi_device::getAvailableCommandList(
610602
ZE_CALL(zeFenceCreate(Queue->ZeCommandQueue, &ZeFenceDesc, ZeFence));
611603
Queue->ZeCommandListFenceMap[*ZeCommandList] = *ZeFence;
612604
}
613-
Queue->Device->ZeCommandListCache.pop_front();
605+
Queue->Context->ZeCommandListCache.pop_front();
614606
return PI_SUCCESS;
615607
}
616608
}
@@ -636,12 +628,14 @@ pi_result _pi_device::getAvailableCommandList(
636628
// command lists we can create.
637629
// Once created, this command list & fence are added to the command list fence
638630
// map.
639-
if ((*ZeCommandList == nullptr) && (this->Platform->ZeGlobalCommandListCount <
640-
this->Platform->ZeMaxCommandListCache)) {
641-
ZE_CALL(zeCommandListCreate(Queue->Context->ZeContext, ZeDevice,
642-
&ZeCommandListDesc, ZeCommandList));
631+
if ((*ZeCommandList == nullptr) &&
632+
(Queue->Device->Platform->ZeGlobalCommandListCount <
633+
Queue->Device->Platform->ZeMaxCommandListCache)) {
634+
ZE_CALL(zeCommandListCreate(Queue->Context->ZeContext,
635+
Queue->Device->ZeDevice, &ZeCommandListDesc,
636+
ZeCommandList));
643637
// Increments the total number of command lists created on this platform.
644-
this->Platform->ZeGlobalCommandListCount++;
638+
Queue->Device->Platform->ZeGlobalCommandListCount++;
645639
ZE_CALL(zeFenceCreate(Queue->ZeCommandQueue, &ZeFenceDesc, ZeFence));
646640
Queue->ZeCommandListFenceMap.insert(
647641
std::pair<ze_command_list_handle_t, ze_fence_handle_t>(*ZeCommandList,
@@ -3594,8 +3588,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
35943588
// Get a new command list to be used on this call
35953589
ze_command_list_handle_t ZeCommandList = nullptr;
35963590
ze_fence_handle_t ZeFence = nullptr;
3597-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
3598-
&ZeFence, true))
3591+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
3592+
&ZeFence, true))
35993593
return Res;
36003594

36013595
ze_event_handle_t ZeEvent = nullptr;
@@ -4111,8 +4105,8 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
41114105
// Get a new command list to be used on this call
41124106
ze_command_list_handle_t ZeCommandList = nullptr;
41134107
ze_fence_handle_t ZeFence = nullptr;
4114-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4115-
&ZeFence))
4108+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4109+
&ZeFence))
41164110
return Res;
41174111

41184112
ze_event_handle_t ZeEvent = nullptr;
@@ -4188,8 +4182,8 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
41884182
// Get a new command list to be used on this call
41894183
ze_command_list_handle_t ZeCommandList = nullptr;
41904184
ze_fence_handle_t ZeFence = nullptr;
4191-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4192-
&ZeFence))
4185+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4186+
&ZeFence))
41934187
return Res;
41944188

41954189
ze_event_handle_t ZeEvent = nullptr;
@@ -4245,8 +4239,8 @@ static pi_result enqueueMemCopyRectHelper(
42454239
// Get a new command list to be used on this call
42464240
ze_command_list_handle_t ZeCommandList = nullptr;
42474241
ze_fence_handle_t ZeFence = nullptr;
4248-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4249-
&ZeFence))
4242+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4243+
&ZeFence))
42504244
return Res;
42514245

42524246
ze_event_handle_t ZeEvent = nullptr;
@@ -4412,8 +4406,8 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
44124406
// Get a new command list to be used on this call
44134407
ze_command_list_handle_t ZeCommandList = nullptr;
44144408
ze_fence_handle_t ZeFence = nullptr;
4415-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4416-
&ZeFence))
4409+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4410+
&ZeFence))
44174411
return Res;
44184412

44194413
ze_event_handle_t ZeEvent = nullptr;
@@ -4538,8 +4532,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
45384532
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
45394533

45404534
// For discrete devices we need a command list
4541-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4542-
&ZeFence))
4535+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4536+
&ZeFence))
45434537
return Res;
45444538

45454539
// Set the commandlist in the event
@@ -4638,8 +4632,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
46384632
// Lock automatically releases when this goes out of scope.
46394633
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
46404634

4641-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4642-
&ZeFence))
4635+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4636+
&ZeFence))
46434637
return Res;
46444638

46454639
// Set the commandlist in the event
@@ -4750,8 +4744,8 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
47504744
// Get a new command list to be used on this call
47514745
ze_command_list_handle_t ZeCommandList = nullptr;
47524746
ze_fence_handle_t ZeFence = nullptr;
4753-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
4754-
&ZeFence))
4747+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
4748+
&ZeFence))
47554749
return Res;
47564750

47574751
ze_event_handle_t ZeEvent = nullptr;
@@ -5357,8 +5351,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
53575351
// Get a new command list to be used on this call
53585352
ze_command_list_handle_t ZeCommandList = nullptr;
53595353
ze_fence_handle_t ZeFence = nullptr;
5360-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
5361-
&ZeFence))
5354+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
5355+
&ZeFence))
53625356
return Res;
53635357

53645358
// TODO: do we need to create a unique command type for this?
@@ -5413,8 +5407,8 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
54135407
// Get a new command list to be used on this call
54145408
ze_command_list_handle_t ZeCommandList = nullptr;
54155409
ze_fence_handle_t ZeFence = nullptr;
5416-
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
5417-
&ZeFence))
5410+
if (auto Res = Queue->Context->getAvailableCommandList(Queue, &ZeCommandList,
5411+
&ZeFence))
54185412
return Res;
54195413

54205414
// TODO: do we need to create a unique command type for this?

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@ struct _pi_device : _pi_object {
142142
// NOTE: one must additionally call initialize() to complete
143143
// PI device creation.
144144
}
145-
~_pi_device();
146145

147146
// Keep the ordinal of a "compute" commands group, where we send all
148147
// commands currently.
@@ -160,33 +159,11 @@ struct _pi_device : _pi_object {
160159
// PI platform to which this device belongs.
161160
pi_platform Platform;
162161

163-
// Mutex Lock for the Command List Cache
164-
std::mutex ZeCommandListCacheMutex;
165-
// Cache of all currently Available Command Lists for use by PI APIs
166-
std::list<ze_command_list_handle_t> ZeCommandListCache;
167-
168162
// Indicates if this is a root-device or a sub-device.
169163
// Technically this information can be queried from a device handle, but it
170164
// seems better to just keep it here.
171165
bool IsSubDevice;
172166

173-
// Retrieves a command list for executing on this device along with
174-
// a fence to be used in tracking the execution of this command list.
175-
// If a command list has been created on this device which has
176-
// completed its commands, then that command list and its associated fence
177-
// will be reused. Otherwise, a new command list and fence will be created for
178-
// running on this device. L0 fences are created on a L0 command queue so the
179-
// caller must pass a command queue to create a new fence for the new command
180-
// list if a command list/fence pair is not available. All Command Lists &
181-
// associated fences are destroyed at Device Release.
182-
// If AllowBatching is true, then the command list returned may already have
183-
// command in it, if AllowBatching is false, any open command lists that
184-
// already exist in Queue will be closed and executed.
185-
pi_result getAvailableCommandList(pi_queue Queue,
186-
ze_command_list_handle_t *ZeCommandList,
187-
ze_fence_handle_t *ZeFence,
188-
bool AllowBatching = false);
189-
190167
// Cache of the immutable device properties.
191168
ze_device_properties_t ZeDeviceProperties;
192169
ze_device_compute_properties_t ZeDeviceComputeProperties;
@@ -237,6 +214,29 @@ struct _pi_context : _pi_object {
237214
// support of the multiple devices per context will be added.
238215
ze_command_list_handle_t ZeCommandListInit;
239216

217+
// Mutex Lock for the Command List Cache
218+
std::mutex ZeCommandListCacheMutex;
219+
220+
// Cache of all currently Available Command Lists for use by PI APIs
221+
std::list<ze_command_list_handle_t> ZeCommandListCache;
222+
223+
// Retrieves a command list for executing on this device along with
224+
// a fence to be used in tracking the execution of this command list.
225+
// If a command list has been created on this device which has
226+
// completed its commands, then that command list and its associated fence
227+
// will be reused. Otherwise, a new command list and fence will be created for
228+
// running on this device. L0 fences are created on a L0 command queue so the
229+
// caller must pass a command queue to create a new fence for the new command
230+
// list if a command list/fence pair is not available. All Command Lists &
231+
// associated fences are destroyed at Device Release. NOTE(review): stale - the cache is now destroyed in _pi_context::finalize; confirm fence teardown.
232+
// If AllowBatching is true, then the command list returned may already have
233+
// command in it, if AllowBatching is false, any open command lists that
234+
// already exist in Queue will be closed and executed.
235+
pi_result getAvailableCommandList(pi_queue Queue,
236+
ze_command_list_handle_t *ZeCommandList,
237+
ze_fence_handle_t *ZeFence,
238+
bool AllowBatching = false);
239+
240240
// Get index of the free slot in the available pool. If there is no available
241241
// pool then create new one.
242242
pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &);

0 commit comments

Comments
 (0)