Skip to content

Commit 95d1584

Browse files
committed
[SYCL] Cache pi_events instead of native handles and fix pool handling
1 parent c5f6e9a commit 95d1584

File tree

2 files changed

+97
-74
lines changed

2 files changed

+97
-74
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 82 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -478,10 +478,18 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
478478
std::list<ze_event_pool_handle_t> *ZePoolCache =
479479
getZeEventPoolCache(HostVisible, ProfilingEnabled);
480480

481-
// Remove full pool from the cache.
482481
if (!ZePoolCache->empty()) {
483482
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
484-
ZePoolCache->erase(ZePoolCache->begin());
483+
if (DisableEventsCaching) {
484+
// Remove full pool from the cache if events caching is disabled.
485+
ZePoolCache->erase(ZePoolCache->begin());
486+
} else {
487+
// If event caching is enabled then we don't destroy events so there is
488+
// no need to remove pool from the cache and add it back when it has
489+
// available slots. Just keep it in the tail of the cache so that all
490+
// pools can be destroyed during context destruction.
491+
ZePoolCache->push_front(nullptr);
492+
}
485493
}
486494
}
487495
if (ZePoolCache->empty()) {
@@ -878,16 +886,18 @@ pi_result _pi_context::initialize() {
878886
pi_result _pi_context::finalize() {
879887
// This function is called when pi_context is deallocated, piContextRelease.
880888
// There could be some memory that may have not been deallocated.
881-
// For example, event pool caches would be still alive.
882-
{
883-
std::scoped_lock Lock(ZeEventCacheMutex);
884-
for (auto &ZeEventCache : ZeEventCaches) {
885-
for (auto &ZeEventAndPool : ZeEventCache)
886-
ZE_CALL(zeEventDestroy, (ZeEventAndPool.first));
887-
ZeEventCache.clear();
889+
// For example, event and event pool caches would be still alive.
890+
891+
if (!DisableEventsCaching) {
892+
std::scoped_lock Lock(EventCacheMutex);
893+
for (auto &EventCache : EventCaches) {
894+
for (auto Event : EventCache) {
895+
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
896+
delete Event;
897+
}
898+
EventCache.clear();
888899
}
889900
}
890-
891901
{
892902
std::scoped_lock Lock(ZeEventPoolCacheMutex);
893903
for (auto &ZePoolCache : ZeEventPoolCache) {
@@ -5449,26 +5459,40 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
54495459
return PI_SUCCESS;
54505460
}
54515461

5452-
std::pair<ze_event_handle_t, ze_event_pool_handle_t>
5453-
_pi_context::getZeEventFromCache(bool HostVisible, bool WithProfiling) {
5454-
std::scoped_lock Lock(ZeEventCacheMutex);
5455-
auto Cache = getZeEventCache(HostVisible, WithProfiling);
5462+
// Puts this _pi_event back into an initial state so the object can be reused
// (addEventToCache calls this before storing the event in the cache).
// Clears the queue/command association and the wait list, marks the event as
// neither cleaned up nor completed, sets the reference count back to 1, and
// resets the underlying Level Zero event with zeEventHostReset.
// Returns PI_SUCCESS when the end of the function is reached.
// NOTE(review): ZE_CALL is defined elsewhere; presumably it returns early with
// an error code when zeEventHostReset fails -- confirm.
pi_result _pi_event::reset() {
5463+
Queue = nullptr;
5464+
CleanedUp = false;
5465+
Completed = false;
5466+
CommandData = nullptr;
5467+
CommandType = PI_COMMAND_TYPE_USER;
5468+
WaitList = {};
5469+
RefCount.reset(1);
5470+
5471+
// Drop the HostVisibleEvent pointer only when this event is not itself
// host-visible; otherwise the pointer is left untouched.
if (!isHostVisible())
5472+
HostVisibleEvent = nullptr;
5473+
5474+
ZE_CALL(zeEventHostReset, (ZeEvent));
5475+
return PI_SUCCESS;
5476+
}
5477+
5478+
pi_event _pi_context::getEventFromCache(bool HostVisible, bool WithProfiling) {
5479+
std::scoped_lock Lock(EventCacheMutex);
5480+
auto Cache = getEventCache(HostVisible, WithProfiling);
54565481
if (Cache->empty())
5457-
return std::make_pair<ze_event_handle_t, ze_event_pool_handle_t>(nullptr,
5458-
nullptr);
5482+
return nullptr;
54595483

54605484
auto It = Cache->begin();
5461-
std::pair<ze_event_handle_t, ze_event_pool_handle_t> ZeEvent = *It;
5485+
pi_event Event = *It;
54625486
Cache->erase(It);
5463-
return ZeEvent;
5487+
return Event;
54645488
}
54655489

5466-
void _pi_context::addZeEventToCache(ze_event_handle_t ZeEvent,
5467-
ze_event_pool_handle_t ZePool,
5468-
bool HostVisible, bool WithProfiling) {
5469-
std::scoped_lock Lock(ZeEventCacheMutex);
5470-
auto Cache = getZeEventCache(HostVisible, WithProfiling);
5471-
Cache->emplace_back(ZeEvent, ZePool);
5490+
// Stores Event in this context's cache of reusable events.
// The cache list is selected from the event's host-visibility and profiling
// settings before the event is reset; the event is then reset and appended to
// the back of that list. EventCacheMutex is held for the whole operation.
// NOTE(review): the pi_result returned by Event->reset() is discarded, so an
// event whose reset did not succeed would still be cached -- confirm this is
// intended.
void _pi_context::addEventToCache(pi_event Event) {
5491+
std::scoped_lock Lock(EventCacheMutex);
5492+
auto Cache =
5493+
getEventCache(Event->isHostVisible(), Event->isProfilingEnabled());
5494+
Event->reset();
5495+
Cache->emplace_back(Event);
54725496
}
54735497

54745498
// Helper function for creating a PI event.
@@ -5481,43 +5505,41 @@ static pi_result EventCreate(pi_context Context, pi_queue Queue,
54815505
bool ProfilingEnabled =
54825506
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
54835507

5484-
auto CachedEvent =
5485-
Context->getZeEventFromCache(HostVisible, ProfilingEnabled);
5508+
if (auto CachedEvent =
5509+
Context->getEventFromCache(HostVisible, ProfilingEnabled)) {
5510+
*RetEvent = CachedEvent;
5511+
return PI_SUCCESS;
5512+
}
5513+
54865514
ze_event_handle_t ZeEvent;
54875515
ze_event_pool_handle_t ZeEventPool = {};
54885516

5489-
// If no any then check cache of event in the context
5490-
if (CachedEvent.first) {
5491-
ZeEvent = CachedEvent.first;
5492-
ZeEventPool = CachedEvent.second;
5493-
} else {
5494-
size_t Index = 0;
5495-
5496-
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
5497-
ZeEventPool, Index, HostVisible, ProfilingEnabled))
5498-
return Res;
5517+
size_t Index = 0;
54995518

5500-
ZeStruct<ze_event_desc_t> ZeEventDesc;
5501-
ZeEventDesc.index = Index;
5502-
ZeEventDesc.wait = 0;
5519+
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
5520+
ZeEventPool, Index, HostVisible, ProfilingEnabled))
5521+
return Res;
55035522

5504-
if (HostVisible) {
5505-
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5506-
} else {
5507-
//
5508-
// Set the scope to "device" for every event. This is sufficient for
5509-
// global device access and peer device access. If needed to be seen on
5510-
// the host we are doing special handling, see EventsScope options.
5511-
//
5512-
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5513-
// used in some circumstances.
5514-
//
5515-
ZeEventDesc.signal = 0;
5516-
}
5523+
ZeStruct<ze_event_desc_t> ZeEventDesc;
5524+
ZeEventDesc.index = Index;
5525+
ZeEventDesc.wait = 0;
55175526

5518-
ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5527+
if (HostVisible) {
5528+
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5529+
} else {
5530+
//
5531+
// Set the scope to "device" for every event. This is sufficient for
5532+
// global device access and peer device access. If needed to be seen on
5533+
// the host we are doing special handling, see EventsScope options.
5534+
//
5535+
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5536+
// used in some circumstances.
5537+
//
5538+
ZeEventDesc.signal = 0;
55195539
}
55205540

5541+
ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5542+
55215543
try {
55225544
PI_ASSERT(RetEvent, PI_ERROR_INVALID_VALUE);
55235545

@@ -5869,12 +5891,7 @@ pi_result piEventRelease(pi_event Event) {
58695891
Event->CommandData = nullptr;
58705892
}
58715893
if (Event->OwnZeEvent) {
5872-
if (!DisableEventsCaching) {
5873-
ZE_CALL(zeEventHostReset, (Event->ZeEvent));
5874-
Event->Context->addZeEventToCache(Event->ZeEvent, Event->ZeEventPool,
5875-
Event->isHostVisible(),
5876-
Event->isProfilingEnabled());
5877-
} else {
5894+
if (DisableEventsCaching) {
58785895
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
58795896
auto Context = Event->Context;
58805897
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
@@ -5896,7 +5913,12 @@ pi_result piEventRelease(pi_event Event) {
58965913
if (Event->Queue) {
58975914
PI_CALL(piQueueReleaseInternal(Event->Queue));
58985915
}
5899-
delete Event;
5916+
5917+
if (DisableEventsCaching || !Event->OwnZeEvent) {
5918+
delete Event;
5919+
} else {
5920+
Event->Context->addEventToCache(Event);
5921+
}
59005922

59015923
return PI_SUCCESS;
59025924
}

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ template <class T> struct ZeCache : private T {
276276
struct ReferenceCounter {
277277
ReferenceCounter(pi_uint32 InitVal) : RefCount{InitVal} {}
278278

279+
// Overwrites the current count with InitVal. _pi_event::reset() uses this to
// set the count back to 1 when an event object is recycled.
void reset(pi_uint32 InitVal) { RefCount = InitVal; }
280+
279281
// Used when retaining an object.
280282
void increment() { RefCount++; }
281283

@@ -750,14 +752,11 @@ struct _pi_context : _pi_object {
750752
// when kernel has finished execution.
751753
std::unordered_map<void *, MemAllocRecord> MemAllocs;
752754

753-
// Get a native event and its pool from cache.
754-
std::pair<ze_event_handle_t, ze_event_pool_handle_t>
755-
getZeEventFromCache(bool HostVisible, bool WithProfiling);
755+
// Get pi_event from cache.
756+
pi_event getEventFromCache(bool HostVisible, bool WithProfiling);
756757

757-
// Add a native event and its pool to cache.
758-
void addZeEventToCache(ze_event_handle_t ZeEvent,
759-
ze_event_pool_handle_t ZePool, bool HostVisible,
760-
bool WithProfiling);
758+
// Add pi_event to cache.
759+
void addEventToCache(pi_event);
761760

762761
private:
763762
// If context contains one device then return this device.
@@ -809,18 +808,17 @@ struct _pi_context : _pi_object {
809808
pi_mutex ZeEventPoolCacheMutex;
810809

811810
// Mutex to control operations on event caches.
812-
pi_mutex ZeEventCacheMutex;
811+
pi_mutex EventCacheMutex;
813812

814-
// Caches for native event handles.
815-
std::vector<std::list<std::pair<ze_event_handle_t, ze_event_pool_handle_t>>>
816-
ZeEventCaches{4};
813+
// Caches for events.
814+
std::vector<std::list<pi_event>> EventCaches{4};
817815

818816
// Get the cache of events for a provided scope and profiling mode.
819-
auto getZeEventCache(bool HostVisible, bool WithProfiling) {
817+
auto getEventCache(bool HostVisible, bool WithProfiling) {
820818
if (HostVisible)
821-
return WithProfiling ? &ZeEventCaches[0] : &ZeEventCaches[1];
819+
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
822820
else
823-
return WithProfiling ? &ZeEventCaches[2] : &ZeEventCaches[3];
821+
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
824822
}
825823
};
826824

@@ -1374,6 +1372,9 @@ struct _pi_event : _pi_object {
13741372
// L0 event (if any) is not guaranteed to have been signalled, or
13751373
// being visible to the host at all.
13761374
bool Completed = {false};
1375+
1376+
// Reset _pi_event object.
1377+
pi_result reset();
13771378
};
13781379

13791380
struct _pi_program : _pi_object {

0 commit comments

Comments
 (0)