Skip to content

Commit a41b33c

Browse files
authored
[SYCL] Cache and reuse events in the Level Zero plugin. (#6484)
Avoid creating new Level Zero events. Reset native event handles and put them in the cache instead of removing.
1 parent 3e03f30 commit a41b33c

File tree

2 files changed

+121
-17
lines changed

2 files changed

+121
-17
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

File mode changed from 100755 to 100644
Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,16 @@ static const bool UseMultipleCmdlistBarriers = [] {
9999
return std::stoi(UseMultipleCmdlistBarriersFlag) > 0;
100100
}();
101101

102+
// Experimental control: setting SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING to
// a non-zero integer disables caching of events in the context.
static const bool DisableEventsCaching = [] {
  const char *Flag = std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING");
  // Unset => caching stays enabled; otherwise any non-zero value disables it.
  return Flag != nullptr && std::stoi(Flag) != 0;
}();
111+
102112
// This class encapsulates actions taken along with a call to Level Zero API.
103113
class ZeCall {
104114
private:
@@ -468,10 +478,18 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
468478
std::list<ze_event_pool_handle_t> *ZePoolCache =
469479
getZeEventPoolCache(HostVisible, ProfilingEnabled);
470480

471-
// Remove full pool from the cache.
472481
if (!ZePoolCache->empty()) {
473482
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
474-
ZePoolCache->erase(ZePoolCache->begin());
483+
if (DisableEventsCaching) {
484+
// Remove full pool from the cache if events caching is disabled.
485+
ZePoolCache->erase(ZePoolCache->begin());
486+
} else {
487+
// If event caching is enabled then we don't destroy events so there is
488+
// no need to remove pool from the cache and add it back when it has
489+
// available slots. Just keep it in the tail of the cache so that all
490+
// pools can be destroyed during context destruction.
491+
ZePoolCache->push_front(nullptr);
492+
}
475493
}
476494
}
477495
if (ZePoolCache->empty()) {
@@ -868,7 +886,18 @@ pi_result _pi_context::initialize() {
868886
pi_result _pi_context::finalize() {
869887
// This function is called when pi_context is deallocated, piContextRelease.
870888
// There could be some memory that may have not been deallocated.
871-
// For example, event pool caches would be still alive.
889+
// For example, event and event pool caches would be still alive.
890+
891+
if (!DisableEventsCaching) {
892+
std::scoped_lock Lock(EventCacheMutex);
893+
for (auto &EventCache : EventCaches) {
894+
for (auto Event : EventCache) {
895+
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
896+
delete Event;
897+
}
898+
EventCache.clear();
899+
}
900+
}
872901
{
873902
std::scoped_lock Lock(ZeEventPoolCacheMutex);
874903
for (auto &ZePoolCache : ZeEventPoolCache) {
@@ -5430,24 +5459,67 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
54305459
return PI_SUCCESS;
54315460
}
54325461

5462+
// Restore a recycled _pi_event to its freshly-created state so it can be
// handed out again from the context's event cache (see
// _pi_context::addEventToCache). The native Level Zero event handle is kept
// and host-reset instead of being destroyed.
pi_result _pi_event::reset() {
  Queue = nullptr;
  CleanedUp = false;
  Completed = false;
  CommandData = nullptr;
  CommandType = PI_COMMAND_TYPE_USER;
  WaitList = {};
  RefCount.reset(); // back to the initial reference count of 1

  // A device-scope event may have had a host-visible proxy attached; drop it
  // so the next user attaches one on demand.
  if (!isHostVisible())
    HostVisibleEvent = nullptr;

  // Reset the native event signal state (ZE_CALL propagates any L0 error).
  ZE_CALL(zeEventHostReset, (ZeEvent));
  return PI_SUCCESS;
}
5477+
5478+
// Fetch a previously cached _pi_event matching the requested host-visibility
// and profiling mode. Returns nullptr when that cache is empty; the caller
// then creates a fresh event instead.
pi_event _pi_context::getEventFromCache(bool HostVisible, bool WithProfiling) {
  std::scoped_lock Lock(EventCacheMutex);
  auto Cache = getEventCache(HostVisible, WithProfiling);
  if (Cache->empty())
    return nullptr;

  // Pop the oldest cached event off the front of the list.
  pi_event CachedEvent = Cache->front();
  Cache->pop_front();
  return CachedEvent;
}
5489+
5490+
// Reset the given event and stash it in the cache that matches its
// host-visibility/profiling configuration, for later reuse by EventCreate.
void _pi_context::addEventToCache(pi_event Event) {
  std::scoped_lock Lock(EventCacheMutex);
  // Capture the cache selectors before resetting the event's state.
  const bool HostVisible = Event->isHostVisible();
  const bool WithProfiling = Event->isProfilingEnabled();
  Event->reset();
  getEventCache(HostVisible, WithProfiling)->push_back(Event);
}
5497+
54335498
// Helper function for creating a PI event.
54345499
// The "Queue" argument specifies the PI queue where a command is submitted.
54355500
// The "HostVisible" argument specifies if event needs to be allocated from
54365501
// a host-visible pool.
54375502
//
54385503
static pi_result EventCreate(pi_context Context, pi_queue Queue,
54395504
bool HostVisible, pi_event *RetEvent) {
5440-
54415505
bool ProfilingEnabled =
54425506
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
54435507

5444-
size_t Index = 0;
5508+
if (auto CachedEvent =
5509+
Context->getEventFromCache(HostVisible, ProfilingEnabled)) {
5510+
*RetEvent = CachedEvent;
5511+
return PI_SUCCESS;
5512+
}
5513+
5514+
ze_event_handle_t ZeEvent;
54455515
ze_event_pool_handle_t ZeEventPool = {};
5516+
5517+
size_t Index = 0;
5518+
54465519
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
54475520
ZeEventPool, Index, HostVisible, ProfilingEnabled))
54485521
return Res;
54495522

5450-
ze_event_handle_t ZeEvent;
54515523
ZeStruct<ze_event_desc_t> ZeEventDesc;
54525524
ZeEventDesc.index = Index;
54535525
ZeEventDesc.wait = 0;
@@ -5456,9 +5528,9 @@ static pi_result EventCreate(pi_context Context, pi_queue Queue,
54565528
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
54575529
} else {
54585530
//
5459-
// Set the scope to "device" for every event. This is sufficient for global
5460-
// device access and peer device access. If needed to be seen on the host
5461-
// we are doing special handling, see EventsScope options.
5531+
// Set the scope to "device" for every event. This is sufficient for
5532+
// global device access and peer device access. If needed to be seen on
5533+
// the host we are doing special handling, see EventsScope options.
54625534
//
54635535
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
54645536
// used in some circumstances.
@@ -5819,7 +5891,12 @@ pi_result piEventRelease(pi_event Event) {
58195891
Event->CommandData = nullptr;
58205892
}
58215893
if (Event->OwnZeEvent) {
5822-
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
5894+
if (DisableEventsCaching) {
5895+
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
5896+
auto Context = Event->Context;
5897+
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
5898+
return Res;
5899+
}
58235900
}
58245901
// It is possible that host-visible event was never created.
58255902
// In case it was check if that's different from this same event
@@ -5829,18 +5906,19 @@ pi_result piEventRelease(pi_event Event) {
58295906
PI_CALL(piEventRelease(Event->HostVisibleEvent));
58305907
}
58315908

5832-
auto Context = Event->Context;
5833-
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
5834-
return Res;
5835-
58365909
// We intentionally incremented the reference counter when an event is
58375910
// created so that we can avoid pi_queue is released before the associated
58385911
// pi_event is released. Here we have to decrement it so pi_queue
58395912
// can be released successfully.
58405913
if (Event->Queue) {
58415914
PI_CALL(piQueueReleaseInternal(Event->Queue));
58425915
}
5843-
delete Event;
5916+
5917+
if (DisableEventsCaching || !Event->OwnZeEvent) {
5918+
delete Event;
5919+
} else {
5920+
Event->Context->addEventToCache(Event);
5921+
}
58445922

58455923
return PI_SUCCESS;
58465924
}

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,10 @@ template <class T> struct ZeCache : private T {
274274
// thread can reach ref count equal to zero, i.e. only a single thread can pass
275275
// through this check.
276276
struct ReferenceCounter {
277-
ReferenceCounter(pi_uint32 InitVal) : RefCount{InitVal} {}
277+
ReferenceCounter() : RefCount{1} {}
278+
279+
// Reset the counter to the initial value.
280+
void reset() { RefCount = 1; }
278281

279282
// Used when retaining an object.
280283
void increment() { RefCount++; }
@@ -306,7 +309,7 @@ struct ReferenceCounter {
306309

307310
// Base class to store common data
308311
struct _pi_object {
309-
_pi_object() : RefCount{1} {}
312+
_pi_object() : RefCount{} {}
310313

311314
// Level Zero doesn't do the reference counting, so we have to do.
312315
// Must be atomic to prevent data race when incrementing/decrementing.
@@ -750,6 +753,12 @@ struct _pi_context : _pi_object {
750753
// when kernel has finished execution.
751754
std::unordered_map<void *, MemAllocRecord> MemAllocs;
752755

756+
// Get pi_event from cache.
757+
pi_event getEventFromCache(bool HostVisible, bool WithProfiling);
758+
759+
// Add pi_event to cache.
760+
void addEventToCache(pi_event);
761+
753762
private:
754763
// If context contains one device then return this device.
755764
// If context contains sub-devices of the same device, then return this parent
@@ -798,6 +807,20 @@ struct _pi_context : _pi_object {
798807
// Mutex to control operations on event pool caches and the helper maps
799808
// holding the current pool usage counts.
800809
pi_mutex ZeEventPoolCacheMutex;
810+
811+
// Mutex to control operations on event caches.
812+
pi_mutex EventCacheMutex;
813+
814+
// Caches for events.
815+
std::vector<std::list<pi_event>> EventCaches{4};
816+
817+
// Get the cache of events for a provided scope and profiling mode.
818+
auto getEventCache(bool HostVisible, bool WithProfiling) {
819+
if (HostVisible)
820+
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
821+
else
822+
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
823+
}
801824
};
802825

803826
struct _pi_queue : _pi_object {
@@ -1350,6 +1373,9 @@ struct _pi_event : _pi_object {
13501373
// L0 event (if any) is not guranteed to have been signalled, or
13511374
// being visible to the host at all.
13521375
bool Completed = {false};
1376+
1377+
// Reset _pi_event object.
1378+
pi_result reset();
13531379
};
13541380

13551381
struct _pi_program : _pi_object {

0 commit comments

Comments
 (0)