Skip to content

Commit c5f6e9a

Browse files
committed
[SYCL] Reuse native event handles in the Level Zero plugin.
Avoid creating new Level Zero events: when an event is released, reset its native event handle and put it in a per-context cache for reuse instead of destroying it.
1 parent 6f0bb25 commit c5f6e9a

File tree

2 files changed

+108
-28
lines changed

2 files changed

+108
-28
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

100755100644
Lines changed: 84 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,16 @@ static const bool UseMultipleCmdlistBarriers = [] {
9999
return std::stoi(UseMultipleCmdlistBarriersFlag) > 0;
100100
}();
101101

102+
// This is an experimental option that allows disabling caching of events in
103+
// the context.
104+
static const bool DisableEventsCaching = [] {
105+
const char *DisableEventsCachingFlag =
106+
std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING");
107+
if (!DisableEventsCachingFlag)
108+
return false;
109+
return std::stoi(DisableEventsCachingFlag) != 0;
110+
}();
111+
102112
// This class encapsulates actions taken along with a call to Level Zero API.
103113
class ZeCall {
104114
private:
@@ -869,6 +879,15 @@ pi_result _pi_context::finalize() {
869879
// This function is called when pi_context is deallocated, piContextRelease.
870880
// There could be some memory that may not have been deallocated.
871881
// For example, event pool caches would be still alive.
882+
{
883+
std::scoped_lock Lock(ZeEventCacheMutex);
884+
for (auto &ZeEventCache : ZeEventCaches) {
885+
for (auto &ZeEventAndPool : ZeEventCache)
886+
ZE_CALL(zeEventDestroy, (ZeEventAndPool.first));
887+
ZeEventCache.clear();
888+
}
889+
}
890+
872891
{
873892
std::scoped_lock Lock(ZeEventPoolCacheMutex);
874893
for (auto &ZePoolCache : ZeEventPoolCache) {
@@ -5430,43 +5449,74 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
54305449
return PI_SUCCESS;
54315450
}
54325451

5452+
std::pair<ze_event_handle_t, ze_event_pool_handle_t>
5453+
_pi_context::getZeEventFromCache(bool HostVisible, bool WithProfiling) {
5454+
std::scoped_lock Lock(ZeEventCacheMutex);
5455+
auto Cache = getZeEventCache(HostVisible, WithProfiling);
5456+
if (Cache->empty())
5457+
return std::make_pair<ze_event_handle_t, ze_event_pool_handle_t>(nullptr,
5458+
nullptr);
5459+
5460+
auto It = Cache->begin();
5461+
std::pair<ze_event_handle_t, ze_event_pool_handle_t> ZeEvent = *It;
5462+
Cache->erase(It);
5463+
return ZeEvent;
5464+
}
5465+
5466+
void _pi_context::addZeEventToCache(ze_event_handle_t ZeEvent,
5467+
ze_event_pool_handle_t ZePool,
5468+
bool HostVisible, bool WithProfiling) {
5469+
std::scoped_lock Lock(ZeEventCacheMutex);
5470+
auto Cache = getZeEventCache(HostVisible, WithProfiling);
5471+
Cache->emplace_back(ZeEvent, ZePool);
5472+
}
5473+
54335474
// Helper function for creating a PI event.
54345475
// The "Queue" argument specifies the PI queue where a command is submitted.
54355476
// The "HostVisible" argument specifies if event needs to be allocated from
54365477
// a host-visible pool.
54375478
//
54385479
static pi_result EventCreate(pi_context Context, pi_queue Queue,
54395480
bool HostVisible, pi_event *RetEvent) {
5440-
54415481
bool ProfilingEnabled =
54425482
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
54435483

5444-
size_t Index = 0;
5445-
ze_event_pool_handle_t ZeEventPool = {};
5446-
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
5447-
ZeEventPool, Index, HostVisible, ProfilingEnabled))
5448-
return Res;
5449-
5484+
auto CachedEvent =
5485+
Context->getZeEventFromCache(HostVisible, ProfilingEnabled);
54505486
ze_event_handle_t ZeEvent;
5451-
ZeStruct<ze_event_desc_t> ZeEventDesc;
5452-
ZeEventDesc.index = Index;
5453-
ZeEventDesc.wait = 0;
5487+
ze_event_pool_handle_t ZeEventPool = {};
54545488

5455-
if (HostVisible) {
5456-
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5489+
// Reuse a cached event from the context if available, else create a new one.
5490+
if (CachedEvent.first) {
5491+
ZeEvent = CachedEvent.first;
5492+
ZeEventPool = CachedEvent.second;
54575493
} else {
5458-
//
5459-
// Set the scope to "device" for every event. This is sufficient for global
5460-
// device access and peer device access. If needed to be seen on the host
5461-
// we are doing special handling, see EventsScope options.
5462-
//
5463-
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5464-
// used in some circumstances.
5465-
//
5466-
ZeEventDesc.signal = 0;
5467-
}
5494+
size_t Index = 0;
5495+
5496+
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
5497+
ZeEventPool, Index, HostVisible, ProfilingEnabled))
5498+
return Res;
5499+
5500+
ZeStruct<ze_event_desc_t> ZeEventDesc;
5501+
ZeEventDesc.index = Index;
5502+
ZeEventDesc.wait = 0;
5503+
5504+
if (HostVisible) {
5505+
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5506+
} else {
5507+
//
5508+
// Set the scope to "device" for every event. This is sufficient for
5509+
// global device access and peer device access. If needed to be seen on
5510+
// the host we are doing special handling, see EventsScope options.
5511+
//
5512+
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5513+
// used in some circumstances.
5514+
//
5515+
ZeEventDesc.signal = 0;
5516+
}
54685517

5469-
ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5518+
ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5519+
}
54705520

54715521
try {
54725522
PI_ASSERT(RetEvent, PI_ERROR_INVALID_VALUE);
@@ -5819,7 +5869,17 @@ pi_result piEventRelease(pi_event Event) {
58195869
Event->CommandData = nullptr;
58205870
}
58215871
if (Event->OwnZeEvent) {
5822-
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
5872+
if (!DisableEventsCaching) {
5873+
ZE_CALL(zeEventHostReset, (Event->ZeEvent));
5874+
Event->Context->addZeEventToCache(Event->ZeEvent, Event->ZeEventPool,
5875+
Event->isHostVisible(),
5876+
Event->isProfilingEnabled());
5877+
} else {
5878+
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
5879+
auto Context = Event->Context;
5880+
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
5881+
return Res;
5882+
}
58235883
}
58245884
// It is possible that host-visible event was never created.
58255885
// In case it was check if that's different from this same event
@@ -5829,10 +5889,6 @@ pi_result piEventRelease(pi_event Event) {
58295889
PI_CALL(piEventRelease(Event->HostVisibleEvent));
58305890
}
58315891

5832-
auto Context = Event->Context;
5833-
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
5834-
return Res;
5835-
58365892
// We intentionally incremented the reference counter when an event is
58375893
// created so that the pi_queue is not released before the associated
58385894
// pi_event is released. Here we have to decrement it so pi_queue

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,15 @@ struct _pi_context : _pi_object {
750750
// when kernel has finished execution.
751751
std::unordered_map<void *, MemAllocRecord> MemAllocs;
752752

753+
// Get a native event and its pool from cache.
754+
std::pair<ze_event_handle_t, ze_event_pool_handle_t>
755+
getZeEventFromCache(bool HostVisible, bool WithProfiling);
756+
757+
// Add a native event and its pool to cache.
758+
void addZeEventToCache(ze_event_handle_t ZeEvent,
759+
ze_event_pool_handle_t ZePool, bool HostVisible,
760+
bool WithProfiling);
761+
753762
private:
754763
// If context contains one device then return this device.
755764
// If context contains sub-devices of the same device, then return this parent
@@ -798,6 +807,21 @@ struct _pi_context : _pi_object {
798807
// Mutex to control operations on event pool caches and the helper maps
799808
// holding the current pool usage counts.
800809
pi_mutex ZeEventPoolCacheMutex;
810+
811+
// Mutex to control operations on event caches.
812+
pi_mutex ZeEventCacheMutex;
813+
814+
// Caches for native event handles.
815+
std::vector<std::list<std::pair<ze_event_handle_t, ze_event_pool_handle_t>>>
816+
ZeEventCaches{4};
817+
818+
// Get the cache of events for a provided scope and profiling mode.
819+
auto getZeEventCache(bool HostVisible, bool WithProfiling) {
820+
if (HostVisible)
821+
return WithProfiling ? &ZeEventCaches[0] : &ZeEventCaches[1];
822+
else
823+
return WithProfiling ? &ZeEventCaches[2] : &ZeEventCaches[3];
824+
}
801825
};
802826

803827
struct _pi_queue : _pi_object {

0 commit comments

Comments
 (0)