@@ -99,6 +99,16 @@ static const bool UseMultipleCmdlistBarriers = [] {
99
99
return std::stoi (UseMultipleCmdlistBarriersFlag) > 0 ;
100
100
}();
101
101
102
// Interprets the raw text of the SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING
// environment variable.
//
// Returns true only when FlagValue begins with a non-zero decimal integer.
// A null pointer (variable not set) and text without a leading number both
// yield false.
static bool parseDisableEventsCachingFlag(const char *FlagValue) {
  if (!FlagValue)
    return false;
  // std::strtol is used rather than std::stoi: std::stoi throws on malformed
  // input, and the result is consumed during static initialization, where an
  // uncaught exception terminates the process. std::strtol returns 0 when no
  // number can be parsed, which maps to "caching stays enabled".
  return std::strtol(FlagValue, nullptr, 10) != 0;
}

// This is an experimental option that disables caching of events in the
// context. It is turned on by setting the environment variable
// SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING to a non-zero integer.
static const bool DisableEventsCaching = parseDisableEventsCachingFlag(
    std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING"));
111
+
102
112
// This class encapsulates actions taken along with a call to Level Zero API.
103
113
class ZeCall {
104
114
private:
@@ -869,6 +879,15 @@ pi_result _pi_context::finalize() {
869
879
// This function is called when pi_context is deallocated, piContextRelease.
870
880
// There could be some memory that may have not been deallocated.
871
881
// For example, event pool caches would be still alive.
882
+ {
883
+ std::scoped_lock Lock (ZeEventCacheMutex);
884
+ for (auto &ZeEventCache : ZeEventCaches) {
885
+ for (auto &ZeEventAndPool : ZeEventCache)
886
+ ZE_CALL (zeEventDestroy, (ZeEventAndPool.first ));
887
+ ZeEventCache.clear ();
888
+ }
889
+ }
890
+
872
891
{
873
892
std::scoped_lock Lock (ZeEventPoolCacheMutex);
874
893
for (auto &ZePoolCache : ZeEventPoolCache) {
@@ -5430,43 +5449,74 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
5430
5449
return PI_SUCCESS;
5431
5450
}
5432
5451
5452
+ std::pair<ze_event_handle_t , ze_event_pool_handle_t >
5453
+ _pi_context::getZeEventFromCache (bool HostVisible, bool WithProfiling) {
5454
+ std::scoped_lock Lock (ZeEventCacheMutex);
5455
+ auto Cache = getZeEventCache (HostVisible, WithProfiling);
5456
+ if (Cache->empty ())
5457
+ return std::make_pair<ze_event_handle_t , ze_event_pool_handle_t >(nullptr ,
5458
+ nullptr );
5459
+
5460
+ auto It = Cache->begin ();
5461
+ std::pair<ze_event_handle_t , ze_event_pool_handle_t > ZeEvent = *It;
5462
+ Cache->erase (It);
5463
+ return ZeEvent;
5464
+ }
5465
+
5466
+ void _pi_context::addZeEventToCache (ze_event_handle_t ZeEvent,
5467
+ ze_event_pool_handle_t ZePool,
5468
+ bool HostVisible, bool WithProfiling) {
5469
+ std::scoped_lock Lock (ZeEventCacheMutex);
5470
+ auto Cache = getZeEventCache (HostVisible, WithProfiling);
5471
+ Cache->emplace_back (ZeEvent, ZePool);
5472
+ }
5473
+
5433
5474
// Helper function for creating a PI event.
5434
5475
// The "Queue" argument specifies the PI queue where a command is submitted.
5435
5476
// The "HostVisible" argument specifies if event needs to be allocated from
5436
5477
// a host-visible pool.
5437
5478
//
5438
5479
static pi_result EventCreate (pi_context Context, pi_queue Queue,
5439
5480
bool HostVisible, pi_event *RetEvent) {
5440
-
5441
5481
bool ProfilingEnabled =
5442
5482
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0 ;
5443
5483
5444
- size_t Index = 0 ;
5445
- ze_event_pool_handle_t ZeEventPool = {};
5446
- if (auto Res = Context->getFreeSlotInExistingOrNewPool (
5447
- ZeEventPool, Index, HostVisible, ProfilingEnabled))
5448
- return Res;
5449
-
5484
+ auto CachedEvent =
5485
+ Context->getZeEventFromCache (HostVisible, ProfilingEnabled);
5450
5486
ze_event_handle_t ZeEvent;
5451
- ZeStruct<ze_event_desc_t > ZeEventDesc;
5452
- ZeEventDesc.index = Index;
5453
- ZeEventDesc.wait = 0 ;
5487
+ ze_event_pool_handle_t ZeEventPool = {};
5454
5488
5455
- if (HostVisible) {
5456
- ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5489
+ // Reuse an event from the context's cache when one is available.
5490
+ if (CachedEvent.first ) {
5491
+ ZeEvent = CachedEvent.first ;
5492
+ ZeEventPool = CachedEvent.second ;
5457
5493
} else {
5458
- //
5459
- // Set the scope to "device" for every event. This is sufficient for global
5460
- // device access and peer device access. If needed to be seen on the host
5461
- // we are doing special handling, see EventsScope options.
5462
- //
5463
- // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5464
- // used in some circumstances.
5465
- //
5466
- ZeEventDesc.signal = 0 ;
5467
- }
5494
+ size_t Index = 0 ;
5495
+
5496
+ if (auto Res = Context->getFreeSlotInExistingOrNewPool (
5497
+ ZeEventPool, Index, HostVisible, ProfilingEnabled))
5498
+ return Res;
5499
+
5500
+ ZeStruct<ze_event_desc_t > ZeEventDesc;
5501
+ ZeEventDesc.index = Index;
5502
+ ZeEventDesc.wait = 0 ;
5503
+
5504
+ if (HostVisible) {
5505
+ ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
5506
+ } else {
5507
+ //
5508
+ // Set the scope to "device" for every event. This is sufficient for
5509
+ // global device access and peer device access. If needed to be seen on
5510
+ // the host we are doing special handling, see EventsScope options.
5511
+ //
5512
+ // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
5513
+ // used in some circumstances.
5514
+ //
5515
+ ZeEventDesc.signal = 0 ;
5516
+ }
5468
5517
5469
- ZE_CALL (zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5518
+ ZE_CALL (zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent));
5519
+ }
5470
5520
5471
5521
try {
5472
5522
PI_ASSERT (RetEvent, PI_ERROR_INVALID_VALUE);
@@ -5819,7 +5869,17 @@ pi_result piEventRelease(pi_event Event) {
5819
5869
Event->CommandData = nullptr ;
5820
5870
}
5821
5871
if (Event->OwnZeEvent ) {
5822
- ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
5872
+ if (!DisableEventsCaching) {
5873
+ ZE_CALL (zeEventHostReset, (Event->ZeEvent ));
5874
+ Event->Context ->addZeEventToCache (Event->ZeEvent , Event->ZeEventPool ,
5875
+ Event->isHostVisible (),
5876
+ Event->isProfilingEnabled ());
5877
+ } else {
5878
+ ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
5879
+ auto Context = Event->Context ;
5880
+ if (auto Res = Context->decrementUnreleasedEventsInPool (Event))
5881
+ return Res;
5882
+ }
5823
5883
}
5824
5884
// It is possible that host-visible event was never created.
5825
5885
// In case it was check if that's different from this same event
@@ -5829,10 +5889,6 @@ pi_result piEventRelease(pi_event Event) {
5829
5889
PI_CALL (piEventRelease (Event->HostVisibleEvent ));
5830
5890
}
5831
5891
5832
- auto Context = Event->Context ;
5833
- if (auto Res = Context->decrementUnreleasedEventsInPool (Event))
5834
- return Res;
5835
-
5836
5892
// We intentionally incremented the reference counter when an event is
5837
5893
// created so that we can avoid pi_queue is released before the associated
5838
5894
// pi_event is released. Here we have to decrement it so pi_queue
0 commit comments