Skip to content

Commit bf60383

Browse files
author
Jaime Arteaga
authored
[SYCL][UR][L0] Clean up PI references in Level Zero adapter (#10572)
Signed-off-by: Jaime Arteaga <[email protected]>
1 parent 8ecfa97 commit bf60383

File tree

19 files changed

+251
-242
lines changed

19 files changed

+251
-242
lines changed

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 66 additions & 66 deletions
Large diffs are not rendered by default.

sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
500500
LaunchEvent->CommandData = (void *)Kernel;
501501
// Increment the reference count of the Kernel and indicate that the Kernel
502502
// is in use. Once the event has been signalled, the code in
503-
// CleanupCompletedEvent(Event) will do a piReleaseKernel to update the
503+
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
504504
// reference count on the kernel, using the kernel saved in CommandData.
505505
UR_CALL(urKernelRetain(Kernel));
506506

@@ -678,7 +678,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
678678
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
679679
ZeQueueDesc.ordinal = QueueGroupOrdinal;
680680
CommandListPtr = CommandBuffer->CommandListMap.insert(
681-
std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
681+
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
682682
CommandBuffer->ZeCommandList,
683683
{ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
684684

sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
5656
// Command list map so we can use queue::executeCommandList.
5757
// Command list map is also used to release all the Fences retained by the
5858
// command_buffer std::unordered_multimap<ze_command_list_handle_t,
59-
// pi_command_list_info_t> CommandListMap; CommandListMap is redefined as a
59+
// ur_command_list_info_t> CommandListMap; CommandListMap is redefined as a
6060
// multimap to enable multiple commands enqueuing into the same command_buffer
61-
std::unordered_multimap<ze_command_list_handle_t, pi_command_list_info_t>
61+
std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>
6262
CommandListMap;
6363
// Event which will signal that the most recent execution of the command-buffer
6464
// has finished

sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ struct ReferenceCounter {
336336
// Used when retaining an object.
337337
void increment() { RefCount++; }
338338

339-
// Supposed to be used in pi*GetInfo* methods where ref count value is
339+
// Supposed to be used in ur*GetInfo* methods where ref count value is
340340
// requested.
341341
uint32_t load() { return RefCount.load(); }
342342

@@ -376,7 +376,7 @@ struct _ur_object {
376376
// To get exclusive access to the object in a scope use std::scoped_lock:
377377
// std::scoped_lock Lock(Obj->Mutex);
378378
//
379-
// If several pi objects are accessed in a scope then each object's mutex must
379+
// If several UR objects are accessed in a scope then each object's mutex must
380380
// be locked. For example, to get write access to Obj1 and Obj2 and read
381381
// access to Obj3 in a scope use the following approach:
382382
// std::shared_lock Obj3Lock(Obj3->Mutex, std::defer_lock);

sycl/plugins/unified_runtime/ur/adapters/level_zero/context.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -323,12 +323,12 @@ ur_result_t ContextReleaseHelper(ur_context_handle_t Context) {
323323

324324
// We must delete Context first and then destroy zeContext because
325325
// Context deallocation requires ZeContext in some member deallocation of
326-
// pi_context.
326+
// ur_context_handle_t.
327327
delete Context;
328328

329-
// Destruction of some members of pi_context uses L0 context
329+
// Destruction of some members of ur_context_handle_t uses L0 context
330330
// and therefore it must be valid at that point.
331-
// Technically it should be placed to the destructor of pi_context
331+
// Technically it should be placed in the destructor of ur_context_handle_t
332332
// but this makes API error handling more complex.
333333
if (DestroyZeContext) {
334334
auto ZeResult = ZE_CALL_NOCHECK(zeContextDestroy, (DestroyZeContext));
@@ -345,9 +345,9 @@ ur_platform_handle_t ur_context_handle_t_::getPlatform() const {
345345
}
346346

347347
ur_result_t ur_context_handle_t_::finalize() {
348-
// This function is called when pi_context is deallocated, piContextRelease.
349-
// There could be some memory that may have not been deallocated.
350-
// For example, event and event pool caches would be still alive.
348+
// This function is called when ur_context_handle_t is deallocated,
349+
// urContextRelease. There could be some memory that may have not been
350+
// deallocated. For example, event and event pool caches would be still alive.
351351

352352
if (!DisableEventsCaching) {
353353
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
@@ -630,7 +630,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
630630
// the command lists, and later are then added to the command queue.
631631
// Each command list is paired with an associated fence to track when the
632632
// command list is available for reuse.
633-
ur_result_t pi_result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
633+
ur_result_t ur_result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
634634

635635
// Initially, we need to check if a command list has already been created
636636
// on this device that is available for use. If so, then reuse that
@@ -678,7 +678,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
678678
CommandList =
679679
Queue->CommandListMap
680680
.emplace(ZeCommandList,
681-
pi_command_list_info_t{ZeFence, true, false,
681+
ur_command_list_info_t{ZeFence, true, false,
682682
ZeCommandQueue, ZeQueueDesc})
683683
.first;
684684
}
@@ -720,9 +720,9 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
720720

721721
// If there are no available command lists nor signalled command lists,
722722
// then we must create another command list.
723-
pi_result = Queue->createCommandList(UseCopyEngine, CommandList);
723+
ur_result = Queue->createCommandList(UseCopyEngine, CommandList);
724724
CommandList->second.ZeFenceInUse = true;
725-
return pi_result;
725+
return ur_result;
726726
}
727727

728728
bool ur_context_handle_t_::isValidDevice(ur_device_handle_t Device) const {

sycl/plugins/unified_runtime/ur/adapters/level_zero/context.hpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ struct ur_context_handle_t_ : _ur_object {
3636

3737
// A L0 context handle is primarily used during creation and management of
3838
// resources that may be used by multiple devices.
39-
// This field is only set at _pi_context creation time, and cannot change.
40-
// Therefore it can be accessed without holding a lock on this _pi_context.
39+
// This field is only set at ur_context_handle_t creation time, and cannot
40+
// change. Therefore it can be accessed without holding a lock on this
41+
// ur_context_handle_t.
4142
const ze_context_handle_t ZeContext{};
4243

4344
// Keep the UR devices this UR context was created for.
44-
// This field is only set at _pi_context creation time, and cannot change.
45-
// Therefore it can be accessed without holding a lock on this _pi_context.
46-
// const std::vector<ur_device_handle_t> Devices;
45+
// This field is only set at ur_context_handle_t creation time, and cannot
46+
// change. Therefore it can be accessed without holding a lock on this
47+
// ur_context_handle_t.
// const std::vector<ur_device_handle_t> Devices;
4748
std::vector<ur_device_handle_t> Devices;
4849
uint32_t NumDevices{};
4950

@@ -68,8 +69,9 @@ struct ur_context_handle_t_ : _ur_object {
6869

6970
// If context contains one device or sub-devices of the same device, we want
7071
// to save this device.
71-
// This field is only set at _pi_context creation time, and cannot change.
72-
// Therefore it can be accessed without holding a lock on this _pi_context.
72+
// This field is only set at ur_context_handle_t creation time, and cannot
73+
// change. Therefore it can be accessed without holding a lock on this
74+
// ur_context_handle_t.
7375
ur_device_handle_t SingleRootDevice = nullptr;
7476

7577
// Cache of all currently available/completed command/copy lists.
@@ -117,9 +119,9 @@ struct ur_context_handle_t_ : _ur_object {
117119
// Following member variables are used to manage assignment of events
118120
// to event pools.
119121
//
120-
// TODO: Create pi_event_pool class to encapsulate working with pools.
122+
// TODO: Create ur_event_pool class to encapsulate working with pools.
121123
// This will avoid needing the use of maps below, and cleanup the
122-
// pi_context overall.
124+
// ur_context_handle_t overall.
123125
//
124126

125127
// The cache of event pools from where new events are allocated from.
@@ -179,11 +181,11 @@ struct ur_context_handle_t_ : _ur_object {
179181
bool HostVisible,
180182
bool ProfilingEnabled);
181183

182-
// Get pi_event from cache.
184+
// Get ur_event_handle_t from cache.
183185
ur_event_handle_t getEventFromContextCache(bool HostVisible,
184186
bool WithProfiling);
185187

186-
// Add pi_event to cache.
188+
// Add ur_event_handle_t to cache.
187189
void addEventToContextCache(ur_event_handle_t);
188190

189191
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling) {

sycl/plugins/unified_runtime/ur/adapters/level_zero/device.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(
3636

3737
// Filter available devices based on input DeviceType.
3838
std::vector<ur_device_handle_t> MatchedDevices;
39-
std::shared_lock<ur_shared_mutex> Lock(Platform->PiDevicesCacheMutex);
40-
for (auto &D : Platform->PiDevicesCache) {
41-
// Only ever return root-devices from piDevicesGet, but the
39+
std::shared_lock<ur_shared_mutex> Lock(Platform->URDevicesCacheMutex);
40+
for (auto &D : Platform->URDevicesCache) {
41+
// Only ever return root-devices from urDeviceGet, but the
4242
// devices cache also keeps sub-devices.
4343
if (D->isSubDevice())
4444
continue;
@@ -1274,11 +1274,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
12741274
//
12751275
// TODO: maybe we should populate cache of platforms if it wasn't already.
12761276
// For now assert that it was populated.
1277-
UR_ASSERT(PiPlatformCachePopulated, UR_RESULT_ERROR_INVALID_VALUE);
1278-
const std::lock_guard<SpinLock> Lock{*PiPlatformsCacheMutex};
1277+
UR_ASSERT(URPlatformCachePopulated, UR_RESULT_ERROR_INVALID_VALUE);
1278+
const std::lock_guard<SpinLock> Lock{*URPlatformsCacheMutex};
12791279

12801280
ur_device_handle_t Dev = nullptr;
1281-
for (ur_platform_handle_t ThePlatform : *PiPlatformsCache) {
1281+
for (ur_platform_handle_t ThePlatform : *URPlatformsCache) {
12821282
Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice);
12831283
if (Dev) {
12841284
// Check that the input Platform, if was given, matches the found one.

sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
480480
case UR_PROFILING_INFO_COMMAND_SUBMIT:
481481
// Note: No users for this case
482482
// The "command_submit" time is implemented by recording submission
483-
// timestamp with a call to piGetDeviceAndHostTimer before command enqueue.
483+
// timestamp with a call to urDeviceGetGlobalTimestamps before command
484+
// enqueue.
484485
//
485486
return ReturnValue(uint64_t{0});
486487
default:
@@ -572,7 +573,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
572573
{
573574
std::shared_lock<ur_shared_mutex> EventLock(Event->Mutex);
574575
if (!Event->hasExternalRefs())
575-
die("piEventsWait must not be called for an internal event");
576+
die("urEventWait must not be called for an internal event");
576577

577578
if (!Event->Completed) {
578579
auto HostVisibleEvent = Event->HostVisibleEvent;
@@ -594,7 +595,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
594595
reinterpret_cast<ur_event_handle_t>(Event));
595596
else {
596597
// NOTE: we are cleaning up after the event here to free resources
597-
// sooner in case run-time is not calling piEventRelease soon enough.
598+
// sooner in case run-time is not calling urEventRelease soon enough.
598599
CleanupCompletedEvent(reinterpret_cast<ur_event_handle_t>(Event));
599600
// For the case when we have out-of-order queue or regular command
600601
// lists it's more efficient to check fences so put the queue in the
@@ -679,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
679680
) {
680681

681682
// We don't have urEventCreate, so use this check for now to know that
682-
// the call comes from piEventCreate()
683+
// the call comes from urEventCreate()
683684
if (NativeEvent == nullptr) {
684685
UR_CALL(EventCreate(Context, nullptr, true, Event));
685686

@@ -689,9 +690,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
689690
}
690691

691692
auto ZeEvent = ur_cast<ze_event_handle_t>(NativeEvent);
692-
ur_event_handle_t_ *UrEvent{};
693+
ur_event_handle_t_ *UREvent{};
693694
try {
694-
UrEvent = new ur_event_handle_t_(ZeEvent, nullptr /* ZeEventPool */,
695+
UREvent = new ur_event_handle_t_(ZeEvent, nullptr /* ZeEventPool */,
695696
Context, UR_EXT_COMMAND_TYPE_USER,
696697
Properties->isNativeHandleOwned);
697698

@@ -703,16 +704,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
703704

704705
// Assume native event is host-visible, or otherwise we'd
705706
// need to create a host-visible proxy for it.
706-
UrEvent->HostVisibleEvent = reinterpret_cast<ur_event_handle_t>(UrEvent);
707+
UREvent->HostVisibleEvent = reinterpret_cast<ur_event_handle_t>(UREvent);
707708

708709
// Unlike regular events managed by SYCL RT we don't have to wait for interop
709710
// events completion, and do not need to do their `cleanup()`. This in
710-
// particular guarantees that the extra `piEventRelease` is not called on
711-
// them. That release is needed to match the `piEventRetain` of regular events
711+
// particular guarantees that the extra `urEventRelease` is not called on
712+
// them. That release is needed to match the `urEventRetain` of regular events
712713
// made for waiting for event completion, but not this interop event.
713-
UrEvent->CleanedUp = true;
714+
UREvent->CleanedUp = true;
714715

715-
*Event = reinterpret_cast<ur_event_handle_t>(UrEvent);
716+
*Event = reinterpret_cast<ur_event_handle_t>(UREvent);
716717

717718
return UR_RESULT_SUCCESS;
718719
}
@@ -737,7 +738,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
737738
return UR_RESULT_SUCCESS;
738739

739740
if (Event->CommandType == UR_COMMAND_MEM_UNMAP && Event->CommandData) {
740-
// Free the memory allocated in the piEnqueueMemBufferMap.
741+
// Free the memory allocated in the urEnqueueMemBufferMap.
741742
if (auto Res = ZeMemFreeHelper(Event->Context, Event->CommandData))
742743
return Res;
743744
Event->CommandData = nullptr;
@@ -773,9 +774,9 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
773774
}
774775

775776
// We intentionally incremented the reference counter when an event is
776-
// created so that we can avoid pi_queue is released before the associated
777-
// pi_event is released. Here we have to decrement it so pi_queue
778-
// can be released successfully.
777+
// created so that we can avoid ur_queue_handle_t being released before the
778+
// associated ur_event_handle_t is released. Here we have to decrement it so
779+
// ur_queue_handle_t can be released successfully.
779780
if (Queue) {
780781
UR_CALL(urQueueReleaseInternal(Queue));
781782
}
@@ -839,15 +840,15 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked) {
839840

840841
// Make a list of all the dependent events that must have signalled
841842
// because this event was dependent on them.
842-
Event->WaitList.collectEventsForReleaseAndDestroyPiZeEventList(
843+
Event->WaitList.collectEventsForReleaseAndDestroyUrZeEventList(
843844
EventsToBeReleased);
844845

845846
Event->CleanedUp = true;
846847
}
847848

848849
auto ReleaseIndirectMem = [](ur_kernel_handle_t Kernel) {
849850
if (IndirectAccessTrackingEnabled) {
850-
// piKernelRelease is called by CleanupCompletedEvent(Event) as soon as
851+
// urKernelRelease is called by CleanupCompletedEvent(Event) as soon as
851852
// kernel execution has finished. This is the place where we need to
852853
// release memory allocations. If kernel is not in use (not submitted by
853854
// some other thread) then release referenced memory allocations. As a
@@ -913,7 +914,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked) {
913914
ur_kernel_handle_t DepEventKernel = nullptr;
914915
{
915916
std::scoped_lock<ur_shared_mutex> DepEventLock(DepEvent->Mutex);
916-
DepEvent->WaitList.collectEventsForReleaseAndDestroyPiZeEventList(
917+
DepEvent->WaitList.collectEventsForReleaseAndDestroyUrZeEventList(
917918
EventsToBeReleased);
918919
if (IndirectAccessTrackingEnabled) {
919920
// DepEvent has finished, we can release the associated kernel if there
@@ -1220,7 +1221,7 @@ ur_result_t _ur_ze_event_list_t::insert(_ur_ze_event_list_t &Other) {
12201221
return UR_RESULT_SUCCESS;
12211222
}
12221223

1223-
ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyPiZeEventList(
1224+
ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList(
12241225
std::list<ur_event_handle_t> &EventsToBeReleased) {
12251226
// event wait lists are owned by events, this function is called with owning
12261227
// event lock taken, hence it is thread safe

sycl/plugins/unified_runtime/ur/adapters/level_zero/event.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ struct _ur_ze_event_list_t {
6565
// List of level zero events for this event list.
6666
ze_event_handle_t *ZeEventList = {nullptr};
6767

68-
// List of pi_events for this event list.
68+
// List of ur_events for this event list.
6969
ur_event_handle_t *UrEventList = {nullptr};
7070

7171
// length of both the lists. The actual allocation of these lists
@@ -74,8 +74,8 @@ struct _ur_ze_event_list_t {
7474
uint32_t Length = {0};
7575

7676
// Initialize this using the array of events in EventList, and retain
77-
// all the pi_events in the created data structure.
78-
// CurQueue is the pi_queue that the command with this event wait
77+
// all the ur_event_handle_t in the created data structure.
78+
// CurQueue is the ur_queue_handle_t that the command with this event wait
7979
// list is going to be added to. That is needed to flush command
8080
// batches for wait events that are in other queues.
8181
// UseCopyEngine indicates if the next command (the one that this
@@ -88,9 +88,9 @@ struct _ur_ze_event_list_t {
8888
bool UseCopyEngine);
8989

9090
// Add all the events in this object's UrEventList to the end
91-
// of the list EventsToBeReleased. Destroy pi_ze_event_list_t data
91+
// of the list EventsToBeReleased. Destroy ur_ze_event_list_t data
9292
// structure fields making it look empty.
93-
ur_result_t collectEventsForReleaseAndDestroyPiZeEventList(
93+
ur_result_t collectEventsForReleaseAndDestroyUrZeEventList(
9494
std::list<ur_event_handle_t> &EventsToBeReleased);
9595

9696
// Had to create custom assignment operator because the mutex is
@@ -163,7 +163,7 @@ struct ur_event_handle_t_ : _ur_object {
163163
// Opaque data to hold any data needed for CommandType.
164164
void *CommandData;
165165

166-
// Command list associated with the pi_event.
166+
// Command list associated with the ur_event_handle_t
167167
std::optional<ur_command_list_ptr_t> CommandList;
168168

169169
// List of events that were in the wait list of the command that will
@@ -206,7 +206,7 @@ struct ur_event_handle_t_ : _ur_object {
206206

207207
bool hasExternalRefs() { return RefCountExternal != 0; }
208208

209-
// Reset _pi_event object.
209+
// Reset ur_event_handle_t object.
210210
ur_result_t reset();
211211

212212
// Tells if this event is with profiling capabilities.

sycl/plugins/unified_runtime/ur/adapters/level_zero/kernel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
196196
(*Event)->WaitList = TmpWaitList;
197197

198198
// Save the kernel in the event, so that when the event is signalled
199-
// the code can do a piKernelRelease on this kernel.
199+
// the code can do a urKernelRelease on this kernel.
200200
(*Event)->CommandData = (void *)Kernel;
201201

202202
// Increment the reference count of the Kernel and indicate that the Kernel is
203203
// in use. Once the event has been signalled, the code in
204-
// CleanupCompletedEvent(Event) will do a piReleaseKernel to update the
204+
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
205205
// reference count on the kernel, using the kernel saved in CommandData.
206206
UR_CALL(urKernelRetain(Kernel));
207207

0 commit comments

Comments
 (0)