Skip to content

Commit cad11ab

Browse files
committed
[UR][L0] Verify the Loader is stable before cleanup of all handles
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 6da51cd commit cad11ab

File tree

14 files changed

+54
-55
lines changed

14 files changed

+54
-55
lines changed

unified-runtime/source/adapters/level_zero/command_buffer.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -445,16 +445,16 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
445445

446446
// Release the memory allocated to the CommandList stored in the
447447
// command_buffer
448-
if (ZeComputeCommandList) {
448+
if (ZeComputeCommandList && checkL0LoaderTeardown()) {
449449
ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeComputeCommandList));
450450
}
451-
if (useCopyEngine() && ZeCopyCommandList) {
451+
if (useCopyEngine() && ZeCopyCommandList && checkL0LoaderTeardown()) {
452452
ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCopyCommandList));
453453
}
454454

455455
// Release the memory allocated to the CommandListResetEvents stored in the
456456
// command_buffer
457-
if (ZeCommandListResetEvents) {
457+
if (ZeCommandListResetEvents && checkL0LoaderTeardown()) {
458458
ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListResetEvents));
459459
}
460460

@@ -502,7 +502,9 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
502502
// Release fences allocated to command-buffer
503503
for (auto &ZeFencePair : ZeFencesMap) {
504504
auto &ZeFence = ZeFencePair.second;
505-
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
505+
if (checkL0LoaderTeardown()) {
506+
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
507+
}
506508
}
507509

508510
auto ReleaseIndirectMem = [](ur_kernel_handle_t Kernel) {

unified-runtime/source/adapters/level_zero/context.cpp

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -264,9 +264,7 @@ ur_result_t ContextReleaseHelper(ur_context_handle_t Context) {
264264
Contexts.erase(It);
265265
}
266266
ze_context_handle_t DestroyZeContext =
267-
((Context->OwnNativeHandle && !Context->IsInteropNativeHandle) ||
268-
(Context->OwnNativeHandle && Context->IsInteropNativeHandle &&
269-
checkL0LoaderTeardown()))
267+
(Context->OwnNativeHandle && checkL0LoaderTeardown())
270268
? Context->ZeContext
271269
: nullptr;
272270

@@ -310,8 +308,7 @@ ur_result_t ur_context_handle_t_::finalize() {
310308
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
311309
for (auto &EventCache : EventCaches) {
312310
for (auto &Event : EventCache) {
313-
if (!Event->IsInteropNativeHandle ||
314-
(Event->IsInteropNativeHandle && checkL0LoaderTeardown())) {
311+
if (checkL0LoaderTeardown()) {
315312
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
316313
// Gracefully handle the case that L0 was already unloaded.
317314
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
@@ -330,32 +327,36 @@ ur_result_t ur_context_handle_t_::finalize() {
330327
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
331328
for (auto &ZePoolCache : ZeEventPoolCache) {
332329
for (auto &ZePool : ZePoolCache) {
333-
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
334-
// Gracefully handle the case that L0 was already unloaded.
335-
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
336-
return ze2urResult(ZeResult);
337-
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
338-
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
330+
if (checkL0LoaderTeardown()) {
331+
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
332+
// Gracefully handle the case that L0 was already unloaded.
333+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
334+
return ze2urResult(ZeResult);
335+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
336+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
337+
}
339338
}
340339
}
341340
ZePoolCache.clear();
342341
}
343342
}
344343

345-
// Destroy the command list used for initializations
346-
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListInit));
347-
// Gracefully handle the case that L0 was already unloaded.
348-
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
349-
return ze2urResult(ZeResult);
350-
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
351-
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
344+
if (checkL0LoaderTeardown()) {
345+
// Destroy the command list used for initializations
346+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListInit));
347+
// Gracefully handle the case that L0 was already unloaded.
348+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
349+
return ze2urResult(ZeResult);
350+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
351+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
352+
}
352353
}
353354

354355
std::scoped_lock<ur_mutex> Lock(ZeCommandListCacheMutex);
355356
for (auto &List : ZeComputeCommandListCache) {
356357
for (auto &Item : List.second) {
357358
ze_command_list_handle_t ZeCommandList = Item.first;
358-
if (ZeCommandList) {
359+
if (ZeCommandList && checkL0LoaderTeardown()) {
359360
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
360361
// Gracefully handle the case that L0 was already unloaded.
361362
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
@@ -369,7 +370,7 @@ ur_result_t ur_context_handle_t_::finalize() {
369370
for (auto &List : ZeCopyCommandListCache) {
370371
for (auto &Item : List.second) {
371372
ze_command_list_handle_t ZeCommandList = Item.first;
372-
if (ZeCommandList) {
373+
if (ZeCommandList && checkL0LoaderTeardown()) {
373374
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
374375
// Gracefully handle the case that L0 was already unloaded.
375376
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))

unified-runtime/source/adapters/level_zero/event.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,7 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
10901090
* leaks or resource mismanagement.
10911091
*/
10921092
ur_event_handle_t_::~ur_event_handle_t_() {
1093-
if (this->ZeEvent && this->Completed) {
1093+
if (this->ZeEvent && this->Completed && checkL0LoaderTeardown()) {
10941094
if (this->UrQueue && !this->UrQueue->isDiscardEvents())
10951095
ZE_CALL_NOCHECK(zeEventDestroy, (this->ZeEvent));
10961096
}
@@ -1121,8 +1121,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
11211121
}
11221122
if (Event->OwnNativeHandle) {
11231123
if (DisableEventsCaching) {
1124-
if (!Event->IsInteropNativeHandle ||
1125-
(Event->IsInteropNativeHandle && checkL0LoaderTeardown())) {
1124+
if (checkL0LoaderTeardown()) {
11261125
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
11271126
// Gracefully handle the case that L0 was already unloaded.
11281127
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))

unified-runtime/source/adapters/level_zero/image.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp(
312312

313313
auto item = hDevice->ZeOffsetToImageHandleMap.find(hImage);
314314

315-
if (item != hDevice->ZeOffsetToImageHandleMap.end()) {
315+
if (item != hDevice->ZeOffsetToImageHandleMap.end() && checkL0LoaderTeardown()) {
316316
ZE2UR_CALL(zeImageDestroy, (item->second));
317317
hDevice->ZeOffsetToImageHandleMap.erase(item);
318318
} else {

unified-runtime/source/adapters/level_zero/kernel.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -940,8 +940,7 @@ ur_result_t urKernelRelease(
940940
auto KernelProgram = Kernel->Program;
941941
if (Kernel->OwnNativeHandle) {
942942
for (auto &ZeKernel : Kernel->ZeKernels) {
943-
if (!Kernel->IsInteropNativeHandle ||
944-
(Kernel->IsInteropNativeHandle && checkL0LoaderTeardown())) {
943+
if (checkL0LoaderTeardown()) {
945944
auto ZeResult = ZE_CALL_NOCHECK(zeKernelDestroy, (ZeKernel));
946945
// Gracefully handle the case that L0 was already unloaded.
947946
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))

unified-runtime/source/adapters/level_zero/memory.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,8 +1663,7 @@ ur_result_t urMemRelease(
16631663
if (Image->OwnNativeHandle) {
16641664
UR_CALL(Mem->getZeHandle(ZeHandleImage, ur_mem_handle_t_::write_only,
16651665
nullptr, nullptr, 0u));
1666-
if (!Image->IsInteropNativeHandle ||
1667-
(Image->IsInteropNativeHandle && checkL0LoaderTeardown())) {
1666+
if (checkL0LoaderTeardown()) {
16681667
auto ZeResult = ZE_CALL_NOCHECK(
16691668
zeImageDestroy, (ur_cast<ze_image_handle_t>(ZeHandleImage)));
16701669
// Gracefully handle the case that L0 was already unloaded.

unified-runtime/source/adapters/level_zero/physical_mem.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@ ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
5050
if (!hPhysicalMem->RefCount.decrementAndTest())
5151
return UR_RESULT_SUCCESS;
5252

53-
ZE2UR_CALL(zePhysicalMemDestroy, (hPhysicalMem->Context->getZeHandle(),
54-
hPhysicalMem->ZePhysicalMem));
53+
if (checkL0LoaderTeardown()) {
54+
ZE2UR_CALL(zePhysicalMemDestroy, (hPhysicalMem->Context->getZeHandle(),
55+
hPhysicalMem->ZePhysicalMem));
56+
}
5557
delete hPhysicalMem;
5658

5759
return UR_RESULT_SUCCESS;

unified-runtime/source/adapters/level_zero/program.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,7 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) {
10781078
}
10791079
if (!resourcesReleased) {
10801080
for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) {
1081-
if (DeviceData.ZeBuildLog)
1081+
if (DeviceData.ZeBuildLog && checkL0LoaderTeardown())
10821082
ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (DeviceData.ZeBuildLog));
10831083
}
10841084
// interop api
@@ -1087,7 +1087,7 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) {
10871087
}
10881088

10891089
for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap)
1090-
if (DeviceData.ZeModule)
1090+
if (DeviceData.ZeModule && checkL0LoaderTeardown())
10911091
ZE_CALL_NOCHECK(zeModuleDestroy, (DeviceData.ZeModule));
10921092

10931093
this->DeviceDataMap.clear();

unified-runtime/source/adapters/level_zero/queue.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ ur_result_t urQueueRelease(
648648
// runtime. Destroy only if a queue is healthy. Destroying a fence may
649649
// cause a hang otherwise.
650650
// If the fence is a nullptr we are using immediate commandlists.
651-
if (Queue->Healthy && it->second.ZeFence != nullptr) {
651+
if (Queue->Healthy && it->second.ZeFence != nullptr && checkL0LoaderTeardown()) {
652652
auto ZeResult = ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
653653
// Gracefully handle the case that L0 was already unloaded.
654654
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
@@ -679,7 +679,7 @@ ur_result_t urQueueRelease(
679679
// A non-reusable command list that came from a make_queue call is
680680
// destroyed since it cannot be recycled.
681681
ze_command_list_handle_t ZeCommandList = it->first;
682-
if (ZeCommandList) {
682+
if (ZeCommandList && checkL0LoaderTeardown()) {
683683
ZE2UR_CALL(zeCommandListDestroy, (ZeCommandList));
684684
}
685685
}
@@ -1608,8 +1608,7 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
16081608
for (auto &QueueGroup : QueueMap)
16091609
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
16101610
if (ZeQueue) {
1611-
if (!Queue->IsInteropNativeHandle ||
1612-
(Queue->IsInteropNativeHandle && checkL0LoaderTeardown())) {
1611+
if (checkL0LoaderTeardown()) {
16131612
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
16141613
// Gracefully handle the case that L0 was already unloaded.
16151614
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))

unified-runtime/source/adapters/level_zero/sampler.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,14 @@ ur_result_t urSamplerRelease(
129129
if (!Sampler->RefCount.decrementAndTest())
130130
return UR_RESULT_SUCCESS;
131131

132-
auto ZeResult = ZE_CALL_NOCHECK(zeSamplerDestroy, (Sampler->ZeSampler));
133-
// Gracefully handle the case that L0 was already unloaded.
134-
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
135-
return ze2urResult(ZeResult);
136-
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
137-
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
132+
if (checkL0LoaderTeardown()) {
133+
auto ZeResult = ZE_CALL_NOCHECK(zeSamplerDestroy, (Sampler->ZeSampler));
134+
// Gracefully handle the case that L0 was already unloaded.
135+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
136+
return ze2urResult(ZeResult);
137+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
138+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
139+
}
138140
}
139141
delete Sampler;
140142

unified-runtime/source/adapters/level_zero/usm.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -683,8 +683,7 @@ ur_result_t UR_APICALL urUSMPoolTrimToExp(ur_context_handle_t,
683683

684684
static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) {
685685
ur_result_t Res = UR_RESULT_SUCCESS;
686-
if (!Context->IsInteropNativeHandle ||
687-
(Context->IsInteropNativeHandle && checkL0LoaderTeardown())) {
686+
if (checkL0LoaderTeardown()) {
688687
auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr));
689688
// Handle When the driver is already released
690689
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {

unified-runtime/source/adapters/level_zero/v2/common.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ struct ze_handle_wrapper {
7979
return;
8080
}
8181

82-
if ((ownZeHandle && !IsInteropNativeHandle) ||
83-
(ownZeHandle && IsInteropNativeHandle && checkL0LoaderTeardown())) {
82+
if (ownZeHandle && checkL0LoaderTeardown()) {
8483
auto zeResult = destroy(handle);
8584
// Gracefully handle the case that L0 was already unloaded.
8685
if (zeResult && (zeResult != ZE_RESULT_ERROR_UNINITIALIZED || zeResult != ZE_RESULT_ERROR_UNKNOWN))

unified-runtime/source/adapters/level_zero/v2/memory.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t(
112112
this->IsInteropNativeHandle = interopNativeHandle;
113113
this->ptr =
114114
usm_unique_ptr_t(hostPtr, [hContext, ownHostPtr, this](void *ptr) {
115-
if (!ownHostPtr ||
116-
(this->IsInteropNativeHandle && !checkL0LoaderTeardown())) {
115+
if (!ownHostPtr || !checkL0LoaderTeardown()) {
117116
return;
118117
}
119118
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
@@ -237,8 +236,7 @@ ur_discrete_buffer_handle_t::ur_discrete_buffer_handle_t(
237236
this->IsInteropNativeHandle = interopNativeHandle;
238237
deviceAllocations[hDevice->Id.value()] = usm_unique_ptr_t(
239238
devicePtr, [this, hContext = this->hContext, ownZePtr](void *ptr) {
240-
if (!ownZePtr ||
241-
(this->IsInteropNativeHandle && !checkL0LoaderTeardown())) {
239+
if (!ownZePtr || !checkL0LoaderTeardown()) {
242240
return;
243241
}
244242
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
8787
[ownZeQueue,
8888
interopNativeHandle](ze_command_list_handle_t hZeCommandList) {
8989
if (ownZeQueue) {
90-
if (!interopNativeHandle) {
90+
if (checkL0LoaderTeardown()) {
9191
ZE_CALL_NOCHECK(zeCommandListDestroy, (hZeCommandList));
9292
}
9393
}

0 commit comments

Comments
 (0)