Skip to content

Commit 6da51cd

Browse files
committed
[UR][L0] Fix L0 teardown checks for stability
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 228fd1b commit 6da51cd

File tree

9 files changed

+62
-72
lines changed

unified-runtime/cmake/FetchLevelZero.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
4040
set(BUILD_STATIC ON)
4141

4242
if (UR_LEVEL_ZERO_LOADER_REPO STREQUAL "")
43-
set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
43+
set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/nrspruit/level-zero.git")
4444
endif()
4545
if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "")
46-
set(UR_LEVEL_ZERO_LOADER_TAG v1.21.1)
46+
set(UR_LEVEL_ZERO_LOADER_TAG cd83892e09c339b1688de3aa67cd902fb277b297)
4747
endif()
4848

4949
# Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104

unified-runtime/source/adapters/level_zero/common.hpp

Lines changed: 8 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include <ur/ur.hpp>
2929
#include <ur_ddi.h>
30+
#include <loader/ze_loader.h>
3031
#include <ze_api.h>
3132
#include <zes_api.h>
3233

@@ -38,65 +39,15 @@
3839
struct _ur_platform_handle_t;
3940

4041
[[maybe_unused]] static bool checkL0LoaderTeardown() {
41-
bool loaderStable = true;
42-
#ifdef _WIN32
43-
uint32_t ZeDriverCount = 0;
44-
HMODULE zeLoader = LoadLibrary("ze_loader.dll");
45-
if (zeLoader) {
46-
typedef ze_result_t (*zeDriverGet_t)(uint32_t *, ze_driver_handle_t *);
47-
zeDriverGet_t zeDriverGetLoader =
48-
(zeDriverGet_t)GetProcAddress(zeLoader, "zeDriverGet");
49-
if (zeDriverGetLoader) {
50-
ze_result_t result = zeDriverGetLoader(&ZeDriverCount, nullptr);
51-
logger::debug(
52-
"ZE ---> checkL0LoaderTeardown result = {} driver count = {}", result,
53-
ZeDriverCount);
54-
if (result != ZE_RESULT_SUCCESS || ZeDriverCount == 0) {
55-
loaderStable = false;
56-
}
57-
} else {
58-
logger::debug("ZE ---> checkL0LoaderTeardown: Failed to get address of "
59-
"zeDriverGet");
60-
loaderStable = false;
61-
}
62-
FreeLibrary(zeLoader);
63-
} else {
64-
logger::debug(
65-
"ZE ---> checkL0LoaderTeardown: Failed to load ze_loader.dll");
66-
loaderStable = false;
67-
}
68-
#else
69-
uint32_t ZeDriverCount = 0;
70-
void *zeLoader = dlopen("libze_loader.so.1", RTLD_LAZY);
71-
if (zeLoader) {
72-
typedef ze_result_t (*zeDriverGet_t)(uint32_t *, ze_driver_handle_t *);
73-
zeDriverGet_t zeDriverGetLoader =
74-
(zeDriverGet_t)dlsym(zeLoader, "zeDriverGet");
75-
if (zeDriverGetLoader) {
76-
ze_result_t result = zeDriverGetLoader(&ZeDriverCount, nullptr);
77-
logger::debug(
78-
"ZE ---> checkL0LoaderTeardown result = {} driver count = {}", result,
79-
ZeDriverCount);
80-
if (result != ZE_RESULT_SUCCESS || ZeDriverCount == 0) {
81-
loaderStable = false;
82-
}
83-
} else {
84-
logger::debug("ZE ---> checkL0LoaderTeardown: Failed to get address of "
85-
"zeDriverGet");
86-
loaderStable = false;
42+
try {
43+
if (!zelCheckIsLoaderInTearDown()) {
44+
logger::debug("ZE ---> checkL0LoaderTeardown: Loader is not in teardown");
45+
return true;
8746
}
88-
dlclose(zeLoader);
89-
} else {
90-
logger::debug(
91-
"ZE ---> checkL0LoaderTeardown: Failed to load libze_loader.so.1");
92-
loaderStable = false;
93-
}
94-
#endif
95-
if (!loaderStable) {
96-
logger::debug(
97-
"ZE ---> checkL0LoaderTeardown: Loader is not stable, returning false");
47+
} catch (...) {
9848
}
99-
return loaderStable;
49+
logger::debug("ZE ---> checkL0LoaderTeardown: Loader is in teardown or is unstable");
50+
return false;
10051
}
10152

10253
// Controls UR L0 calls tracing.

unified-runtime/source/adapters/level_zero/context.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,11 @@ ur_result_t ContextReleaseHelper(ur_context_handle_t Context) {
285285
if (DestroyZeContext) {
286286
auto ZeResult = ZE_CALL_NOCHECK(zeContextDestroy, (DestroyZeContext));
287287
// Gracefully handle the case that L0 was already unloaded.
288-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
288+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
289289
return ze2urResult(ZeResult);
290+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
291+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
292+
}
290293
}
291294

292295
return Result;
@@ -311,8 +314,11 @@ ur_result_t ur_context_handle_t_::finalize() {
311314
(Event->IsInteropNativeHandle && checkL0LoaderTeardown())) {
312315
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
313316
// Gracefully handle the case that L0 was already unloaded.
314-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
317+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
315318
return ze2urResult(ZeResult);
319+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
320+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
321+
}
316322
}
317323
Event->ZeEvent = nullptr;
318324
delete Event;
@@ -326,8 +332,11 @@ ur_result_t ur_context_handle_t_::finalize() {
326332
for (auto &ZePool : ZePoolCache) {
327333
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
328334
// Gracefully handle the case that L0 was already unloaded.
329-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
335+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
330336
return ze2urResult(ZeResult);
337+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
338+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
339+
}
331340
}
332341
ZePoolCache.clear();
333342
}
@@ -336,8 +345,11 @@ ur_result_t ur_context_handle_t_::finalize() {
336345
// Destroy the command list used for initializations
337346
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListInit));
338347
// Gracefully handle the case that L0 was already unloaded.
339-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
348+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
340349
return ze2urResult(ZeResult);
350+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
351+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
352+
}
341353

342354
std::scoped_lock<ur_mutex> Lock(ZeCommandListCacheMutex);
343355
for (auto &List : ZeComputeCommandListCache) {
@@ -346,8 +358,11 @@ ur_result_t ur_context_handle_t_::finalize() {
346358
if (ZeCommandList) {
347359
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
348360
// Gracefully handle the case that L0 was already unloaded.
349-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
361+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
350362
return ze2urResult(ZeResult);
363+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
364+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
365+
}
351366
}
352367
}
353368
}
@@ -357,8 +372,11 @@ ur_result_t ur_context_handle_t_::finalize() {
357372
if (ZeCommandList) {
358373
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
359374
// Gracefully handle the case that L0 was already unloaded.
360-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
375+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
361376
return ze2urResult(ZeResult);
377+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
378+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
379+
}
362380
}
363381
}
364382
}

unified-runtime/source/adapters/level_zero/event.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1125,8 +1125,11 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
11251125
(Event->IsInteropNativeHandle && checkL0LoaderTeardown())) {
11261126
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
11271127
// Gracefully handle the case that L0 was already unloaded.
1128-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1128+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
11291129
return ze2urResult(ZeResult);
1130+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
1131+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
1132+
}
11301133
}
11311134
Event->ZeEvent = nullptr;
11321135
auto Context = Event->Context;

unified-runtime/source/adapters/level_zero/kernel.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -944,8 +944,11 @@ ur_result_t urKernelRelease(
944944
(Kernel->IsInteropNativeHandle && checkL0LoaderTeardown())) {
945945
auto ZeResult = ZE_CALL_NOCHECK(zeKernelDestroy, (ZeKernel));
946946
// Gracefully handle the case that L0 was already unloaded.
947-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
947+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
948948
return ze2urResult(ZeResult);
949+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
950+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
951+
}
949952
}
950953
}
951954
}

unified-runtime/source/adapters/level_zero/memory.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1668,8 +1668,11 @@ ur_result_t urMemRelease(
16681668
auto ZeResult = ZE_CALL_NOCHECK(
16691669
zeImageDestroy, (ur_cast<ze_image_handle_t>(ZeHandleImage)));
16701670
// Gracefully handle the case that L0 was already unloaded.
1671-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1671+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
16721672
return ze2urResult(ZeResult);
1673+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
1674+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
1675+
}
16731676
}
16741677
}
16751678
delete Image;

unified-runtime/source/adapters/level_zero/queue.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -651,8 +651,11 @@ ur_result_t urQueueRelease(
651651
if (Queue->Healthy && it->second.ZeFence != nullptr) {
652652
auto ZeResult = ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
653653
// Gracefully handle the case that L0 was already unloaded.
654-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
654+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
655655
return ze2urResult(ZeResult);
656+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
657+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
658+
}
656659
}
657660
if (Queue->UsingImmCmdLists && Queue->OwnZeCommandQueue) {
658661
std::scoped_lock<ur_mutex> Lock(
@@ -1609,8 +1612,11 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
16091612
(Queue->IsInteropNativeHandle && checkL0LoaderTeardown())) {
16101613
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
16111614
// Gracefully handle the case that L0 was already unloaded.
1612-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1615+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
16131616
return ze2urResult(ZeResult);
1617+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
1618+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
1619+
}
16141620
}
16151621
}
16161622
}

unified-runtime/source/adapters/level_zero/sampler.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,11 @@ ur_result_t urSamplerRelease(
131131

132132
auto ZeResult = ZE_CALL_NOCHECK(zeSamplerDestroy, (Sampler->ZeSampler));
133133
// Gracefully handle the case that L0 was already unloaded.
134-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
134+
if (ZeResult && (ZeResult != ZE_RESULT_ERROR_UNINITIALIZED || ZeResult != ZE_RESULT_ERROR_UNKNOWN))
135135
return ze2urResult(ZeResult);
136+
if ( ZeResult == ZE_RESULT_ERROR_UNKNOWN) {
137+
ZeResult = ZE_RESULT_ERROR_UNINITIALIZED;
138+
}
136139
delete Sampler;
137140

138141
return UR_RESULT_SUCCESS;

unified-runtime/source/adapters/level_zero/v2/common.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,11 @@ struct ze_handle_wrapper {
8383
(ownZeHandle && IsInteropNativeHandle && checkL0LoaderTeardown())) {
8484
auto zeResult = destroy(handle);
8585
// Gracefully handle the case that L0 was already unloaded.
86-
if (zeResult && zeResult != ZE_RESULT_ERROR_UNINITIALIZED)
86+
if (zeResult && (zeResult != ZE_RESULT_ERROR_UNINITIALIZED || zeResult != ZE_RESULT_ERROR_UNKNOWN))
8787
throw ze2urResult(zeResult);
88+
if ( zeResult == ZE_RESULT_ERROR_UNKNOWN) {
89+
zeResult = ZE_RESULT_ERROR_UNINITIALIZED;
90+
}
8891
}
8992

9093
handle = nullptr;

0 commit comments

Comments
 (0)