Skip to content

[SYCL][UR] Replace logging and leak checking with L0 loader functionality #17150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/ur-build-hw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,18 @@ jobs:
run: cmake --install ${{github.workspace}}/build

- name: Test adapter specific
run: ctest -C ${{matrix.build_type}} --test-dir ${{github.workspace}}/build --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 600
env:
ZE_ENABLE_LOADER_DEBUG_TRACE: 1
ZE_DEBUG: 1
run: ctest -C ${{matrix.build_type}} --test-dir ${{github.workspace}}/build --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 600 -VV
# Don't run adapter specific tests when building multiple adapters
if: ${{ matrix.adapter.other_name == '' }}

- name: Test adapters
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --test-dir ${{github.workspace}}/build --output-on-failure -L "conformance" --timeout 600
env:
ZE_ENABLE_LOADER_DEBUG_TRACE: 1
ZE_DEBUG: 1
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --test-dir ${{github.workspace}}/build --output-on-failure -L "conformance" --timeout 600 -VV

- name: Get information about platform
if: ${{ always() }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

// Keep ownership
// CHECK: zeMemFree
// CHECK: zeMemFree

// Account for zeMemFree used to query page sizes by the UMF
// CHECK-COUNT-8: zeMemFree
Expand Down
3 changes: 2 additions & 1 deletion unified-runtime/cmake/FetchLevelZero.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,13 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++98-compat-extra-semi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option")
endif()
set(BUILD_STATIC ON)

if (UR_LEVEL_ZERO_LOADER_REPO STREQUAL "")
set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
endif()
if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "")
set(UR_LEVEL_ZERO_LOADER_TAG v1.19.2)
set(UR_LEVEL_ZERO_LOADER_TAG v1.21.1)
endif()

# Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104
Expand Down
5 changes: 3 additions & 2 deletions unified-runtime/source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,14 @@ if(UR_BUILD_ADAPTER_L0)
)
install_ur_library(ur_adapter_level_zero)

target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_LEVEL_ZERO)
if(UR_STATIC_ADAPTER_L0)
target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO)

# The 'utils' target from 'level-zero-loader' has an include path that is
# prefixed in the source directory, which breaks installation of that target.
set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ComputeRuntimeLevelZero-Headers ze_loader utils
set_target_properties(level_zero_utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ComputeRuntimeLevelZero-Headers ze_loader level_zero_utils
EXPORT ${PROJECT_NAME}-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
Expand Down
147 changes: 32 additions & 115 deletions unified-runtime/source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,18 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
ZeInitResult = ZE_RESULT_ERROR_UNINITIALIZED;
ZesResult = ZE_RESULT_ERROR_UNINITIALIZED;

#ifdef UR_STATIC_LEVEL_ZERO
// Given static linking of the L0 Loader, we must delay the loader's
// destruction of its context until after the UR Adapter is destroyed.
zelSetDelayLoaderContextTeardown();
#endif

if (UrL0Debug & UR_L0_DEBUG_BASIC) {
logger.setLegacySink(std::make_unique<ur_legacy_sink>());
setEnvVar("ZEL_ENABLE_LOADER_LOGGING", "1");
setEnvVar("ZEL_LOADER_LOGGING_LEVEL", "trace");
setEnvVar("ZEL_LOADER_LOG_CONSOLE", "1");
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
};

if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
Expand All @@ -310,18 +320,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
}

PlatformCache.Compute = [](Result<PlatformVec> &result) {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (UrL0LeaksDebug) {
ZeCallCount = new std::map<std::string, int>;
}
});
} catch (...) {
result = exceptionToResult(std::current_exception());
return;
}

uint32_t UserForcedSysManInit = 0;
// Check if the user has disabled the default L0 Env initialization.
const int UrSysManEnvInitEnabled = [&UserForcedSysManInit] {
Expand All @@ -335,10 +333,12 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
// Dynamically load the new L0 apis separately.
// This must be done to avoid attempting to use symbols that do
// not exist in older loader runtimes.
#ifndef UR_STATIC_LEVEL_ZERO
#ifdef _WIN32
HMODULE processHandle = GetModuleHandle(NULL);
GlobalAdapter->processHandle = GetModuleHandle(NULL);
#else
HMODULE processHandle = nullptr;
GlobalAdapter->processHandle = nullptr;
#endif
#endif

// initialize level zero only once.
Expand Down Expand Up @@ -412,9 +412,13 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
}

if (useInitDrivers) {
#ifdef UR_STATIC_LEVEL_ZERO
GlobalAdapter->initDriversFunctionPtr = zeInitDrivers;
#else
GlobalAdapter->initDriversFunctionPtr =
(ze_pfnInitDrivers_t)ur_loader::LibLoader::getFunctionPtr(
processHandle, "zeInitDrivers");
GlobalAdapter->processHandle, "zeInitDrivers");
#endif
if (GlobalAdapter->initDriversFunctionPtr) {
logger::debug("\nzeInitDrivers with flags value of {}\n",
static_cast<int>(GlobalAdapter->InitDriversDesc.flags));
Expand Down Expand Up @@ -455,14 +459,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()

return;
}
// Dynamically load the new L0 SysMan separate init and new EXP apis
// separately. This must be done to avoid attempting to use symbols that do
// not exist in older loader runtimes.
#ifdef _WIN32
GlobalAdapter->processHandle = GetModuleHandle(NULL);
#else
GlobalAdapter->processHandle = nullptr;
#endif

// Check if the user has enabled the default L0 SysMan initialization.
const int UrSysmanZesinitEnable = [&UserForcedSysManInit] {
Expand All @@ -484,6 +480,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
ZesInitNeeded = true;
}
if (ZesInitNeeded) {
#ifdef UR_STATIC_LEVEL_ZERO
GlobalAdapter->getDeviceByUUIdFunctionPtr = zesDriverGetDeviceByUuidExp;
GlobalAdapter->getSysManDriversFunctionPtr = zesDriverGet;
GlobalAdapter->sysManInitFunctionPtr = zesInit;
#else
GlobalAdapter->getDeviceByUUIdFunctionPtr =
(zes_pfnDriverGetDeviceByUuidExp_t)
ur_loader::LibLoader::getFunctionPtr(
Expand All @@ -494,6 +495,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
GlobalAdapter->sysManInitFunctionPtr =
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(
GlobalAdapter->processHandle, "zesInit");
#endif
}
if (GlobalAdapter->getDeviceByUUIdFunctionPtr &&
GlobalAdapter->getSysManDriversFunctionPtr &&
Expand Down Expand Up @@ -523,97 +525,6 @@ void globalAdapterOnDemandCleanup() {
}

ur_result_t adapterStateTeardown() {
// Print the balance of various create/destroy native calls.
// The idea is to verify if the number of create(+) and destroy(-) calls are
// matched.
if (ZeCallCount && (UrL0LeaksDebug) != 0) {
bool LeakFound = false;
// clang-format off
//
// The format of this table is such that each row accounts for a
// specific type of object: all elements in the row except the last
// one allocate objects of that type, while the last element is known
// to deallocate objects of that type.
//
std::vector<std::vector<std::string>> CreateDestroySet = {
{"zeContextCreate", "zeContextDestroy"},
{"zeCommandQueueCreate", "zeCommandQueueDestroy"},
{"zeModuleCreate", "zeModuleDestroy"},
{"zeKernelCreate", "zeKernelDestroy"},
{"zeEventPoolCreate", "zeEventPoolDestroy"},
{"zeCommandListCreateImmediate", "zeCommandListCreate", "zeCommandListDestroy"},
{"zeEventCreate", "zeEventDestroy"},
{"zeFenceCreate", "zeFenceDestroy"},
{"zeImageCreate","zeImageViewCreateExt", "zeImageDestroy"},
{"zeSamplerCreate", "zeSamplerDestroy"},
{"zeMemAllocDevice", "zeMemAllocHost", "zeMemAllocShared", "zeMemFree"},
};

// A sample of the intended output is shown below:
// ------------------------------------------------------------------------
// zeContextCreate = 1 \---> zeContextDestroy = 1
// zeCommandQueueCreate = 1 \---> zeCommandQueueDestroy = 1
// zeModuleCreate = 1 \---> zeModuleDestroy = 1
// zeKernelCreate = 1 \---> zeKernelDestroy = 1
// zeEventPoolCreate = 1 \---> zeEventPoolDestroy = 1
// zeCommandListCreateImmediate = 1 |
// zeCommandListCreate = 1 \---> zeCommandListDestroy = 1 ---> LEAK = 1
// zeEventCreate = 2 \---> zeEventDestroy = 2
// zeFenceCreate = 1 \---> zeFenceDestroy = 1
// zeImageCreate = 0 \---> zeImageDestroy = 0
// zeSamplerCreate = 0 \---> zeSamplerDestroy = 0
// zeMemAllocDevice = 0 |
// zeMemAllocHost = 1 |
// zeMemAllocShared = 0 \---> zeMemFree = 1
//
// clang-format on
// TODO: use logger to print these messages
std::cerr << "Check balance of create/destroy calls\n";
std::cerr << "----------------------------------------------------------\n";
std::stringstream ss;
for (const auto &Row : CreateDestroySet) {
int diff = 0;
for (auto I = Row.begin(); I != Row.end();) {
const char *ZeName = (*I).c_str();
const auto &ZeCount = (*ZeCallCount)[*I];

bool First = (I == Row.begin());
bool Last = (++I == Row.end());

if (Last) {
ss << " \\--->";
diff -= ZeCount;
} else {
diff += ZeCount;
if (!First) {
ss << " | ";
std::cerr << ss.str() << "\n";
ss.str("");
ss.clear();
}
}
ss << std::setw(30) << std::right << ZeName;
ss << " = ";
ss << std::setw(5) << std::left << ZeCount;
}

if (diff) {
LeakFound = true;
ss << " ---> LEAK = " << diff;
}

std::cerr << ss.str() << '\n';
ss.str("");
ss.clear();
}

ZeCallCount->clear();
delete ZeCallCount;
ZeCallCount = nullptr;
if (LeakFound)
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
}

// Due to multiple DLLMain definitions with SYCL, register to cleanup the
// Global Adapter after refcnt is 0
#if defined(_WIN32)
Expand Down Expand Up @@ -668,7 +579,13 @@ ur_result_t urAdapterRelease(ur_adapter_handle_t) {
if (GlobalAdapter) {
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
if (--GlobalAdapter->RefCount == 0) {
return adapterStateTeardown();
auto result = adapterStateTeardown();
#ifdef UR_STATIC_LEVEL_ZERO
// Given static linking of the L0 Loader, we must delay the loader's
// destruction of its context until after the UR Adapter is destroyed.
zelLoaderContextTeardown();
#endif
return result;
}
}

Expand Down
19 changes: 0 additions & 19 deletions unified-runtime/source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,25 +137,6 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
} // switch
}

// Post-processes the result of a Level Zero call that the caller has already
// made: logs the call, counts it on success when leak debugging is enabled,
// and optionally logs failures.
//
// ZeResult   - status code returned by the Level Zero call.
// ZeName     - name of the Level Zero entry point; used in log messages and
//              as the key of the per-call counter.
// ZeArgs     - stringified argument list; used in the debug log only.
// TraceError - when true, a non-success ZeResult is logged as an error.
//
// Returns ZeResult unchanged.
ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName,
                           const char *ZeArgs, bool TraceError) {
  logger::debug("ZE ---> {}{}", ZeName, ZeArgs);

  if (ZeResult == ZE_RESULT_SUCCESS) {
    // ZeCallCount is allocated lazily during platform discovery and is
    // deleted and reset to nullptr on adapter teardown, so it can be null
    // here even when leak debugging is on. Guard the dereference the same
    // way the teardown path does.
    if (UrL0LeaksDebug && ZeCallCount) {
      ++(*ZeCallCount)[ZeName];
    }
    return ZE_RESULT_SUCCESS;
  }

  if (TraceError) {
    // zeParseError receives ErrorString by reference; "Unknown" is the
    // value logged if it leaves the string untouched.
    const char *ErrorString = "Unknown";
    zeParseError(ZeResult, ErrorString);
    logger::error("Error ({}) in {}", ErrorString, ZeName);
  }
  return ZeResult;
}

// Specializations for various L0 structures
template <> ze_structure_type_t getZeStructureType<ze_event_pool_desc_t>() {
return ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
Expand Down
35 changes: 12 additions & 23 deletions unified-runtime/source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ const int UrL0LeaksDebug = [] {
const char *UrRet = std::getenv("UR_L0_LEAKS_DEBUG");
if (!UrRet)
return 0;

if (std::atoi(UrRet)) {
setenv("ZE_ENABLE_VALIDATION_LAYER", "1", 1);
setenv("ZEL_ENABLE_BASIC_LEAK_CHECKER", "1", 1);
}

return std::atoi(UrRet);
}();

Expand All @@ -235,7 +241,6 @@ const int UrL0InitAllDrivers = [] {
enum {
UrL0SerializeNone =
0, // no locking or blocking (except when SYCL RT requested blocking)
UrL0SerializeLock = 1, // locking around each UR_CALL
UrL0SerializeBlock =
2, // blocking UR calls, where supported (usually in enqueue commands)
};
Expand Down Expand Up @@ -282,20 +287,10 @@ class ZeCall {
static std::mutex GlobalLock;

public:
ZeCall() {
if ((UrL0Serialize & UrL0SerializeLock) != 0) {
GlobalLock.lock();
}
template <typename F> static auto underLock(F &&f) {
std::lock_guard<std::mutex> Lock(GlobalLock);
return f();
}
~ZeCall() {
if ((UrL0Serialize & UrL0SerializeLock) != 0) {
GlobalLock.unlock();
}
}

// The non-static version just calls static one.
ze_result_t doCall(ze_result_t ZeResult, const char *ZeName,
const char *ZeArgs, bool TraceError = true);
};

// This function will ensure compatibility with both Linux and Windows for
Expand Down Expand Up @@ -346,25 +341,19 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString);
// Trace a call to Level-Zero RT
#define ZE2UR_CALL(ZeName, ZeArgs) \
{ \
ze_result_t ZeResult = ZeName ZeArgs; \
if (auto Result = ZeCall().doCall(ZeResult, #ZeName, #ZeArgs, true)) \
if (auto Result = ZeName ZeArgs) \
return ze2urResult(Result); \
}

// Trace a call to Level-Zero RT, throw on error
#define ZE2UR_CALL_THROWS(ZeName, ZeArgs) \
{ \
ze_result_t ZeResult = ZeName ZeArgs; \
if (auto Result = ZeCall().doCall(ZeResult, #ZeName, #ZeArgs, true)) \
if (auto Result = ZeName ZeArgs) \
throw ze2urResult(Result); \
}

// Perform traced call to L0 without checking for errors
#define ZE_CALL_NOCHECK(ZeName, ZeArgs) \
ZeCall().doCall(ZeName ZeArgs, #ZeName, #ZeArgs, false)

#define ZE_CALL_NOCHECK_NAME(ZeName, ZeArgs, callName) \
ZeCall().doCall(ZeName ZeArgs, callName, #ZeArgs, false)
#define ZE_CALL_NOCHECK(ZeName, ZeArgs) ZeName ZeArgs

// This wrapper around std::atomic is created to limit operations with reference
// counter and to make allowed operations more transparent in terms of
Expand Down
2 changes: 1 addition & 1 deletion unified-runtime/source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ ur_result_t urKernelSetExecInfo(
else
// Unexpected cache configuration value.
return UR_RESULT_ERROR_INVALID_VALUE;
ZE2UR_CALL(zeKernelSetCacheConfig, (ZeKernel, ZeCacheConfig););
ZE2UR_CALL(zeKernelSetCacheConfig, (ZeKernel, ZeCacheConfig));
} else {
logger::error("urKernelSetExecInfo: unsupported ParamName");
return UR_RESULT_ERROR_INVALID_VALUE;
Expand Down
Loading
Loading