Skip to content

Commit d3f344a

Browse files
committed
move more stuff and build true pi_unified_runtime plugin
Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 461327b commit d3f344a

File tree

10 files changed

+341
-238
lines changed

10 files changed

+341
-238
lines changed

sycl/plugins/level_zero/CMakeLists.txt

100755100644
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ else()
1515
set(LEVEL_ZERO_LOADER_TAG v1.8.8)
1616
endif()
1717

18+
# Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104
1819
set(CMAKE_INCLUDE_CURRENT_DIR OFF)
1920

2021
message(STATUS "Will fetch Level Zero Loader from ${LEVEL_ZERO_LOADER_REPO}")
@@ -57,7 +58,7 @@ add_sycl_plugin(level_zero
5758
UnifiedRuntime-Headers
5859
ze_loader
5960
Threads::Threads
60-
pi_unified_runtime
61+
unified_runtime_static
6162
)
6263

6364
find_package(Python3 REQUIRED)

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 6 additions & 200 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,12 @@ static const bool ReuseDiscardedEvents = [] {
9090
return std::stoi(ReuseDiscardedEventsFlag) > 0;
9191
}();
9292

93-
// Controls PI level tracing prints.
94-
static bool PrintPiTrace = false;
95-
9693
// Controls support of the indirect access kernels and deferred memory release.
9794
static const bool IndirectAccessTrackingEnabled = [] {
9895
return std::getenv("SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY") !=
9996
nullptr;
10097
}();
10198

102-
// This will count the calls to Level-Zero
103-
static std::map<const char *, int> *ZeCallCount = nullptr;
104-
10599
// Map from L0 to PI result
106100
static inline pi_result mapError(ze_result_t Result) {
107101
return ur2piResult(ze2urResult(Result));
@@ -118,7 +112,7 @@ static inline pi_result mapError(ze_result_t Result) {
118112
// Trace an internal PI call; returns in case of an error.
119113
#define PI_CALL(Call) \
120114
{ \
121-
if (PrintPiTrace) \
115+
if (PrintTrace) \
122116
fprintf(stderr, "PI ---> %s\n", #Call); \
123117
pi_result Result = (Call); \
124118
if (Result != PI_SUCCESS) \
@@ -352,15 +346,6 @@ static bool CopyEngineRequested(pi_device Device) {
352346
}
353347

354348
// Global variables used in PI_Level_Zero
355-
// Note we only create a simple pointer variables such that C++ RT won't
356-
// deallocate them automatically at the end of the main program.
357-
// The heap memory allocated for these global variables reclaimed only when
358-
// Sycl RT calls piTearDown().
359-
static std::vector<pi_platform> *PiPlatformsCache =
360-
new std::vector<pi_platform>;
361-
static sycl::detail::SpinLock *PiPlatformsCacheMutex =
362-
new sycl::detail::SpinLock;
363-
static bool PiPlatformCachePopulated = false;
364349

365350
pi_result
366351
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
@@ -470,55 +455,6 @@ static pi_result enqueueMemCopyRectHelper(
470455
const pi_event *EventWaitList, pi_event *Event,
471456
bool PreferCopyEngine = false);
472457

473-
inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
474-
switch (ZeError) {
475-
#define ZE_ERRCASE(ERR) \
476-
case ERR: \
477-
ErrorString = "" #ERR; \
478-
break;
479-
480-
ZE_ERRCASE(ZE_RESULT_SUCCESS)
481-
ZE_ERRCASE(ZE_RESULT_NOT_READY)
482-
ZE_ERRCASE(ZE_RESULT_ERROR_DEVICE_LOST)
483-
ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY)
484-
ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY)
485-
ZE_ERRCASE(ZE_RESULT_ERROR_MODULE_BUILD_FAILURE)
486-
ZE_ERRCASE(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)
487-
ZE_ERRCASE(ZE_RESULT_ERROR_NOT_AVAILABLE)
488-
ZE_ERRCASE(ZE_RESULT_ERROR_UNINITIALIZED)
489-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_VERSION)
490-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE)
491-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ARGUMENT)
492-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_HANDLE)
493-
ZE_ERRCASE(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE)
494-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_POINTER)
495-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SIZE)
496-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_SIZE)
497-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT)
498-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT)
499-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ENUMERATION)
500-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION)
501-
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT)
502-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NATIVE_BINARY)
503-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_NAME)
504-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_NAME)
505-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_FUNCTION_NAME)
506-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION)
507-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION)
508-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX)
509-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE)
510-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE)
511-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE)
512-
ZE_ERRCASE(ZE_RESULT_ERROR_OVERLAPPING_REGIONS)
513-
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED)
514-
ZE_ERRCASE(ZE_RESULT_ERROR_UNKNOWN)
515-
516-
#undef ZE_ERRCASE
517-
default:
518-
assert(false && "Unexpected Error code");
519-
} // switch
520-
}
521-
522458
// Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
523459
constexpr size_t MaxMessageSize = 256;
524460
thread_local pi_result ErrorMessageCode = PI_SUCCESS;
@@ -538,26 +474,6 @@ pi_result piPluginGetLastError(char **message) {
538474
return ErrorMessageCode;
539475
}
540476

541-
ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName,
542-
const char *ZeArgs, bool TraceError) {
543-
zePrint("ZE ---> %s%s\n", ZeName, ZeArgs);
544-
545-
if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
546-
++(*ZeCallCount)[ZeName];
547-
}
548-
549-
if (ZeResult && TraceError) {
550-
const char *ErrorString = "Unknown";
551-
zeParseError(ZeResult, ErrorString);
552-
zePrint("Error (%s) in %s\n", ErrorString, ZeName);
553-
}
554-
return ZeResult;
555-
}
556-
557-
#define PI_ASSERT(condition, error) \
558-
if (!(condition)) \
559-
return error;
560-
561477
bool _pi_queue::doReuseDiscardedEvents() {
562478
return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents();
563479
}
@@ -1580,7 +1496,7 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
15801496
// traces incurs much different timings than real execution
15811497
// ansyway, and many regression tests use it.
15821498
//
1583-
bool CurrentlyEmpty = !PrintPiTrace && this->LastCommandEvent == nullptr;
1499+
bool CurrentlyEmpty = !PrintTrace && this->LastCommandEvent == nullptr;
15841500

15851501
// The list can be empty if command-list only contains signals of proxy
15861502
// events. It is possible that executeCommandList is called twice for the same
@@ -2238,117 +2154,7 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule,
22382154

22392155
pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
22402156
pi_uint32 *NumPlatforms) {
2241-
2242-
static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
2243-
static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
2244-
if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
2245-
PrintPiTrace = true;
2246-
}
2247-
2248-
static std::once_flag ZeCallCountInitialized;
2249-
try {
2250-
std::call_once(ZeCallCountInitialized, []() {
2251-
if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
2252-
ZeCallCount = new std::map<const char *, int>;
2253-
}
2254-
});
2255-
} catch (const std::bad_alloc &) {
2256-
return PI_ERROR_OUT_OF_HOST_MEMORY;
2257-
} catch (...) {
2258-
return PI_ERROR_UNKNOWN;
2259-
}
2260-
2261-
if (NumEntries == 0 && Platforms != nullptr) {
2262-
return PI_ERROR_INVALID_VALUE;
2263-
}
2264-
if (Platforms == nullptr && NumPlatforms == nullptr) {
2265-
return PI_ERROR_INVALID_VALUE;
2266-
}
2267-
2268-
// Setting these environment variables before running zeInit will enable the
2269-
// validation layer in the Level Zero loader.
2270-
if (ZeDebug & ZE_DEBUG_VALIDATION) {
2271-
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
2272-
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
2273-
}
2274-
2275-
// Enable SYSMAN support for obtaining the PCI address
2276-
// and maximum memory bandwidth.
2277-
if (getenv("SYCL_ENABLE_PCI") != nullptr) {
2278-
setEnvVar("ZES_ENABLE_SYSMAN", "1");
2279-
}
2280-
2281-
// TODO: We can still safely recover if something goes wrong during the init.
2282-
// Implement handling segfault using sigaction.
2283-
2284-
// We must only initialize the driver once, even if piPlatformsGet() is called
2285-
// multiple times. Declaring the return value as "static" ensures it's only
2286-
// called once.
2287-
static ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit, (0));
2288-
2289-
// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
2290-
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
2291-
PI_ASSERT(NumPlatforms != 0, PI_ERROR_INVALID_VALUE);
2292-
*NumPlatforms = 0;
2293-
return PI_SUCCESS;
2294-
}
2295-
2296-
if (ZeResult != ZE_RESULT_SUCCESS) {
2297-
zePrint("zeInit: Level Zero initialization failure\n");
2298-
return mapError(ZeResult);
2299-
}
2300-
2301-
// Cache pi_platforms for reuse in the future
2302-
// It solves two problems;
2303-
// 1. sycl::platform equality issue; we always return the same pi_platform.
2304-
// 2. performance; we can save time by immediately return from cache.
2305-
//
2306-
2307-
const std::lock_guard<sycl::detail::SpinLock> Lock{*PiPlatformsCacheMutex};
2308-
if (!PiPlatformCachePopulated) {
2309-
try {
2310-
// Level Zero does not have concept of Platforms, but Level Zero driver is
2311-
// the closest match.
2312-
uint32_t ZeDriverCount = 0;
2313-
ZE_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
2314-
if (ZeDriverCount == 0) {
2315-
PiPlatformCachePopulated = true;
2316-
} else {
2317-
std::vector<ze_driver_handle_t> ZeDrivers;
2318-
ZeDrivers.resize(ZeDriverCount);
2319-
2320-
ZE_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
2321-
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
2322-
pi_platform Platform = new _pi_platform(ZeDrivers[I]);
2323-
// Save a copy in the cache for future uses.
2324-
PiPlatformsCache->push_back(Platform);
2325-
2326-
pi_result Result = Platform->initialize();
2327-
if (Result != PI_SUCCESS) {
2328-
return Result;
2329-
}
2330-
}
2331-
PiPlatformCachePopulated = true;
2332-
}
2333-
} catch (const std::bad_alloc &) {
2334-
return PI_ERROR_OUT_OF_HOST_MEMORY;
2335-
} catch (...) {
2336-
return PI_ERROR_UNKNOWN;
2337-
}
2338-
}
2339-
2340-
// Populate returned platforms from the cache.
2341-
if (Platforms) {
2342-
PI_ASSERT(NumEntries <= PiPlatformsCache->size(),
2343-
PI_ERROR_INVALID_PLATFORM);
2344-
std::copy_n(PiPlatformsCache->begin(), NumEntries, Platforms);
2345-
}
2346-
2347-
if (NumPlatforms) {
2348-
*NumPlatforms = PiPlatformsCache->size();
2349-
}
2350-
2351-
return PI_SUCCESS;
2157+
return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms);
23522158
}
23532159

23542160
pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName,
@@ -3368,10 +3174,10 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
33683174
// TODO: maybe we should populate cache of platforms if it wasn't already.
33693175
// For now assert that is was populated.
33703176
PI_ASSERT(PiPlatformCachePopulated, PI_ERROR_INVALID_VALUE);
3371-
const std::lock_guard<sycl::detail::SpinLock> Lock{*PiPlatformsCacheMutex};
3177+
const std::lock_guard<SpinLock> Lock{*PiPlatformsCacheMutex};
33723178

33733179
pi_device Dev = nullptr;
3374-
for (auto &ThePlatform : *PiPlatformsCache) {
3180+
for (pi_platform ThePlatform : *PiPlatformsCache) {
33753181
Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice);
33763182
if (Dev) {
33773183
// Check that the input Platform, if was given, matches the found one.
@@ -8781,7 +8587,7 @@ pi_result piTearDown(void *PluginParameter) {
87818587
(void)PluginParameter;
87828588
bool LeakFound = false;
87838589
// reclaim pi_platform objects here since we don't have piPlatformRelease.
8784-
for (pi_platform &Platform : *PiPlatformsCache) {
8590+
for (pi_platform Platform : *PiPlatformsCache) {
87858591
delete Platform;
87868592
}
87878593
delete PiPlatformsCache;

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@
4040
#include <unordered_set>
4141
#include <vector>
4242

43+
#include <sycl/detail/iostream_proxy.hpp>
4344
#include <ze_api.h>
4445
#include <zes_api.h>
45-
#include <sycl/detail/iostream_proxy.hpp>
4646

4747
// Share code between this PI L0 Plugin and UR L0 Adapter
4848
#include <adapters/level_zero/ur_level_zero.hpp>
@@ -207,6 +207,8 @@ struct _pi_platform : public _ur_level_zero_platform {
207207
pi_shared_mutex ContextsMutex;
208208
};
209209

210+
struct _zer_platform_handle_t : public _pi_platform {};
211+
210212
// Implements memory allocation via L0 RT for USM allocator interface.
211213
class USMMemoryAllocBase : public SystemMemory {
212214
protected:

sycl/plugins/unified_runtime/CMakeLists.txt

100755100644
Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,49 @@ list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS unified-runtime)
3131

3232
find_package(Threads REQUIRED)
3333

34+
#
35+
# Build a helper static library to carry pieces shared between
36+
# this Unified Runtime plugin and Level Zero plugin. We cannot
37+
# use dynamic plugin library as that only exports pi* symbols.
38+
#
39+
add_library(unified_runtime_static STATIC
40+
"${sycl_inc_dir}/sycl/detail/pi.h"
41+
"${CMAKE_CURRENT_SOURCE_DIR}/ur.hpp"
42+
"${CMAKE_CURRENT_SOURCE_DIR}/ur.cpp"
43+
"${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.hpp"
44+
"${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.hpp"
45+
"${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.cpp"
46+
)
47+
48+
target_include_directories(unified_runtime_static
49+
INTERFACE
50+
"${UNIFIED_RUNTIME_INCLUDE_DIR}"
51+
"${CMAKE_CURRENT_SOURCE_DIR}"
52+
PRIVATE
53+
# For include <sycl/detail/pi.h>
54+
# TODO: how to get rid of this?
55+
"${CMAKE_SOURCE_DIR}/../sycl/include"
56+
)
57+
58+
target_link_libraries(unified_runtime_static PRIVATE
59+
UnifiedRuntime-Headers
60+
LevelZeroLoader-Headers
61+
)
62+
3463
#
3564
# NOTE: the Unified Runtime doesn't have the loader [yet].
3665
# So what we really build is the Unified Runtime with Level Zero Adapter
3766
# together.
3867
#
3968
add_sycl_plugin(unified_runtime
4069
SOURCES
41-
"${sycl_inc_dir}/sycl/detail/pi.h"
42-
"pi2ur.cpp"
43-
"pi2ur.hpp"
44-
"ur.hpp"
45-
"adapters/level_zero/ur_level_zero.hpp"
46-
"adapters/level_zero/ur_level_zero.cpp"
70+
# Put here anything that belongs exclusively to Unified Runtime
71+
# and should not be shared with the Level Zero plugin
72+
"${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.cpp"
4773
INCLUDE_DIRS
4874
"${UNIFIED_RUNTIME_INCLUDE_DIR}"
4975
LIBRARIES
76+
unified_runtime_static
5077
Threads::Threads
5178
UnifiedRuntime-Headers
5279
LevelZeroLoader-Headers

0 commit comments

Comments
 (0)