Skip to content

Commit 2dcccff

Browse files
authored
Merge pull request #1959 from igchor/v2_enqueue_kernel_separate_adapter
[L0 v2] implement enqueueKernelLaunch and queueFinish
2 parents 9c58db2 + df3956a commit 2dcccff

34 files changed

+1291
-831
lines changed

source/adapters/level_zero/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,15 @@ if(UR_BUILD_ADAPTER_L0_V2)
179179
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
180180
${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
181181
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
182+
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
183+
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
182184
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
183185
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
184186
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
185187
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
186188
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
189+
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
190+
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
187191
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
188192
# v2-only sources
189193
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
@@ -194,8 +198,10 @@ if(UR_BUILD_ADAPTER_L0_V2)
194198
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.hpp
195199
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
196200
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
201+
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
197202
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
198203
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
204+
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
199205
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
200206
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
201207
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
@@ -204,9 +210,11 @@ if(UR_BUILD_ADAPTER_L0_V2)
204210
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.cpp
205211
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
206212
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
213+
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.cpp
207214
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
208215
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
209216
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
217+
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
210218
)
211219

212220
if(NOT WIN32)
@@ -220,6 +228,8 @@ if(UR_BUILD_ADAPTER_L0_V2)
220228
)
221229
endif()
222230

231+
target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)
232+
223233
# TODO: fix level_zero adapter conversion warnings
224234
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
225235
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
@@ -245,6 +255,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
245255

246256
target_include_directories(ur_adapter_level_zero_v2 PRIVATE
247257
"${CMAKE_CURRENT_SOURCE_DIR}/../.."
258+
"${CMAKE_CURRENT_SOURCE_DIR}/../../ur"
248259
LevelZeroLoader-Headers
249260
)
250261
endif()

source/adapters/level_zero/command_buffer.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "common.hpp"
1818

1919
#include "context.hpp"
20+
#include "kernel.hpp"
2021
#include "queue.hpp"
2122

2223
struct command_buffer_profiling_t {

source/adapters/level_zero/common.hpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
#include <umf_pools/disjoint_pool_config_parser.hpp>
2727

28+
#include "logger/ur_logger.hpp"
29+
2830
struct _ur_platform_handle_t;
2931

3032
static auto getUrResultString = [](ur_result_t Result) {
@@ -168,7 +170,7 @@ static auto getUrResultString = [](ur_result_t Result) {
168170
}
169171
};
170172

171-
// Trace an internal PI call; returns in case of an error.
173+
// Trace an internal UR call; returns in case of an error.
172174
#define UR_CALL(Call) \
173175
{ \
174176
if (PrintTrace) \
@@ -180,6 +182,18 @@ static auto getUrResultString = [](ur_result_t Result) {
180182
return Result; \
181183
}
182184

185+
// Trace an internal UR call; throws the ur_result_t in case of an error.
186+
#define UR_CALL_THROWS(Call) \
187+
{ \
188+
if (PrintTrace) \
189+
logger::always("UR ---> {}", #Call); \
190+
ur_result_t Result = (Call); \
191+
if (PrintTrace) \
192+
logger::always("UR <--- {}({})", #Call, getUrResultString(Result)); \
193+
if (Result != UR_RESULT_SUCCESS) \
194+
throw Result; \
195+
}
196+
183197
// Controls UR L0 calls tracing.
184198
enum UrDebugLevel {
185199
UR_L0_DEBUG_NONE = 0x0,

source/adapters/level_zero/context.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,3 +829,12 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t Device) const {
829829
}
830830
return false;
831831
}
832+
833+
const std::vector<ur_device_handle_t> &
834+
ur_context_handle_t_::getDevices() const {
835+
return Devices;
836+
}
837+
838+
ze_context_handle_t ur_context_handle_t_::getZeHandle() const {
839+
return ZeContext;
840+
}

source/adapters/level_zero/context.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ struct ur_context_handle_t_ : _ur_object {
193193
// Return the Platform, which is the same for all devices in the context
194194
ur_platform_handle_t getPlatform() const;
195195

196+
// Get vector of devices from this context
197+
const std::vector<ur_device_handle_t> &getDevices() const;
198+
196199
// Get index of the free slot in the available pool. If there is no available
197200
// pool then create new one. The HostVisible parameter tells if we need a
198201
// slot for a host-visible event. The ProfilingEnabled tells if we need a
@@ -303,6 +306,9 @@ struct ur_context_handle_t_ : _ur_object {
303306
// For that the Device or its root devices need to be in the context.
304307
bool isValidDevice(ur_device_handle_t Device) const;
305308

309+
// Get handle to the L0 context
310+
ze_context_handle_t getZeHandle() const;
311+
306312
private:
307313
// Get the cache of events for a provided scope and profiling mode.
308314
auto getEventCache(bool HostVisible, bool WithProfiling,

source/adapters/level_zero/helpers/kernel_helpers.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@
1212
#include "logger/ur_logger.hpp"
1313

1414
#include "../common.hpp"
15-
#include "../context.hpp"
1615
#include "../device.hpp"
1716

17+
#ifdef UR_ADAPTER_LEVEL_ZERO_V2 // set only for the v2 adapter target
18+
#include "../v2/context.hpp"
19+
#else
20+
#include "../context.hpp"
21+
#endif
22+
1823
ur_result_t getSuggestedLocalWorkSize(ur_device_handle_t hDevice,
1924
ze_kernel_handle_t hZeKernel,
2025
size_t GlobalWorkSize3D[3],

source/adapters/level_zero/platform.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,8 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() {
503503
return UR_RESULT_SUCCESS;
504504
}
505505

506+
size_t ur_platform_handle_t_::getNumDevices() { return URDevicesCache.size(); }
507+
506508
ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) {
507509
for (auto &dev : URDevicesCache) {
508510
if (dev->Id == id) {

source/adapters/level_zero/platform.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ struct ur_platform_handle_t_ : public _ur_platform {
5656
// Check the device cache and load it if necessary.
5757
ur_result_t populateDeviceCacheIfNeeded();
5858

59+
size_t getNumDevices();
60+
5961
ur_device_handle_t getDeviceById(DeviceId);
6062

6163
// Return the PI device from cache that represents given native device.

source/adapters/level_zero/program.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "program.hpp"
12+
#include "device.hpp"
1213
#include "logger/ur_logger.hpp"
13-
#include "ur_level_zero.hpp"
14+
15+
#ifdef UR_ADAPTER_LEVEL_ZERO_V2 // set only for the v2 adapter target
16+
#include "v2/context.hpp"
17+
#else
18+
#include "context.hpp"
19+
#endif
1420

1521
extern "C" {
1622
// Check to see if a Level Zero module has any unresolved symbols.
@@ -115,8 +121,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(
115121
const char *Options ///< [in][optional] pointer to build options
116122
///< null-terminated string.
117123
) {
118-
return urProgramBuildExp(Program, Context->Devices.size(),
119-
Context->Devices.data(), Options);
124+
std::vector<ur_device_handle_t> Devices = Context->getDevices();
125+
return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options);
120126
}
121127

122128
UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
@@ -174,7 +180,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
174180

175181
for (uint32_t i = 0; i < numDevices; i++) {
176182
ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice;
177-
ze_context_handle_t ZeContext = hProgram->Context->ZeContext;
183+
ze_context_handle_t ZeContext = hProgram->Context->getZeHandle();
178184
ze_module_handle_t ZeModuleHandle = nullptr;
179185
ze_module_build_log_handle_t ZeBuildLog{};
180186

@@ -266,7 +272,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
266272
// ze-opt-greater-than-4GB-buffer-required to disable
267273
// stateful optimizations and be able to use larger than
268274
// 4GB allocations on these kernels.
269-
if (Context->Devices[0]->useRelaxedAllocationLimits()) {
275+
if (Context->getDevices()[0]->useRelaxedAllocationLimits()) {
270276
Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required";
271277
}
272278
}
@@ -285,9 +291,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(
285291
ur_program_handle_t
286292
*Program ///< [out] pointer to handle of program object created.
287293
) {
288-
return urProgramLinkExp(Context, Context->Devices.size(),
289-
Context->Devices.data(), Count, Programs, Options,
290-
Program);
294+
std::vector<ur_device_handle_t> Devices = Context->getDevices();
295+
return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count,
296+
Programs, Options, Program);
291297
}
292298

293299
UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
@@ -426,7 +432,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
426432

427433
// Call the Level Zero API to compile, link, and create the module.
428434
ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice;
429-
ze_context_handle_t ZeContext = hContext->ZeContext;
435+
ze_context_handle_t ZeContext = hContext->getZeHandle();
430436
ze_module_handle_t ZeModule = nullptr;
431437
ze_module_build_log_handle_t ZeBuildLog = nullptr;
432438
ze_result_t ZeResult =
@@ -652,7 +658,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
652658
for (auto &ZeModulePair : Program->ZeModuleMap) {
653659
auto It = Program->ZeModuleMap.find(ZeModulePair.first);
654660
if (It != Program->ZeModuleMap.end()) {
655-
for (auto &Device : Program->Context->Devices) {
661+
for (auto &Device : Program->Context->getDevices()) {
656662
if (Device->ZeDevice == ZeModulePair.first) {
657663
devices.push_back(Device);
658664
}
@@ -661,7 +667,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
661667
}
662668
return ReturnValue(devices.data(), devices.size());
663669
} else {
664-
return ReturnValue(Program->Context->Devices[0]);
670+
return ReturnValue(Program->Context->getDevices()[0]);
665671
}
666672
case UR_PROGRAM_INFO_BINARY_SIZES: {
667673
std::shared_lock<ur_shared_mutex> Guard(Program->Mutex);

0 commit comments

Comments
 (0)