Skip to content

Commit df3956a

Browse files
committed
[L0 v2] implement enqueueKernelLaunch and queueFinish
Also, make event_pool allocate and free thread-safe. allocate() does not strictly need to take a dedicated mutex; we could just always call it under the queue lock. However, this means that free() would also need to be called under the same lock. That, in turn, means that every event would have to store its queue, which would break the current event_pool abstraction.
1 parent 0a6e827 commit df3956a

24 files changed

+706
-492
lines changed

source/adapters/level_zero/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
201201
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
202202
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
203203
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
204+
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
204205
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
205206
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
206207
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
@@ -213,6 +214,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
213214
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
214215
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
215216
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
217+
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
216218
)
217219

218220
if(NOT WIN32)
@@ -253,6 +255,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
253255

254256
target_include_directories(ur_adapter_level_zero_v2 PRIVATE
255257
"${CMAKE_CURRENT_SOURCE_DIR}/../.."
258+
"${CMAKE_CURRENT_SOURCE_DIR}/../../ur"
256259
LevelZeroLoader-Headers
257260
)
258261
endif()

source/adapters/level_zero/common.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
#include <umf_pools/disjoint_pool_config_parser.hpp>
2727

28+
#include "logger/ur_logger.hpp"
29+
2830
struct _ur_platform_handle_t;
2931

3032
static auto getUrResultString = [](ur_result_t Result) {

source/adapters/level_zero/v2/api.cpp

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,6 @@
1717

1818
std::mutex ZeCall::GlobalLock;
1919

20-
ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext,
21-
ur_context_info_t propName,
22-
size_t propSize, void *pPropValue,
23-
size_t *pPropSizeRet) {
24-
logger::error("{} function not implemented!", __FUNCTION__);
25-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
26-
}
27-
2820
ur_result_t UR_APICALL urContextGetNativeHandle(
2921
ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) {
3022
logger::error("{} function not implemented!", __FUNCTION__);
@@ -157,71 +149,6 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
157149
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
158150
}
159151

160-
ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext,
161-
const ur_usm_desc_t *pUSMDesc,
162-
ur_usm_pool_handle_t pool, size_t size,
163-
void **ppMem) {
164-
logger::error("{} function not implemented!", __FUNCTION__);
165-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
166-
}
167-
168-
ur_result_t UR_APICALL urUSMDeviceAlloc(ur_context_handle_t hContext,
169-
ur_device_handle_t hDevice,
170-
const ur_usm_desc_t *pUSMDesc,
171-
ur_usm_pool_handle_t pool, size_t size,
172-
void **ppMem) {
173-
logger::error("{} function not implemented!", __FUNCTION__);
174-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
175-
}
176-
177-
ur_result_t UR_APICALL urUSMSharedAlloc(ur_context_handle_t hContext,
178-
ur_device_handle_t hDevice,
179-
const ur_usm_desc_t *pUSMDesc,
180-
ur_usm_pool_handle_t pool, size_t size,
181-
void **ppMem) {
182-
logger::error("{} function not implemented!", __FUNCTION__);
183-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
184-
}
185-
186-
ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, void *pMem) {
187-
logger::error("{} function not implemented!", __FUNCTION__);
188-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
189-
}
190-
191-
ur_result_t UR_APICALL urUSMGetMemAllocInfo(ur_context_handle_t hContext,
192-
const void *pMem,
193-
ur_usm_alloc_info_t propName,
194-
size_t propSize, void *pPropValue,
195-
size_t *pPropSizeRet) {
196-
logger::error("{} function not implemented!", __FUNCTION__);
197-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
198-
}
199-
200-
ur_result_t UR_APICALL urUSMPoolCreate(ur_context_handle_t hContext,
201-
ur_usm_pool_desc_t *pPoolDesc,
202-
ur_usm_pool_handle_t *ppPool) {
203-
logger::error("{} function not implemented!", __FUNCTION__);
204-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
205-
}
206-
207-
ur_result_t UR_APICALL urUSMPoolRetain(ur_usm_pool_handle_t pPool) {
208-
logger::error("{} function not implemented!", __FUNCTION__);
209-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
210-
}
211-
212-
ur_result_t UR_APICALL urUSMPoolRelease(ur_usm_pool_handle_t pPool) {
213-
logger::error("{} function not implemented!", __FUNCTION__);
214-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
215-
}
216-
217-
ur_result_t UR_APICALL urUSMPoolGetInfo(ur_usm_pool_handle_t hPool,
218-
ur_usm_pool_info_t propName,
219-
size_t propSize, void *pPropValue,
220-
size_t *pPropSizeRet) {
221-
logger::error("{} function not implemented!", __FUNCTION__);
222-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
223-
}
224-
225152
ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
226153
ur_context_handle_t hContext, ur_device_handle_t hDevice,
227154
ur_virtual_mem_granularity_info_t propName, size_t propSize,
@@ -326,14 +253,6 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
326253
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
327254
}
328255

329-
ur_result_t UR_APICALL
330-
urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex,
331-
const ur_kernel_arg_pointer_properties_t *pProperties,
332-
const void *pArgValue) {
333-
logger::error("{} function not implemented!", __FUNCTION__);
334-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
335-
}
336-
337256
ur_result_t UR_APICALL urKernelSetExecInfo(
338257
ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize,
339258
const ur_kernel_exec_info_properties_t *pProperties,

source/adapters/level_zero/v2/command_list_cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "command_list_cache.hpp"
12+
#include "context.hpp"
1213

13-
#include "../context.hpp"
1414
#include "../device.hpp"
1515

1616
bool v2::immediate_command_list_descriptor_t::operator==(

source/adapters/level_zero/v2/context.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,28 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#include "context.hpp"
1211
#include "../device.hpp"
1312

13+
#include "context.hpp"
14+
#include "event_provider_normal.hpp"
15+
1416
ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
1517
uint32_t numDevices,
1618
const ur_device_handle_t *phDevices,
17-
bool)
19+
bool ownZeContext)
1820
: hContext(hContext), hDevices(phDevices, phDevices + numDevices),
19-
commandListCache(hContext) {}
21+
commandListCache(hContext),
22+
eventPoolCache(phDevices[0]->Platform->getNumDevices(),
23+
[context = this,
24+
platform = phDevices[0]->Platform](DeviceId deviceId) {
25+
auto device = platform->getDeviceById(deviceId);
26+
// TODO: just use per-context id?
27+
return std::make_unique<v2::provider_normal>(
28+
context, device, v2::EVENT_COUNTER,
29+
v2::QUEUE_IMMEDIATE);
30+
}) {
31+
std::ignore = ownZeContext;
32+
}
2033

2134
ur_context_handle_t_::~ur_context_handle_t_() noexcept(false) {
2235
// ur_context_handle_t_ is only created/destroyed through urContextCreate
@@ -85,3 +98,26 @@ UR_APIEXPORT ur_result_t UR_APICALL
8598
urContextRelease(ur_context_handle_t hContext) {
8699
return hContext->release();
87100
}
101+
102+
UR_APIEXPORT ur_result_t UR_APICALL
103+
urContextGetInfo(ur_context_handle_t hContext,
104+
ur_context_info_t contextInfoType, size_t propSize,
105+
106+
void *pContextInfo,
107+
108+
size_t *pPropSizeRet) {
109+
std::shared_lock<ur_shared_mutex> Lock(hContext->Mutex);
110+
UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet);
111+
switch (
112+
(uint32_t)contextInfoType) { // cast to avoid warnings on EXT enum values
113+
case UR_CONTEXT_INFO_DEVICES:
114+
return ReturnValue(hContext->getDevices().data(),
115+
hContext->getDevices().size());
116+
case UR_CONTEXT_INFO_NUM_DEVICES:
117+
return ReturnValue(uint32_t(hContext->getDevices().size()));
118+
case UR_CONTEXT_INFO_REFERENCE_COUNT:
119+
return ReturnValue(uint32_t{hContext->RefCount.load()});
120+
default:
121+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
122+
}
123+
}

source/adapters/level_zero/v2/context.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <ur_api.h>
1414

1515
#include "command_list_cache.hpp"
16+
#include "event_pool_cache.hpp"
1617

1718
struct ur_context_handle_t_ : _ur_object {
1819
ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices,
@@ -33,4 +34,5 @@ struct ur_context_handle_t_ : _ur_object {
3334
const ze_context_handle_t hContext;
3435
const std::vector<ur_device_handle_t> hDevices;
3536
v2::command_list_cache_t commandListCache;
37+
v2::event_pool_cache eventPoolCache;
3638
};

source/adapters/level_zero/v2/event.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ ur_result_t ur_event_handle_t_::release() {
4141
return UR_RESULT_SUCCESS;
4242

4343
pool->free(this);
44-
RefCount.increment();
4544

4645
return UR_RESULT_SUCCESS;
4746
}

source/adapters/level_zero/v2/event.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ namespace v2 {
2222
class event_pool;
2323
}
2424

25-
struct ur_event_handle_t_;
26-
using ur_event_handle_t = ur_event_handle_t_ *;
27-
2825
struct ur_event_handle_t_ : _ur_object {
2926
public:
3027
ur_event_handle_t_(v2::event_allocation eventAllocation,

source/adapters/level_zero/v2/event_pool.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@
88
//
99
//===----------------------------------------------------------------------===//
1010
#include "event_pool.hpp"
11+
#include "common/latency_tracker.hpp"
1112
#include "ur_api.h"
1213

1314
namespace v2 {
1415

1516
static constexpr size_t EVENTS_BURST = 64;
1617

1718
ur_event_handle_t_ *event_pool::allocate() {
19+
TRACK_SCOPE_LATENCY("event_pool::allocate");
20+
21+
std::unique_lock<std::mutex> lock(*mutex);
22+
1823
if (freelist.empty()) {
1924
auto start = events.size();
2025
auto end = start + EVENTS_BURST;
@@ -31,8 +36,16 @@ ur_event_handle_t_ *event_pool::allocate() {
3136
}
3237

3338
void event_pool::free(ur_event_handle_t_ *event) {
39+
TRACK_SCOPE_LATENCY("event_pool::free");
40+
41+
std::unique_lock<std::mutex> lock(*mutex);
42+
3443
event->reset();
3544
freelist.push_back(event);
45+
46+
// The event is still in the pool, so we need to increment the refcount
47+
assert(event->RefCount.load() == 0);
48+
event->RefCount.increment();
3649
}
3750

3851
event_provider *event_pool::getProvider() { return provider.get(); }

source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ namespace v2 {
2828

2929
class event_pool {
3030
public:
31+
// store weak reference to the queue as event_pool is part of the queue
3132
event_pool(std::unique_ptr<event_provider> Provider)
32-
: provider(std::move(Provider)){};
33+
: provider(std::move(Provider)), mutex(std::make_unique<std::mutex>()){};
3334

3435
event_pool(event_pool &&other) = default;
3536
event_pool &operator=(event_pool &&other) = default;
@@ -39,7 +40,10 @@ class event_pool {
3940

4041
DeviceId Id() { return provider->device()->Id.value(); };
4142

43+
// Allocate an event from the pool. Thread safe.
4244
ur_event_handle_t_ *allocate();
45+
46+
// Free an event back to the pool. Thread safe.
4347
void free(ur_event_handle_t_ *event);
4448

4549
event_provider *getProvider();
@@ -49,6 +53,8 @@ class event_pool {
4953

5054
std::deque<ur_event_handle_t_> events;
5155
std::vector<ur_event_handle_t_ *> freelist;
56+
57+
std::unique_ptr<std::mutex> mutex;
5258
};
5359

5460
} // namespace v2

source/adapters/level_zero/v2/event_provider_normal.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ provider_pool::provider_pool(ur_context_handle_t context,
2929
queue_type queue) {
3030
ZeStruct<ze_event_pool_desc_t> desc;
3131
desc.count = EVENTS_BURST;
32-
desc.flags = 0;
32+
desc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
3333

3434
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
3535
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr};
@@ -79,7 +79,6 @@ event_allocation provider_normal::allocate() {
7979
TRACK_SCOPE_LATENCY("provider_normal::allocate");
8080

8181
if (pools.empty()) {
82-
TRACK_SCOPE_LATENCY("provider_normal::allocate#createProviderPool");
8382
pools.emplace_back(createProviderPool());
8483
}
8584

@@ -91,7 +90,6 @@ event_allocation provider_normal::allocate() {
9190
}
9291
}
9392

94-
TRACK_SCOPE_LATENCY("provider_normal::allocate#slowPath");
9593
std::sort(pools.begin(), pools.end(), [](auto &a, auto &b) {
9694
return a->nfree() < b->nfree(); // ascending
9795
});

0 commit comments

Comments
 (0)