Skip to content

Commit 53e2c7a

Browse files
committed
Synchronize command_list usage in adapter v2
1 parent a412c12 commit 53e2c7a

File tree

8 files changed

+230
-132
lines changed

8 files changed

+230
-132
lines changed

unified-runtime/source/adapters/level_zero/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
158158
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
159159
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
160160
${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.hpp
161+
${CMAKE_CURRENT_SOURCE_DIR}/v2/lockable.hpp
161162
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
162163
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
163164
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,10 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
4141
ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() {
4242
// It is not allowed to append to command list from multiple threads.
4343
std::scoped_lock<ur_shared_mutex> guard(this->Mutex);
44+
auto commandListLocked = commandListManager.lock();
4445
UR_ASSERT(!isFinalized, UR_RESULT_ERROR_INVALID_OPERATION);
4546
// Close the command lists and have them ready for dispatch.
46-
ZE2UR_CALL(zeCommandListClose, (this->commandListManager.getZeCommandList()));
47+
ZE2UR_CALL(zeCommandListClose, (commandListLocked->getZeCommandList()));
4748
isFinalized = true;
4849
return UR_RESULT_SUCCESS;
4950
}
@@ -130,7 +131,8 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
130131
std::ignore = numKernelAlternatives;
131132
std::ignore = kernelAlternatives;
132133
std::ignore = command;
133-
UR_CALL(commandBuffer->commandListManager.appendKernelLaunch(
134+
auto commandListLocked = commandBuffer->commandListManager.lock();
135+
UR_CALL(commandListLocked->appendKernelLaunch(
134136
hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 0,
135137
nullptr, nullptr));
136138
return UR_RESULT_SUCCESS;
@@ -157,8 +159,9 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp(
157159

158160
std::ignore = phCommand;
159161
// Responsibility of UMD to offload to copy engine
160-
UR_CALL(hCommandBuffer->commandListManager.appendUSMMemcpy(
161-
false, pDst, pSrc, size, 0, nullptr, nullptr));
162+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
163+
UR_CALL(commandListLocked->appendUSMMemcpy(false, pDst, pSrc, size, 0,
164+
nullptr, nullptr));
162165

163166
return UR_RESULT_SUCCESS;
164167
} catch (...) {
@@ -185,7 +188,8 @@ ur_result_t urCommandBufferAppendMemBufferCopyExp(
185188

186189
std::ignore = phCommand;
187190
// Responsibility of UMD to offload to copy engine
188-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopy(
191+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
192+
UR_CALL(commandListLocked->appendMemBufferCopy(
189193
hSrcMem, hDstMem, srcOffset, dstOffset, size, 0, nullptr, nullptr));
190194

191195
return UR_RESULT_SUCCESS;
@@ -213,8 +217,9 @@ ur_result_t urCommandBufferAppendMemBufferWriteExp(
213217

214218
std::ignore = phCommand;
215219
// Responsibility of UMD to offload to copy engine
216-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWrite(
217-
hBuffer, false, offset, size, pSrc, 0, nullptr, nullptr));
220+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
221+
UR_CALL(commandListLocked->appendMemBufferWrite(hBuffer, false, offset, size,
222+
pSrc, 0, nullptr, nullptr));
218223

219224
return UR_RESULT_SUCCESS;
220225
} catch (...) {
@@ -241,8 +246,9 @@ ur_result_t urCommandBufferAppendMemBufferReadExp(
241246
std::ignore = phCommand;
242247

243248
// Responsibility of UMD to offload to copy engine
244-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferRead(
245-
hBuffer, false, offset, size, pDst, 0, nullptr, nullptr));
249+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
250+
UR_CALL(commandListLocked->appendMemBufferRead(hBuffer, false, offset, size,
251+
pDst, 0, nullptr, nullptr));
246252

247253
return UR_RESULT_SUCCESS;
248254
} catch (...) {
@@ -271,7 +277,8 @@ ur_result_t urCommandBufferAppendMemBufferCopyRectExp(
271277

272278
std::ignore = phCommand;
273279
// Responsibility of UMD to offload to copy engine
274-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopyRect(
280+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
281+
UR_CALL(commandListLocked->appendMemBufferCopyRect(
275282
hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch,
276283
srcSlicePitch, dstRowPitch, dstSlicePitch, 0, nullptr, nullptr));
277284

@@ -303,7 +310,8 @@ ur_result_t urCommandBufferAppendMemBufferWriteRectExp(
303310
std::ignore = phCommand;
304311

305312
// Responsibility of UMD to offload to copy engine
306-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWriteRect(
313+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
314+
UR_CALL(commandListLocked->appendMemBufferWriteRect(
307315
hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch,
308316
bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, 0, nullptr,
309317
nullptr));
@@ -336,7 +344,8 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp(
336344
std::ignore = phCommand;
337345

338346
// Responsibility of UMD to offload to copy engine
339-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferReadRect(
347+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
348+
UR_CALL(commandListLocked->appendMemBufferReadRect(
340349
hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch,
341350
bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, 0, nullptr,
342351
nullptr));
@@ -366,8 +375,9 @@ ur_result_t urCommandBufferAppendUSMFillExp(
366375

367376
std::ignore = phCommand;
368377

369-
UR_CALL(hCommandBuffer->commandListManager.appendUSMFill(
370-
pMemory, patternSize, pPattern, size, 0, nullptr, nullptr));
378+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
379+
UR_CALL(commandListLocked->appendUSMFill(pMemory, patternSize, pPattern, size,
380+
0, nullptr, nullptr));
371381
return UR_RESULT_SUCCESS;
372382
} catch (...) {
373383
return exceptionToResult(std::current_exception());
@@ -393,7 +403,8 @@ ur_result_t urCommandBufferAppendMemBufferFillExp(
393403

394404
std::ignore = phCommand;
395405

396-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferFill(
406+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
407+
UR_CALL(commandListLocked->appendMemBufferFill(
397408
hBuffer, pPattern, patternSize, offset, size, 0, nullptr, nullptr));
398409
return UR_RESULT_SUCCESS;
399410
} catch (...) {
@@ -420,8 +431,9 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp(
420431

421432
std::ignore = phCommand;
422433

423-
UR_CALL(hCommandBuffer->commandListManager.appendUSMPrefetch(
424-
pMemory, size, flags, 0, nullptr, nullptr));
434+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
435+
UR_CALL(commandListLocked->appendUSMPrefetch(pMemory, size, flags, 0, nullptr,
436+
nullptr));
425437

426438
return UR_RESULT_SUCCESS;
427439
} catch (...) {
@@ -447,8 +459,8 @@ ur_result_t urCommandBufferAppendUSMAdviseExp(
447459

448460
std::ignore = phCommand;
449461

450-
UR_CALL(hCommandBuffer->commandListManager.appendUSMAdvise(pMemory, size,
451-
advice, nullptr));
462+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
463+
UR_CALL(commandListLocked->appendUSMAdvise(pMemory, size, advice, nullptr));
452464

453465
return UR_RESULT_SUCCESS;
454466
} catch (...) {
@@ -483,4 +495,16 @@ urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
483495
return exceptionToResult(std::current_exception());
484496
}
485497

498+
// Submits the command list recorded in `CommandBuffer` to `UrQueue`.
//
// The command buffer's command list manager is locked for the whole duration
// of the call, so no other thread can use the command list through the
// manager while the queue is enqueueing it. Any exception thrown along the
// way is translated into a ur_result_t by exceptionToResult.
ur_result_t urCommandBufferEnqueueExp(
    ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    ur_event_handle_t *Event) {
  try {
    // Exclusive access guard; released when it goes out of scope on return.
    auto lockedManager = CommandBuffer->commandListManager.lock();

    auto &&targetQueue = UrQueue->get();
    return targetQueue.enqueueCommandBuffer(lockedManager->getZeCommandList(),
                                            Event, NumEventsInWaitList,
                                            EventWaitList);
  } catch (...) {
    return exceptionToResult(std::current_exception());
  }
}
509+
486510
} // namespace ur::level_zero

unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "common.hpp"
1414
#include "context.hpp"
1515
#include "kernel.hpp"
16+
#include "lockable.hpp"
1617
#include "queue_api.hpp"
1718
#include <ze_api.h>
1819

@@ -24,7 +25,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
2425

2526
~ur_exp_command_buffer_handle_t_() = default;
2627

27-
ur_command_list_manager commandListManager;
28+
lockable<ur_command_list_manager> commandListManager;
2829

2930
ur_result_t finalizeCommandBuffer();
3031
// Indicates if command-buffer commands can be updated after it is closed.

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,7 @@ ur_result_t ur_command_list_manager::appendKernelLaunch(
196196

197197
ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(device);
198198

199-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock(this->Mutex,
200-
hKernel->Mutex);
199+
std::scoped_lock<ur_shared_mutex> Lock(hKernel->Mutex);
201200

202201
ze_group_count_t zeThreadGroupDimensions{1, 1, 1};
203202
uint32_t WG[3]{};
@@ -235,8 +234,6 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy(
235234
ur_event_handle_t *phEvent) {
236235
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy");
237236

238-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
239-
240237
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY);
241238

242239
auto [pWaitEvents, numWaitEvents] =
@@ -262,8 +259,7 @@ ur_result_t ur_command_list_manager::appendMemBufferFill(
262259
auto hBuffer = hMem->getBuffer();
263260
UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE);
264261

265-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
266-
hBuffer->getMutex());
262+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
267263

268264
return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
269265
numEventsInWaitList, phEventWaitList,
@@ -276,8 +272,6 @@ ur_result_t ur_command_list_manager::appendUSMFill(
276272
ur_event_handle_t *phEvent) {
277273
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMFill");
278274

279-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
280-
281275
ur_usm_handle_t dstHandle(context, size, pMem);
282276
return appendGenericFillUnlocked(&dstHandle, 0, patternSize, pPattern, size,
283277
numEventsInWaitList, phEventWaitList,
@@ -292,8 +286,6 @@ ur_result_t ur_command_list_manager::appendUSMPrefetch(
292286

293287
std::ignore = flags;
294288

295-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
296-
297289
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);
298290

299291
auto [pWaitEvents, numWaitEvents] =
@@ -320,8 +312,6 @@ ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
320312
ur_event_handle_t *phEvent) {
321313
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");
322314

323-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
324-
325315
auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
326316

327317
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
@@ -354,8 +344,7 @@ ur_result_t ur_command_list_manager::appendMemBufferRead(
354344

355345
ur_usm_handle_t dstHandle(context, size, pDst);
356346

357-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
358-
hBuffer->getMutex());
347+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
359348

360349
return appendGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, 0,
361350
size, numEventsInWaitList, phEventWaitList,
@@ -373,8 +362,7 @@ ur_result_t ur_command_list_manager::appendMemBufferWrite(
373362

374363
ur_usm_handle_t srcHandle(context, size, pSrc);
375364

376-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
377-
hBuffer->getMutex());
365+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
378366

379367
return appendGenericCopyUnlocked(
380368
&srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList,
@@ -395,8 +383,8 @@ ur_result_t ur_command_list_manager::appendMemBufferCopy(
395383
UR_ASSERT(dstOffset + size <= hBufferDst->getSize(),
396384
UR_RESULT_ERROR_INVALID_SIZE);
397385

398-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> lock(
399-
this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex());
386+
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(
387+
hBufferSrc->getMutex(), hBufferDst->getMutex());
400388

401389
return appendGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset,
402390
dstOffset, size, numEventsInWaitList,
@@ -415,8 +403,7 @@ ur_result_t ur_command_list_manager::appendMemBufferReadRect(
415403
auto hBuffer = hMem->getBuffer();
416404
ur_usm_handle_t dstHandle(context, 0, pDst);
417405

418-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
419-
hBuffer->getMutex());
406+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
420407

421408
return appendRegionCopyUnlocked(
422409
hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region,
@@ -436,8 +423,7 @@ ur_result_t ur_command_list_manager::appendMemBufferWriteRect(
436423
auto hBuffer = hMem->getBuffer();
437424
ur_usm_handle_t srcHandle(context, 0, pSrc);
438425

439-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
440-
hBuffer->getMutex());
426+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
441427

442428
return appendRegionCopyUnlocked(
443429
&srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region,
@@ -457,8 +443,8 @@ ur_result_t ur_command_list_manager::appendMemBufferCopyRect(
457443
auto hBufferSrc = hSrc->getBuffer();
458444
auto hBufferDst = hDst->getBuffer();
459445

460-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> lock(
461-
this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex());
446+
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(
447+
hBufferSrc->getMutex(), hBufferDst->getMutex());
462448

463449
return appendRegionCopyUnlocked(
464450
hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch,
@@ -475,8 +461,6 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy2D(
475461
ur_rect_offset_t zeroOffset{0, 0, 0};
476462
ur_rect_region_t region{width, height, 0};
477463

478-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
479-
480464
ur_usm_handle_t srcHandle(context, 0, pSrc);
481465
ur_usm_handle_t dstHandle(context, 0, pDst);
482466

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,14 @@ struct wait_list_view {
3131
}
3232
};
3333

34-
struct ur_command_list_manager : public _ur_object {
34+
struct ur_command_list_manager {
3535

3636
ur_command_list_manager(ur_context_handle_t context,
3737
ur_device_handle_t device,
3838
v2::raii::command_list_unique_handle &&commandList,
3939
v2::event_flags_t flags = v2::EVENT_FLAGS_COUNTER,
4040
ur_queue_t_ *queue = nullptr);
41+
ur_command_list_manager(ur_command_list_manager &&src) = default;
4142
~ur_command_list_manager();
4243

4344
ur_result_t appendKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim,
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//===--------- lockable.hpp - Level Zero Adapter -------------------------===//
2+
//
3+
// Copyright (C) 2024 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#pragma once
12+
#include <mutex>
13+
14+
// Guard object handed out by lockable<T>::lock().
//
// It owns the mutex lock for as long as it is alive and exposes the protected
// object through operator->. The lock is released when the guard that
// currently owns it is destroyed.
template <typename T> struct locked {
public:
  // Takes over an already acquired `lock` together with the `object` it
  // protects. `object` is not owned; it must outlive this guard.
  locked(T *object, std::unique_lock<std::mutex> &&lock)
      : lock_(std::move(lock)), object_(object) {}

  // Access to the protected object. Only use it while this guard still owns
  // the lock (i.e. it has not been moved from).
  T *operator->() { return object_; }

private:
  std::unique_lock<std::mutex> lock_;
  T *object_;
};

/*
  lockable<T> stores an object of type T next to a mutex and hands the object
  out only through a guard that holds that mutex, similar to Rust's Mutex<T>.

  construction:
    lockable<X> l(arguments, to, construct, X);

  access without synchronization:
    X* obj_ptr = l.get_no_lock();
    obj_ptr->print_name();

  exclusive access to the object kept in l:
    // as long as the guard exists, this thread has exclusive access to the
    // underlying object
    locked<X> lock = l.lock();
    // the object is accessed through operator-> on the guard
    lock->print_name();

  Keep the guard in a named variable: a discarded guard unlocks immediately.
*/
template <typename T> struct lockable {
public:
  // Constructs the wrapped object in place from `args`.
  // The constructor only participates in overload resolution when T can
  // really be built from `args`; otherwise this forwarding constructor would
  // claim every argument list (including attempts to copy a lockable) and
  // fail with an unrelated error deep inside T's construction.
  template <typename... Args,
            typename = std::enable_if_t<std::is_constructible_v<T, Args...>>>
  lockable(Args &&...args) : object_(std::forward<Args>(args)...) {}

  // Blocks until the mutex is acquired and returns a guard that gives
  // exclusive access to the wrapped object.
  // [[nodiscard]]: dropping the result would release the mutex right away
  // and leave the code that follows unsynchronized.
  [[nodiscard]] locked<T> lock() {
    return locked<T>(&object_, std::unique_lock<std::mutex>(mut_));
  }

  // Returns the wrapped object without taking the mutex. The caller is
  // responsible for making sure no other thread accesses it concurrently.
  T *get_no_lock() { return &object_; }

private:
  T object_;
  std::mutex mut_;
};

0 commit comments

Comments
 (0)