Skip to content

Commit 624a2a2

Browse files
committed
[L0 v2] simplify optimization in enqueue methods
The optimization makes sure that waitList is only passed to the first command submitted to the command list. Simplify it by clearing the wait list in place instead of tracking the migration in an additional variable (`memoryMigrated`).
1 parent b487b62 commit 624a2a2

File tree

4 files changed

+65
-89
lines changed

4 files changed

+65
-89
lines changed

source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ ur_command_list_manager::~ur_command_list_manager() {
3030
ur::level_zero::urDeviceRelease(device);
3131
}
3232

33-
std::pair<ze_event_handle_t *, uint32_t>
33+
wait_list_view
3434
ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents,
3535
uint32_t numWaitEvents) {
3636

@@ -80,32 +80,24 @@ ur_result_t ur_command_list_manager::appendKernelLaunch(
8080

8181
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH);
8282

83-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
83+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
8484

85-
bool memoryMigrated = false;
8685
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
8786
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
8887
(zeCommandList.get(), dst, src, size, nullptr,
89-
waitList.second, waitList.first));
90-
memoryMigrated = true;
88+
waitListView.num, waitListView.handles));
89+
waitListView.clear();
9190
};
9291

9392
UR_CALL(hKernel->prepareForSubmission(context, device, pGlobalWorkOffset,
9493
workDim, WG[0], WG[1], WG[2],
9594
memoryMigrate));
9695

97-
if (memoryMigrated) {
98-
// If memory was migrated, we don't need to pass the wait list to
99-
// the copy command again.
100-
waitList.first = nullptr;
101-
waitList.second = 0;
102-
}
103-
10496
TRACK_SCOPE_LATENCY(
10597
"ur_command_list_manager::zeCommandListAppendLaunchKernel");
10698
ZE2UR_CALL(zeCommandListAppendLaunchKernel,
10799
(zeCommandList.get(), hZeKernel, &zeThreadGroupDimensions,
108-
zeSignalEvent, waitList.second, waitList.first));
100+
zeSignalEvent, waitListView.num, waitListView.handles));
109101

110102
return UR_RESULT_SUCCESS;
111103
}

source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,21 @@
1515
#include "queue_api.hpp"
1616
#include <ze_api.h>
1717

18+
struct wait_list_view {
19+
ze_event_handle_t *handles;
20+
uint32_t num;
21+
22+
operator bool() const {
23+
assert((handles != nullptr) == (num > 0));
24+
return handles != nullptr;
25+
}
26+
27+
void clear() {
28+
handles = nullptr;
29+
num = 0;
30+
}
31+
};
32+
1833
struct ur_command_list_manager : public _ur_object {
1934

2035
ur_command_list_manager(ur_context_handle_t context,
@@ -34,9 +49,8 @@ struct ur_command_list_manager : public _ur_object {
3449

3550
ze_command_list_handle_t getZeCommandList();
3651

37-
std::pair<ze_event_handle_t *, uint32_t>
38-
getWaitListView(const ur_event_handle_t *phWaitEvents,
39-
uint32_t numWaitEvents);
52+
wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents,
53+
uint32_t numWaitEvents);
4054
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
4155
ur_command_t commandType);
4256

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 41 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121

2222
namespace v2 {
2323

24-
std::pair<ze_event_handle_t *, uint32_t>
25-
ur_queue_immediate_in_order_t::getWaitListView(
24+
wait_list_view ur_queue_immediate_in_order_t::getWaitListView(
2625
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) {
2726
return commandListManager.getWaitListView(phWaitEvents, numWaitEvents);
2827
}
@@ -291,37 +290,31 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked(
291290
ur_command_t commandType) {
292291
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
293292

294-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
293+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
295294

296-
bool memoryMigrated = false;
297295
auto pSrc = ur_cast<char *>(src->getDevicePtr(
298296
hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset,
299297
size, [&](void *src, void *dst, size_t size) {
300298
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
301299
(commandListManager.getZeCommandList(), dst, src,
302-
size, nullptr, waitList.second, waitList.first));
303-
memoryMigrated = true;
300+
size, nullptr, waitListView.num,
301+
waitListView.handles));
302+
waitListView.clear();
304303
}));
305304

306305
auto pDst = ur_cast<char *>(dst->getDevicePtr(
307306
hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset,
308307
size, [&](void *src, void *dst, size_t size) {
309308
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
310309
(commandListManager.getZeCommandList(), dst, src,
311-
size, nullptr, waitList.second, waitList.first));
312-
memoryMigrated = true;
310+
size, nullptr, waitListView.num,
311+
waitListView.handles));
312+
waitListView.clear();
313313
}));
314314

315-
if (memoryMigrated) {
316-
// If memory was migrated, we don't need to pass the wait list to
317-
// the copy command again.
318-
waitList.first = nullptr;
319-
waitList.second = 0;
320-
}
321-
322315
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
323316
(commandListManager.getZeCommandList(), pDst, pSrc, size,
324-
zeSignalEvent, waitList.second, waitList.first));
317+
zeSignalEvent, waitListView.num, waitListView.handles));
325318

326319
if (blocking) {
327320
ZE2UR_CALL(zeCommandListHostSynchronize,
@@ -379,38 +372,32 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked(
379372

380373
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
381374

382-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
375+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
383376

384-
bool memoryMigrated = false;
385377
auto pSrc = ur_cast<char *>(src->getDevicePtr(
386378
hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, 0,
387379
src->getSize(), [&](void *src, void *dst, size_t size) {
388380
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
389381
(commandListManager.getZeCommandList(), dst, src,
390-
size, nullptr, waitList.second, waitList.first));
391-
memoryMigrated = true;
382+
size, nullptr, waitListView.num,
383+
waitListView.handles));
384+
waitListView.clear();
392385
}));
393386
auto pDst = ur_cast<char *>(dst->getDevicePtr(
394387
hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, 0,
395388
dst->getSize(), [&](void *src, void *dst, size_t size) {
396389
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
397390
(commandListManager.getZeCommandList(), dst, src,
398-
size, nullptr, waitList.second, waitList.first));
399-
memoryMigrated = true;
391+
size, nullptr, waitListView.num,
392+
waitListView.handles));
393+
waitListView.clear();
400394
}));
401395

402-
if (memoryMigrated) {
403-
// If memory was migrated, we don't need to pass the wait list to
404-
// the copy command again.
405-
waitList.first = nullptr;
406-
waitList.second = 0;
407-
}
408-
409396
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
410397
(commandListManager.getZeCommandList(), pDst, &zeParams.dstRegion,
411398
zeParams.dstPitch, zeParams.dstSlicePitch, pSrc,
412399
&zeParams.srcRegion, zeParams.srcPitch, zeParams.srcSlicePitch,
413-
zeSignalEvent, waitList.second, waitList.first));
400+
zeSignalEvent, waitListView.num, waitListView.handles));
414401

415402
if (blocking) {
416403
ZE2UR_CALL(zeCommandListHostSynchronize,
@@ -580,23 +567,23 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
580567

581568
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP);
582569

583-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
570+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
584571

585-
bool memoryMigrated = false;
586572
auto pDst = ur_cast<char *>(hBuffer->mapHostPtr(
587573
mapFlags, offset, size, [&](void *src, void *dst, size_t size) {
588574
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
589575
(commandListManager.getZeCommandList(), dst, src,
590-
size, nullptr, waitList.second, waitList.first));
591-
memoryMigrated = true;
576+
size, nullptr, waitListView.num,
577+
waitListView.handles));
578+
waitListView.clear();
592579
}));
593580
*ppRetMap = pDst;
594581

595-
if (!memoryMigrated && waitList.second) {
582+
if (waitListView) {
596583
// If memory was not migrated, we need to wait on the events here.
597584
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
598-
(commandListManager.getZeCommandList(), waitList.second,
599-
waitList.first));
585+
(commandListManager.getZeCommandList(), waitListView.num,
586+
waitListView.handles));
600587
}
601588

602589
if (zeSignalEvent) {
@@ -621,21 +608,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap(
621608

622609
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP);
623610

624-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
611+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
625612

626613
// TODO: currently unmapHostPtr deallocates memory immediately,
627614
// since the memory might be used by the user, we need to make sure
628615
// all dependencies are completed.
629-
ZE2UR_CALL(
630-
zeCommandListAppendWaitOnEvents,
631-
(commandListManager.getZeCommandList(), waitList.second, waitList.first));
616+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
617+
(commandListManager.getZeCommandList(), waitListView.num,
618+
waitListView.handles));
619+
waitListView.clear();
632620

633-
bool memoryMigrated = false;
634621
hMem->unmapHostPtr(pMappedPtr, [&](void *src, void *dst, size_t size) {
635622
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
636623
(commandListManager.getZeCommandList(), dst, src, size,
637-
nullptr, waitList.second, waitList.first));
638-
memoryMigrated = true;
624+
nullptr, waitListView.num, waitListView.handles));
639625
});
640626
if (zeSignalEvent) {
641627
ZE2UR_CALL(zeCommandListAppendSignalEvent,
@@ -652,33 +638,26 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
652638

653639
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
654640

655-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
641+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
656642

657-
bool memoryMigrated = false;
658643
auto pDst = ur_cast<char *>(dst->getDevicePtr(
659644
hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size,
660645
[&](void *src, void *dst, size_t size) {
661646
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
662647
(commandListManager.getZeCommandList(), dst, src,
663-
size, nullptr, waitList.second, waitList.first));
664-
memoryMigrated = true;
648+
size, nullptr, waitListView.num,
649+
waitListView.handles));
650+
waitListView.clear();
665651
}));
666652

667-
if (memoryMigrated) {
668-
// If memory was migrated, we don't need to pass the wait list to
669-
// the copy command again.
670-
waitList.first = nullptr;
671-
waitList.second = 0;
672-
}
673-
674653
// TODO: support non-power-of-two pattern sizes
675654

676655
// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
677656
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
678657
ZE2UR_CALL(zeCommandListAppendMemoryFill,
679658
(commandListManager.getZeCommandList(), pDst, pPattern,
680-
patternSize, size, zeSignalEvent, waitList.second,
681-
waitList.first));
659+
patternSize, size, zeSignalEvent, waitListView.num,
660+
waitListView.handles));
682661

683662
return UR_RESULT_SUCCESS;
684663
}
@@ -988,33 +967,25 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
988967

989968
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH);
990969

991-
auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList);
970+
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
992971

993-
bool memoryMigrated = false;
994972
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
995973
ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
996974
(commandListManager.getZeCommandList(), dst, src, size,
997-
nullptr, waitList.second, waitList.first));
998-
memoryMigrated = true;
975+
nullptr, waitListView.num, waitListView.handles));
976+
waitListView.clear();
999977
};
1000978

1001979
UR_CALL(hKernel->prepareForSubmission(hContext, hDevice, pGlobalWorkOffset,
1002980
workDim, WG[0], WG[1], WG[2],
1003981
memoryMigrate));
1004982

1005-
if (memoryMigrated) {
1006-
// If memory was migrated, we don't need to pass the wait list to
1007-
// the copy command again.
1008-
waitList.first = nullptr;
1009-
waitList.second = 0;
1010-
}
1011-
1012983
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::"
1013984
"zeCommandListAppendLaunchCooperativeKernel");
1014985
ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel,
1015986
(commandListManager.getZeCommandList(), hZeKernel,
1016-
&zeThreadGroupDimensions, zeSignalEvent, waitList.second,
1017-
waitList.first));
987+
&zeThreadGroupDimensions, zeSignalEvent, waitListView.num,
988+
waitListView.handles));
1018989

1019990
recordSubmittedKernel(hKernel);
1020991

source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
3535
std::vector<ur_event_handle_t> deferredEvents;
3636
std::vector<ur_kernel_handle_t> submittedKernels;
3737

38-
std::pair<ze_event_handle_t *, uint32_t>
39-
getWaitListView(const ur_event_handle_t *phWaitEvents,
40-
uint32_t numWaitEvents);
38+
wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents,
39+
uint32_t numWaitEvents);
4140

4241
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
4342
ur_command_t commandType);

0 commit comments

Comments
 (0)