21
21
22
22
namespace v2 {
23
23
24
// Returns the wait-list view for the given events by forwarding both
// arguments unchanged to commandListManager.getWaitListView.
// In this diff the declared return type changes from
// std::pair<ze_event_handle_t *, uint32_t> to wait_list_view.
- std::pair<ze_event_handle_t *, uint32_t >
25
- ur_queue_immediate_in_order_t::getWaitListView (
24
+ wait_list_view ur_queue_immediate_in_order_t::getWaitListView (
26
25
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) {
27
26
return commandListManager.getWaitListView (phWaitEvents, numWaitEvents);
28
27
}
@@ -291,37 +290,31 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked(
291
290
ur_command_t commandType) {
292
291
auto zeSignalEvent = getSignalEvent (phEvent, commandType);
293
292
294
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
293
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
295
294
296
- bool memoryMigrated = false ;
297
295
auto pSrc = ur_cast<char *>(src->getDevicePtr (
298
296
hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, srcOffset,
299
297
size, [&](void *src, void *dst, size_t size) {
300
298
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
301
299
(commandListManager.getZeCommandList (), dst, src,
302
- size, nullptr , waitList.second , waitList.first ));
303
- memoryMigrated = true ;
300
+ size, nullptr , waitListView.num ,
301
+ waitListView.handles ));
302
+ waitListView.clear ();
304
303
}));
305
304
306
305
auto pDst = ur_cast<char *>(dst->getDevicePtr (
307
306
hDevice, ur_mem_handle_t_::device_access_mode_t ::write_only, dstOffset,
308
307
size, [&](void *src, void *dst, size_t size) {
309
308
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
310
309
(commandListManager.getZeCommandList (), dst, src,
311
- size, nullptr , waitList.second , waitList.first ));
312
- memoryMigrated = true ;
310
+ size, nullptr , waitListView.num ,
311
+ waitListView.handles ));
312
+ waitListView.clear ();
313
313
}));
314
314
315
- if (memoryMigrated) {
316
- // If memory was migrated, we don't need to pass the wait list to
317
- // the copy command again.
318
- waitList.first = nullptr ;
319
- waitList.second = 0 ;
320
- }
321
-
322
315
ZE2UR_CALL (zeCommandListAppendMemoryCopy,
323
316
(commandListManager.getZeCommandList (), pDst, pSrc, size,
324
- zeSignalEvent, waitList. second , waitList. first ));
317
+ zeSignalEvent, waitListView. num , waitListView. handles ));
325
318
326
319
if (blocking) {
327
320
ZE2UR_CALL (zeCommandListHostSynchronize,
@@ -379,38 +372,32 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked(
379
372
380
373
auto zeSignalEvent = getSignalEvent (phEvent, commandType);
381
374
382
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
375
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
383
376
384
- bool memoryMigrated = false ;
385
377
auto pSrc = ur_cast<char *>(src->getDevicePtr (
386
378
hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, 0 ,
387
379
src->getSize (), [&](void *src, void *dst, size_t size) {
388
380
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
389
381
(commandListManager.getZeCommandList (), dst, src,
390
- size, nullptr , waitList.second , waitList.first ));
391
- memoryMigrated = true ;
382
+ size, nullptr , waitListView.num ,
383
+ waitListView.handles ));
384
+ waitListView.clear ();
392
385
}));
393
386
auto pDst = ur_cast<char *>(dst->getDevicePtr (
394
387
hDevice, ur_mem_handle_t_::device_access_mode_t ::write_only, 0 ,
395
388
dst->getSize (), [&](void *src, void *dst, size_t size) {
396
389
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
397
390
(commandListManager.getZeCommandList (), dst, src,
398
- size, nullptr , waitList.second , waitList.first ));
399
- memoryMigrated = true ;
391
+ size, nullptr , waitListView.num ,
392
+ waitListView.handles ));
393
+ waitListView.clear ();
400
394
}));
401
395
402
- if (memoryMigrated) {
403
- // If memory was migrated, we don't need to pass the wait list to
404
- // the copy command again.
405
- waitList.first = nullptr ;
406
- waitList.second = 0 ;
407
- }
408
-
409
396
ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
410
397
(commandListManager.getZeCommandList (), pDst, &zeParams.dstRegion ,
411
398
zeParams.dstPitch , zeParams.dstSlicePitch , pSrc,
412
399
&zeParams.srcRegion , zeParams.srcPitch , zeParams.srcSlicePitch ,
413
- zeSignalEvent, waitList. second , waitList. first ));
400
+ zeSignalEvent, waitListView. num , waitListView. handles ));
414
401
415
402
if (blocking) {
416
403
ZE2UR_CALL (zeCommandListHostSynchronize,
@@ -580,23 +567,23 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
580
567
581
568
auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_BUFFER_MAP);
582
569
583
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
570
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
584
571
585
- bool memoryMigrated = false ;
586
572
auto pDst = ur_cast<char *>(hBuffer->mapHostPtr (
587
573
mapFlags, offset, size, [&](void *src, void *dst, size_t size) {
588
574
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
589
575
(commandListManager.getZeCommandList (), dst, src,
590
- size, nullptr , waitList.second , waitList.first ));
591
- memoryMigrated = true ;
576
+ size, nullptr , waitListView.num ,
577
+ waitListView.handles ));
578
+ waitListView.clear ();
592
579
}));
593
580
*ppRetMap = pDst;
594
581
595
- if (!memoryMigrated && waitList. second ) {
582
+ if (waitListView ) {
596
583
// If memory was not migrated, we need to wait on the events here.
597
584
ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
598
- (commandListManager.getZeCommandList (), waitList. second ,
599
- waitList. first ));
585
+ (commandListManager.getZeCommandList (), waitListView. num ,
586
+ waitListView. handles ));
600
587
}
601
588
602
589
if (zeSignalEvent) {
@@ -621,21 +608,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap(
621
608
622
609
auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_UNMAP);
623
610
624
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
611
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
625
612
626
613
// TODO: currently unmapHostPtr deallocates memory immediately,
627
614
// since the memory might be used by the user, we need to make sure
628
615
// all dependencies are completed.
629
- ZE2UR_CALL (
630
- zeCommandListAppendWaitOnEvents,
631
- (commandListManager.getZeCommandList (), waitList.second , waitList.first ));
616
+ ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
617
+ (commandListManager.getZeCommandList (), waitListView.num ,
618
+ waitListView.handles ));
619
+ waitListView.clear ();
632
620
633
- bool memoryMigrated = false ;
634
621
hMem->unmapHostPtr (pMappedPtr, [&](void *src, void *dst, size_t size) {
635
622
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
636
623
(commandListManager.getZeCommandList (), dst, src, size,
637
- nullptr , waitList.second , waitList.first ));
638
- memoryMigrated = true ;
624
+ nullptr , waitListView.num , waitListView.handles ));
639
625
});
640
626
if (zeSignalEvent) {
641
627
ZE2UR_CALL (zeCommandListAppendSignalEvent,
@@ -652,33 +638,40 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
652
638
653
639
auto zeSignalEvent = getSignalEvent (phEvent, commandType);
654
640
655
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
641
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
656
642
657
- bool memoryMigrated = false ;
658
643
auto pDst = ur_cast<char *>(dst->getDevicePtr (
659
644
hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, offset, size,
660
645
[&](void *src, void *dst, size_t size) {
661
646
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
662
647
(commandListManager.getZeCommandList (), dst, src,
663
- size, nullptr , waitList.second , waitList.first ));
664
- memoryMigrated = true ;
648
+ size, nullptr , waitListView.num ,
649
+ waitListView.handles ));
650
+ waitListView.clear ();
665
651
}));
666
652
667
- if (memoryMigrated) {
668
- // If memory was migrated, we don't need to pass the wait list to
669
- // the copy command again.
670
- waitList.first = nullptr ;
671
- waitList.second = 0 ;
672
- }
673
-
674
- // TODO: support non-power-of-two pattern sizes
675
-
676
653
// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
677
654
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
678
- ZE2UR_CALL (zeCommandListAppendMemoryFill,
679
- (commandListManager.getZeCommandList (), pDst, pPattern,
680
- patternSize, size, zeSignalEvent, waitList.second ,
681
- waitList.first ));
655
+ if (isPowerOf2 (patternSize)) {
656
+ ZE2UR_CALL (zeCommandListAppendMemoryFill,
657
+ (commandListManager.getZeCommandList (), pDst, pPattern,
658
+ patternSize, size, zeSignalEvent, waitListView.num ,
659
+ waitListView.handles ));
660
+ } else {
661
+ // Copy the pattern into each patternSize-byte slot of the memory pointed to by pDst.
662
+ uint32_t numOfCopySteps = size / patternSize;
663
+ const void *src = pPattern;
664
+
665
+ for (uint32_t step = 0 ; step < numOfCopySteps; ++step) {
666
+ void *dst = reinterpret_cast <void *>(reinterpret_cast <uint8_t *>(pDst) +
667
+ step * patternSize);
668
+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
669
+ (commandListManager.getZeCommandList (), dst, src, patternSize,
670
+ step == numOfCopySteps - 1 ? zeSignalEvent : nullptr ,
671
+ waitListView.num , waitListView.handles ));
672
+ waitListView.clear ();
673
+ }
674
+ }
682
675
683
676
return UR_RESULT_SUCCESS;
684
677
}
@@ -988,33 +981,25 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
988
981
989
982
auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_KERNEL_LAUNCH);
990
983
991
- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
984
+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
992
985
993
- bool memoryMigrated = false ;
994
986
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
995
987
ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
996
988
(commandListManager.getZeCommandList (), dst, src, size,
997
- nullptr , waitList. second , waitList. first ));
998
- memoryMigrated = true ;
989
+ nullptr , waitListView. num , waitListView. handles ));
990
+ waitListView. clear () ;
999
991
};
1000
992
1001
993
UR_CALL (hKernel->prepareForSubmission (hContext, hDevice, pGlobalWorkOffset,
1002
994
workDim, WG[0 ], WG[1 ], WG[2 ],
1003
995
memoryMigrate));
1004
996
1005
- if (memoryMigrated) {
1006
- // If memory was migrated, we don't need to pass the wait list to
1007
- // the copy command again.
1008
- waitList.first = nullptr ;
1009
- waitList.second = 0 ;
1010
- }
1011
-
1012
997
TRACK_SCOPE_LATENCY (" ur_queue_immediate_in_order_t::"
1013
998
" zeCommandListAppendLaunchCooperativeKernel" );
1014
999
ZE2UR_CALL (zeCommandListAppendLaunchCooperativeKernel,
1015
1000
(commandListManager.getZeCommandList (), hZeKernel,
1016
- &zeThreadGroupDimensions, zeSignalEvent, waitList. second ,
1017
- waitList. first ));
1001
+ &zeThreadGroupDimensions, zeSignalEvent, waitListView. num ,
1002
+ waitListView. handles ));
1018
1003
1019
1004
recordSubmittedKernel (hKernel);
1020
1005
0 commit comments