@@ -650,14 +650,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
650
650
waitListView.clear ();
651
651
}));
652
652
653
- // TODO: support non-power-of-two pattern sizes
654
-
655
653
// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
656
654
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
657
- ZE2UR_CALL (zeCommandListAppendMemoryFill,
658
- (commandListManager.getZeCommandList (), pDst, pPattern,
659
- patternSize, size, zeSignalEvent, waitListView.num ,
660
- waitListView.handles ));
655
+ if (isPowerOf2 (patternSize)) {
656
+ ZE2UR_CALL (zeCommandListAppendMemoryFill,
657
+ (commandListManager.getZeCommandList (), pDst, pPattern,
658
+ patternSize, size, zeSignalEvent, waitListView.num ,
659
+ waitListView.handles ));
660
+ } else {
661
+ // Copy the pattern into each patternSize-byte slot of the buffer pointed to by pDst.
662
+ uint32_t numOfCopySteps = size / patternSize;
663
+ const void *src = pPattern;
664
+
665
+ for (uint32_t step = 0 ; step < numOfCopySteps; ++step) {
666
+ void *dst = reinterpret_cast <void *>(reinterpret_cast <uint8_t *>(pDst) +
667
+ step * patternSize);
668
+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
669
+ (commandListManager.getZeCommandList (), dst, src, patternSize,
670
+ step == numOfCopySteps - 1 ? zeSignalEvent : nullptr ,
671
+ waitListView.num , waitListView.handles ));
672
+ waitListView.clear ();
673
+ }
674
+ }
661
675
662
676
return UR_RESULT_SUCCESS;
663
677
}
0 commit comments