Skip to content

Commit 2b2bc73

Browse files
committed
[L0 v2] extend USMFill implementation to support sizes which are not powers of 2
1 parent 624a2a2 commit 2b2bc73

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -650,14 +650,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
650650
waitListView.clear();
651651
}));
652652

653-
// TODO: support non-power-of-two pattern sizes
654-
655653
// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
656654
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
657-
ZE2UR_CALL(zeCommandListAppendMemoryFill,
658-
(commandListManager.getZeCommandList(), pDst, pPattern,
659-
patternSize, size, zeSignalEvent, waitListView.num,
660-
waitListView.handles));
655+
if (isPowerOf2(patternSize)) {
656+
ZE2UR_CALL(zeCommandListAppendMemoryFill,
657+
(commandListManager.getZeCommandList(), pDst, pPattern,
658+
patternSize, size, zeSignalEvent, waitListView.num,
659+
waitListView.handles));
660+
} else {
661+
// Copy the pattern into each patternSize-sized slot of the memory pointed to by pDst.
662+
uint32_t numOfCopySteps = size / patternSize;
663+
const void *src = pPattern;
664+
665+
for (uint32_t step = 0; step < numOfCopySteps; ++step) {
666+
void *dst = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(pDst) +
667+
step * patternSize);
668+
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
669+
(commandListManager.getZeCommandList(), dst, src, patternSize,
670+
step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
671+
waitListView.num, waitListView.handles));
672+
waitListView.clear();
673+
}
674+
}
661675

662676
return UR_RESULT_SUCCESS;
663677
}

test/conformance/enqueue/urEnqueueUSMFill.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ static std::vector<testParametersFill> test_cases{
8686
{256, 256},
8787
/* pattern_size < size */
8888
{1024, 256},
89+
/* sizes which are not powers of 2 */
90+
{1000, 10},
8991
/* pattern sizes corresponding to some common scalar and vector types */
9092
{256, 4},
9193
{256, 8},

0 commit comments

Comments
 (0)