Skip to content

Commit e16d01c

Browse files
authored
Merge pull request #1603 from konradkusiak97/queueFillOPENCLctsFix
[OpenCL] Modify fill emulation to work for patterns which are not powers of 2
2 parents a3895db + 483a632 commit e16d01c

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

source/adapters/opencl/usm.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
255255
return UR_RESULT_SUCCESS;
256256
}
257257

258-
// OpenCL only supports pattern sizes as large as the largest CL type
259-
// (double16/long16 - 128 bytes), anything larger we need to do on the host
260-
// side and copy it into the target allocation.
258+
// OpenCL only supports pattern sizes which are powers of 2 and no larger
259+
// than the largest CL type (double16/long16 - 128 bytes). Any pattern that
260+
// is larger, or is not a power of 2, has to be built on the host side and
261+
// then copied into the target allocation.
261262
clHostMemAllocINTEL_fn HostMemAlloc = nullptr;
262263
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
263264
CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache,
@@ -274,14 +275,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
274275
cl_ext::MemBlockingFreeName, &USMFree));
275276

276277
cl_int ClErr = CL_SUCCESS;
277-
auto HostBuffer = static_cast<uint64_t *>(
278-
HostMemAlloc(CLContext, nullptr, size, 0, &ClErr));
278+
auto HostBuffer =
279+
static_cast<uint8_t *>(HostMemAlloc(CLContext, nullptr, size, 0, &ClErr));
279280
CL_RETURN_ON_FAILURE(ClErr);
280281

281-
auto NumValues = size / sizeof(uint64_t);
282-
auto NumChunks = patternSize / sizeof(uint64_t);
283-
for (size_t i = 0; i < NumValues; i++) {
284-
HostBuffer[i] = static_cast<const uint64_t *>(pPattern)[i % NumChunks];
282+
auto *End = HostBuffer + size;
283+
for (auto *Iter = HostBuffer; Iter < End; Iter += patternSize) {
284+
std::memcpy(Iter, pPattern, patternSize);
285285
}
286286

287287
cl_event CopyEvent = nullptr;

0 commit comments

Comments
 (0)