Skip to content

Commit b8e15e2

Browse files
Modified host fill emulation to include patterns which are not powers of 2
1 parent 0b302e6 commit b8e15e2

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

source/adapters/opencl/usm.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
256256
return UR_RESULT_SUCCESS;
257257
}
258258

259-
// OpenCL only supports pattern sizes as large as the largest CL type
260-
// (double16/long16 - 128 bytes), anything larger we need to do on the host
261-
// side and copy it into the target allocation.
259+
// OpenCL only supports pattern sizes that are powers of 2 and no larger than
260+
// the largest CL type (double16/long16 - 128 bytes). Anything larger, or
261+
// not a power of 2, must be filled on the host side and copied into the
262+
// target allocation.
262263
clHostMemAllocINTEL_fn HostMemAlloc = nullptr;
263264
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
264265
CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache,
@@ -275,14 +276,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
275276
cl_ext::MemBlockingFreeName, &USMFree));
276277

277278
cl_int ClErr = CL_SUCCESS;
278-
auto HostBuffer = static_cast<uint64_t *>(
279+
auto HostBuffer = static_cast<unsigned char *>(
279280
HostMemAlloc(CLContext, nullptr, size, 0, &ClErr));
280281
CL_RETURN_ON_FAILURE(ClErr);
281282

282-
auto NumValues = size / sizeof(uint64_t);
283-
auto NumChunks = patternSize / sizeof(uint64_t);
284-
for (size_t i = 0; i < NumValues; i++) {
285-
HostBuffer[i] = static_cast<const uint64_t *>(pPattern)[i % NumChunks];
283+
auto NumChunks = size / patternSize;
284+
for (size_t i = 0; i < NumChunks; i++) {
285+
auto Dest = HostBuffer + i * patternSize;
286+
memcpy(Dest, pPattern, patternSize);
286287
}
287288

288289
cl_event CopyEvent = nullptr;

0 commit comments

Comments
 (0)