@@ -255,9 +255,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
255
255
return UR_RESULT_SUCCESS;
256
256
}
257
257
258
- // OpenCL only supports pattern sizes as large as the largest CL type
259
- // (double16/long16 - 128 bytes), anything larger we need to do on the host
260
- // side and copy it into the target allocation.
258
+ // OpenCL only supports pattern sizes that are powers of 2 and no larger than
259
+ // the largest CL type (double16/long16 - 128 bytes). For any pattern that is
260
+ // larger, or not a power of 2, we build the fill on the host side and copy
261
+ // it into the target allocation.
261
262
clHostMemAllocINTEL_fn HostMemAlloc = nullptr ;
262
263
UR_RETURN_ON_FAILURE (cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
263
264
CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache ,
@@ -274,14 +275,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
274
275
cl_ext::MemBlockingFreeName, &USMFree));
275
276
276
277
cl_int ClErr = CL_SUCCESS;
277
- auto HostBuffer = static_cast < uint64_t *>(
278
- HostMemAlloc (CLContext, nullptr , size, 0 , &ClErr));
278
+ auto HostBuffer =
279
+ static_cast < uint8_t *>( HostMemAlloc (CLContext, nullptr , size, 0 , &ClErr));
279
280
CL_RETURN_ON_FAILURE (ClErr);
280
281
281
- auto NumValues = size / sizeof (uint64_t );
282
- auto NumChunks = patternSize / sizeof (uint64_t );
283
- for (size_t i = 0 ; i < NumValues; i++) {
284
- HostBuffer[i] = static_cast <const uint64_t *>(pPattern)[i % NumChunks];
282
+ auto *End = HostBuffer + size;
283
+ for (auto *Iter = HostBuffer; Iter < End; Iter += patternSize) {
284
+ std::memcpy (Iter, pPattern, patternSize);
285
285
}
286
286
287
287
cl_event CopyEvent = nullptr ;
0 commit comments