Skip to content

Commit 9cece7d

Browse files
committed
SWDEV-434298 - Add the new copy kernel
The new copy kernel can work with a limited number of launched workgroups. Change-Id: I7807b37e4feb4ae21f10542837a6b17d563f03b5
1 parent b9556ed commit 9cece7d

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

amd/device-libs/opencl/src/misc/amdblit.cl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,45 @@ __amd_copyBufferAligned(
389389
}
390390
}
391391

392+
// Copies data from (srcI + srcOrigin) to (dstI + dstOrigin).
//
// Each work-item starts at the element whose index equals its global id in
// dimension 0 and advances by `next_chunk` elements per iteration, copying
// for as long as the address of the destination element is below `end_ptr`.
// Elements are ulong2 when `aligned_size == sizeof(ulong2)`, uint otherwise.
//
// After its strided pass, the work-item with global id 0 additionally copies
// the byte range [size - remainder, size) one byte at a time when
// `remainder` is non-zero.
//
// NOTE(review): `next_chunk` is presumably the total number of launched
// work-items and `end_ptr` the address just past the aligned part of the
// destination -- confirm against the caller. A zero `next_chunk` would make
// the strided loops spin on the same element.
__attribute__((always_inline)) void
__amd_copyBufferExt(
    __global uchar* srcI,
    __global uchar* dstI,
    ulong srcOrigin,
    ulong dstOrigin,
    ulong size,
    uint remainder,
    uint aligned_size,
    ulong end_ptr,
    uint next_chunk) {
  // Starting element index; also decides who handles the byte tail below.
  const ulong first_id = get_global_id(0);

  __global uchar* src_bytes = srcI + srcOrigin;
  __global uchar* dst_bytes = dstI + dstOrigin;

  if (aligned_size != sizeof(ulong2)) {
    // Narrow path: move one uint per iteration.
    __global uint* src_words = (__global uint*)(src_bytes);
    __global uint* dst_words = (__global uint*)(dst_bytes);
    for (ulong idx = first_id; (ulong)(&dst_words[idx]) < end_ptr; idx += next_chunk) {
      dst_words[idx] = src_words[idx];
    }
  } else {
    // Wide path: move one ulong2 per iteration.
    __global ulong2* src_vecs = (__global ulong2*)(src_bytes);
    __global ulong2* dst_vecs = (__global ulong2*)(dst_bytes);
    for (ulong idx = first_id; (ulong)(&dst_vecs[idx]) < end_ptr; idx += next_chunk) {
      dst_vecs[idx] = src_vecs[idx];
    }
  }

  // Only the work-item with global id 0 copies the trailing bytes, and only
  // when there are any.
  if ((remainder == 0) || (first_id != 0)) {
    return;
  }

  for (ulong tail = size - remainder; tail < size; ++tail) {
    dst_bytes[tail] = src_bytes[tail];
  }
}
430+
392431
__attribute__((always_inline)) void
393432
__amd_fillBuffer(
394433
__global uchar* bufUChar,

0 commit comments

Comments
 (0)