Skip to content

l_copy_and_adjust_arguments_for_device: reduce scope of device-side buffer increment #114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 5, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions src/acl_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2755,10 +2755,8 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
// with a base address of 0

// Need to determine sizeof device's pointers.
cl_uint dev_local_ptr_size = 4; // Always.
cl_uint dev_global_ptr_size = device->address_bits >> 3;

cl_uint buf_incr = 0;
const cl_uint dev_local_ptr_size = 4; // Always.
const cl_uint dev_global_ptr_size = device->address_bits >> 3;

// Bump allocator pointer for each local aspace
// Maps the aspace ID to the next available local memory address.
Expand All @@ -2784,12 +2782,16 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
const acl_kernel_arg_info_t *arg_info =
&(kernel->accel_def->iface.args[iarg]);

// Exclude kernel argument value from device-side buffer by default.
cl_uint buf_incr = 0;

if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
#ifdef MEM_DEBUG_MSG
printf("local");
#endif

unsigned this_aspace = kernel->accel_def->iface.args[iarg].aspace_number;
const unsigned int this_aspace =
kernel->accel_def->iface.args[iarg].aspace_number;

// This arg is a pointer to __local.
cl_ulong local_size = 0;
Expand All @@ -2811,7 +2813,8 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,

// On the emulator, the argument size of a pipe (which is a mem object) is
// 0. Since we copy in 0 bytes, we should read out 0 bytes.
size_t copy_sz = (arg_info->size == 0) ? arg_info->size : sizeof(cl_mem);
const size_t copy_sz =
(arg_info->size == 0) ? arg_info->size : sizeof(cl_mem);
safe_memcpy(&mem_obj, &(kernel->arg_value[host_idx]), copy_sz,
sizeof(cl_mem), copy_sz);

Expand All @@ -2831,7 +2834,7 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
value for the argument declared as a pointer to global or constant
memory in the kernel."
*/
cl_ulong null_ptr = 0;
const cl_ulong null_ptr = 0;
safe_memcpy(buf + device_idx, &null_ptr, dev_global_ptr_size,
dev_global_ptr_size, dev_global_ptr_size);
// Shared physical memory:
Expand Down Expand Up @@ -2901,7 +2904,7 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,

// copy the address of the reserved allocation into the invocation
// image:
void *mem_addr =
const void *mem_addr =
mem_obj->reserved_allocations[needed_physical_id][needed_mem_id]
->range.begin;
safe_memcpy(buf + device_idx, &mem_addr, dev_global_ptr_size,
Expand All @@ -2910,7 +2913,7 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
if (memory_migration->num_mem_objects == 0) {
// First-time allocation; 128 was chosen because the number of kernel
// arguments was previously capped at a hardcoded limit of 128
const unsigned initial_alloc = 128;
const unsigned int initial_alloc = 128;

memory_migration->src_mem_list =
(acl_mem_migrate_wrapper_t *)acl_malloc(
Expand All @@ -2922,7 +2925,7 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
memory_migration->num_alloc = initial_alloc;
} else if (memory_migration->num_mem_objects >=
memory_migration->num_alloc) {
const unsigned next_alloc = memory_migration->num_alloc * 2;
const unsigned int next_alloc = memory_migration->num_alloc * 2;
// check for overflow, num_alloc is a 32-bit unsigned integer and
// unsigned integer overflow is defined behaviour
if (next_alloc < memory_migration->num_alloc)
Expand Down