Skip to content

Commit 8336122

Browse files
committed
kernel: add buffer allocations and migrations for streaming arguments
This resolves a failure to pass local-memory sizes and global-memory buffers as streaming arguments to a streaming kernel in simulation. This amends #103. Signed-off-by: Peter Colberg <[email protected]>
1 parent a362613 commit 8336122

File tree

1 file changed

+48
-29
lines changed

1 file changed

+48
-29
lines changed

src/acl_kernel.cpp

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,32 @@ int acl_num_non_null_mem_args(cl_kernel kernel) {
27272727
return result;
27282728
}
27292729

2730+
// Copies `len` bytes of an argument value from `src` to `dst` and
2731+
// returns the number of bytes copied. If
2732+
// `streaming_arg_info_available` is true, the argument is a streaming
2733+
// kernel argument: nothing is written to `dst`; instead the interface
2734+
// name from `streaming_arg_info` and a copy of the value are appended
2735+
// to `streaming_args`, and the function returns zero.
2736+
static size_t l_copy_argument_to_buffer_or_streaming(
2737+
void *dst, const void *src, size_t len,
2738+
std::vector<aocl_mmd_streaming_kernel_arg_info_t> &streaming_args,
2739+
bool streaming_arg_info_available,
2740+
const acl_streaming_kernel_arg_info &streaming_arg_info) {
2741+
if (streaming_arg_info_available) {
2742+
#ifdef MEM_DEBUG_MSG
2743+
printf(" streaming");
2744+
#endif
2745+
streaming_args.emplace_back(aocl_mmd_streaming_kernel_arg_info_t{
2746+
streaming_arg_info.interface_name,
2747+
std::vector<char>(static_cast<const char *>(src),
2748+
static_cast<const char *>(src) + len)});
2749+
return 0; // Nothing was written to `dst`.
2750+
}
2751+
2752+
safe_memcpy(dst, src, len, len, len);
2753+
return len;
2754+
}
2755+
27302756
// Copy kernel arguments to another buffer.
27312757
//
27322758
// Adjust for:
@@ -2785,20 +2811,7 @@ static cl_int l_copy_and_adjust_arguments_for_device(
27852811
const acl_kernel_arg_info_t *arg_info =
27862812
&(kernel->accel_def->iface.args[iarg]);
27872813

2788-
// Exclude kernel argument value from device-side buffer by default.
2789-
cl_uint buf_incr = 0;
2790-
2791-
if (arg_info->streaming_arg_info_available) {
2792-
#ifdef MEM_DEBUG_MSG
2793-
printf("streaming");
2794-
#endif
2795-
// Copy argument value to a separate buffer since it may be modified with
2796-
// clSetKernelArg() after kernel is enqueued but before it is launched.
2797-
const char *const arg_value = &kernel->arg_value[host_idx];
2798-
streaming_args.emplace_back(aocl_mmd_streaming_kernel_arg_info_t{
2799-
arg_info->streaming_arg_info.interface_name,
2800-
std::vector<char>(arg_value, arg_value + arg_info->size)});
2801-
} else if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
2814+
if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
28022815
#ifdef MEM_DEBUG_MSG
28032816
printf("local");
28042817
#endif
@@ -2808,15 +2821,16 @@ static cl_int l_copy_and_adjust_arguments_for_device(
28082821

28092822
// This arg is a pointer to __local.
28102823
cl_ulong local_size = 0;
2811-
safe_memcpy(buf + device_idx, &(next_local[this_aspace]),
2812-
dev_local_ptr_size, dev_local_ptr_size, dev_local_ptr_size);
2824+
device_idx += l_copy_argument_to_buffer_or_streaming(
2825+
buf + device_idx, &(next_local[this_aspace]), dev_local_ptr_size,
2826+
streaming_args, arg_info->streaming_arg_info_available,
2827+
arg_info->streaming_arg_info);
28132828
// Now reserve space for this object.
28142829
// Yes, this is a bump allocator. :-)
28152830
safe_memcpy(&local_size, &(kernel->arg_value[host_idx]), arg_info->size,
28162831
sizeof(cl_ulong), arg_info->size);
28172832
// (Need cast to size_t on 32-bit platforms)
28182833
next_local[this_aspace] += l_round_up_for_alignment((size_t)local_size);
2819-
buf_incr = dev_local_ptr_size;
28202834
} else if (arg_info->category == ACL_ARG_MEM_OBJ &&
28212835
!kernel->arg_is_svm[iarg] && !kernel->arg_is_ptr[iarg]) {
28222836
// Must use memcpy here just in case the argument pointer is not aligned.
@@ -2848,17 +2862,21 @@ static cl_int l_copy_and_adjust_arguments_for_device(
28482862
memory in the kernel."
28492863
*/
28502864
const cl_ulong null_ptr = 0;
2851-
safe_memcpy(buf + device_idx, &null_ptr, dev_global_ptr_size,
2852-
dev_global_ptr_size, dev_global_ptr_size);
2865+
device_idx += l_copy_argument_to_buffer_or_streaming(
2866+
buf + device_idx, &null_ptr, dev_global_ptr_size, streaming_args,
2867+
arg_info->streaming_arg_info_available,
2868+
arg_info->streaming_arg_info);
28532869
// Shared physical memory:
28542870
} else if (mem_obj->host_mem.device_addr != 0L) {
28552871
#ifdef MEM_DEBUG_MSG
28562872
printf("shared physical mem");
28572873
#endif
28582874
// Write the address into the invocation image:
2859-
safe_memcpy(buf + device_idx, &(mem_obj->host_mem.device_addr),
2860-
dev_global_ptr_size, dev_global_ptr_size,
2861-
dev_global_ptr_size);
2875+
device_idx += l_copy_argument_to_buffer_or_streaming(
2876+
buf + device_idx, &(mem_obj->host_mem.device_addr),
2877+
dev_global_ptr_size, streaming_args,
2878+
arg_info->streaming_arg_info_available,
2879+
arg_info->streaming_arg_info);
28622880
// Regular buffer:
28632881
} else {
28642882
#ifdef MEM_DEBUG_MSG
@@ -2920,8 +2938,10 @@ static cl_int l_copy_and_adjust_arguments_for_device(
29202938
const void *mem_addr =
29212939
mem_obj->reserved_allocations[needed_physical_id][needed_mem_id]
29222940
->range.begin;
2923-
safe_memcpy(buf + device_idx, &mem_addr, dev_global_ptr_size,
2924-
dev_global_ptr_size, dev_global_ptr_size);
2941+
device_idx += l_copy_argument_to_buffer_or_streaming(
2942+
buf + device_idx, &mem_addr, dev_global_ptr_size, streaming_args,
2943+
arg_info->streaming_arg_info_available,
2944+
arg_info->streaming_arg_info);
29252945

29262946
if (memory_migration->num_mem_objects == 0) {
29272947
// First time allocation, 128 was chosen because previously, number of
@@ -2965,19 +2985,18 @@ static cl_int l_copy_and_adjust_arguments_for_device(
29652985
needed_mem_id;
29662986
++memory_migration->num_mem_objects;
29672987
}
2968-
buf_incr = dev_global_ptr_size;
29692988
} else {
29702989
#ifdef MEM_DEBUG_MSG
29712990
printf("const");
29722991
#endif
29732992

29742993
// Host and device sizes are the same.
29752994
// E.g. for cl_uint, SVM ptr etc.
2976-
safe_memcpy(buf + device_idx, kernel->arg_value + host_idx,
2977-
arg_info->size, arg_info->size, arg_info->size);
2978-
buf_incr = arg_info->size;
2995+
device_idx += l_copy_argument_to_buffer_or_streaming(
2996+
buf + device_idx, kernel->arg_value + host_idx, arg_info->size,
2997+
streaming_args, arg_info->streaming_arg_info_available,
2998+
arg_info->streaming_arg_info);
29792999
}
2980-
device_idx += buf_incr;
29813000
host_idx += arg_info->size;
29823001
#ifdef MEM_DEBUG_MSG
29833002
printf("\n");

0 commit comments

Comments
 (0)