@@ -2727,6 +2727,32 @@ int acl_num_non_null_mem_args(cl_kernel kernel) {
2727
2727
return result;
2728
2728
}
2729
2729
2730
+ // Copies argument value of `len` bytes from source buffer `src` to
2731
+ // destination buffer `dst` and returns the number of bytes copied;
2732
+ // unless `streaming_arg_info_available` is true to indicate a
2733
+ // streaming kernel argument, in which case this function appends
2734
+ // the argument interface name provided in `streaming_arg_info` and
2735
+ // the argument value to `streaming_args` and returns zero.
2736
+ static size_t l_copy_argument_to_buffer_or_streaming (
2737
+ void *dst, const void *src, size_t len,
2738
+ std::vector<aocl_mmd_streaming_kernel_arg_info_t > &streaming_args,
2739
+ bool streaming_arg_info_available,
2740
+ const acl_streaming_kernel_arg_info &streaming_arg_info) {
2741
+ if (streaming_arg_info_available) {
2742
+ #ifdef MEM_DEBUG_MSG
2743
+ printf (" streaming" );
2744
+ #endif
2745
+ streaming_args.emplace_back (aocl_mmd_streaming_kernel_arg_info_t {
2746
+ streaming_arg_info.interface_name ,
2747
+ std::vector<char >(static_cast <const char *>(src),
2748
+ static_cast <const char *>(src) + len)});
2749
+ return 0 ;
2750
+ }
2751
+
2752
+ safe_memcpy (dst, src, len, len, len);
2753
+ return len;
2754
+ }
2755
+
2730
2756
// Copy kernel arguments to another buffer.
2731
2757
//
2732
2758
// Adjust for:
@@ -2785,20 +2811,7 @@ static cl_int l_copy_and_adjust_arguments_for_device(
2785
2811
const acl_kernel_arg_info_t *arg_info =
2786
2812
&(kernel->accel_def ->iface .args [iarg]);
2787
2813
2788
- // Exclude kernel argument value from device-side buffer by default.
2789
- cl_uint buf_incr = 0 ;
2790
-
2791
- if (arg_info->streaming_arg_info_available ) {
2792
- #ifdef MEM_DEBUG_MSG
2793
- printf (" streaming" );
2794
- #endif
2795
- // Copy argument value to a separate buffer since it may be modified with
2796
- // clSetKernelArg() after kernel is enqueued but before it is launched.
2797
- const char *const arg_value = &kernel->arg_value [host_idx];
2798
- streaming_args.emplace_back (aocl_mmd_streaming_kernel_arg_info_t {
2799
- arg_info->streaming_arg_info .interface_name ,
2800
- std::vector<char >(arg_value, arg_value + arg_info->size )});
2801
- } else if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
2814
+ if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
2802
2815
#ifdef MEM_DEBUG_MSG
2803
2816
printf (" local" );
2804
2817
#endif
@@ -2808,15 +2821,16 @@ static cl_int l_copy_and_adjust_arguments_for_device(
2808
2821
2809
2822
// This arg is a pointer to __local.
2810
2823
cl_ulong local_size = 0 ;
2811
- safe_memcpy (buf + device_idx, &(next_local[this_aspace]),
2812
- dev_local_ptr_size, dev_local_ptr_size, dev_local_ptr_size);
2824
+ device_idx += l_copy_argument_to_buffer_or_streaming (
2825
+ buf + device_idx, &(next_local[this_aspace]), dev_local_ptr_size,
2826
+ streaming_args, arg_info->streaming_arg_info_available ,
2827
+ arg_info->streaming_arg_info );
2813
2828
// Now reserve space for this object.
2814
2829
// Yes, this is a bump allocator. :-)
2815
2830
safe_memcpy (&local_size, &(kernel->arg_value [host_idx]), arg_info->size ,
2816
2831
sizeof (cl_ulong), arg_info->size );
2817
2832
// (Need cast to size_t on 32-bit platforms)
2818
2833
next_local[this_aspace] += l_round_up_for_alignment ((size_t )local_size);
2819
- buf_incr = dev_local_ptr_size;
2820
2834
} else if (arg_info->category == ACL_ARG_MEM_OBJ &&
2821
2835
!kernel->arg_is_svm [iarg] && !kernel->arg_is_ptr [iarg]) {
2822
2836
// Must use memcpy here just in case the argument pointer is not aligned.
@@ -2848,17 +2862,21 @@ static cl_int l_copy_and_adjust_arguments_for_device(
2848
2862
memory in the kernel."
2849
2863
*/
2850
2864
const cl_ulong null_ptr = 0 ;
2851
- safe_memcpy (buf + device_idx, &null_ptr, dev_global_ptr_size,
2852
- dev_global_ptr_size, dev_global_ptr_size);
2865
+ device_idx += l_copy_argument_to_buffer_or_streaming (
2866
+ buf + device_idx, &null_ptr, dev_global_ptr_size, streaming_args,
2867
+ arg_info->streaming_arg_info_available ,
2868
+ arg_info->streaming_arg_info );
2853
2869
// Shared physical memory:
2854
2870
} else if (mem_obj->host_mem .device_addr != 0L ) {
2855
2871
#ifdef MEM_DEBUG_MSG
2856
2872
printf (" shared physical mem" );
2857
2873
#endif
2858
2874
// Write the address into the invocation image:
2859
- safe_memcpy (buf + device_idx, &(mem_obj->host_mem .device_addr ),
2860
- dev_global_ptr_size, dev_global_ptr_size,
2861
- dev_global_ptr_size);
2875
+ device_idx += l_copy_argument_to_buffer_or_streaming (
2876
+ buf + device_idx, &(mem_obj->host_mem .device_addr ),
2877
+ dev_global_ptr_size, streaming_args,
2878
+ arg_info->streaming_arg_info_available ,
2879
+ arg_info->streaming_arg_info );
2862
2880
// Regular buffer:
2863
2881
} else {
2864
2882
#ifdef MEM_DEBUG_MSG
@@ -2920,8 +2938,10 @@ static cl_int l_copy_and_adjust_arguments_for_device(
2920
2938
const void *mem_addr =
2921
2939
mem_obj->reserved_allocations [needed_physical_id][needed_mem_id]
2922
2940
->range .begin ;
2923
- safe_memcpy (buf + device_idx, &mem_addr, dev_global_ptr_size,
2924
- dev_global_ptr_size, dev_global_ptr_size);
2941
+ device_idx += l_copy_argument_to_buffer_or_streaming (
2942
+ buf + device_idx, &mem_addr, dev_global_ptr_size, streaming_args,
2943
+ arg_info->streaming_arg_info_available ,
2944
+ arg_info->streaming_arg_info );
2925
2945
2926
2946
if (memory_migration->num_mem_objects == 0 ) {
2927
2947
// First time allocation, 128 was chosen because previously, number of
@@ -2965,19 +2985,18 @@ static cl_int l_copy_and_adjust_arguments_for_device(
2965
2985
needed_mem_id;
2966
2986
++memory_migration->num_mem_objects ;
2967
2987
}
2968
- buf_incr = dev_global_ptr_size;
2969
2988
} else {
2970
2989
#ifdef MEM_DEBUG_MSG
2971
2990
printf (" const" );
2972
2991
#endif
2973
2992
2974
2993
// Host and device sizes are the same.
2975
2994
// E.g. for cl_uint, SVM ptr etc.
2976
- safe_memcpy (buf + device_idx, kernel->arg_value + host_idx,
2977
- arg_info->size , arg_info->size , arg_info->size );
2978
- buf_incr = arg_info->size ;
2995
+ device_idx += l_copy_argument_to_buffer_or_streaming (
2996
+ buf + device_idx, kernel->arg_value + host_idx, arg_info->size ,
2997
+ streaming_args, arg_info->streaming_arg_info_available ,
2998
+ arg_info->streaming_arg_info );
2979
2999
}
2980
- device_idx += buf_incr;
2981
3000
host_idx += arg_info->size ;
2982
3001
#ifdef MEM_DEBUG_MSG
2983
3002
printf (" \n " );
0 commit comments