Skip to content

Support streaming kernel arguments and streaming kernel control #103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions include/acl.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ typedef enum {
} acl_kernel_arg_access_qualifier_t; // this is defaulted to none, for non-pipe
// and non-image args.

// Extra per-argument metadata for a kernel argument that is delivered through
// a streaming interface. It is only meaningful when the owning argument's
// streaming_arg_info_available flag is set.
struct acl_streaming_kernel_arg_info {
// name of the streaming interface at device image boundary
std::string interface_name;
};

// This defines everything "interface" of a kernel argument.
// Be sure to keep this consistent with l_kernel_interface_match() in
// acl_kernel.cpp. This struct must remain trivially copyable.
Expand Down Expand Up @@ -170,6 +175,9 @@ typedef struct {
// allowed, e.g., "struct mystruct"
std::string type_name;
std::string name;

bool streaming_arg_info_available;
acl_streaming_kernel_arg_info streaming_arg_info;
} acl_kernel_arg_info_t;

// This struct must remain trivially copyable.
Expand Down Expand Up @@ -231,6 +239,8 @@ typedef struct {
fast_launch_depth; /* How many kernels can be buffered on the device, 0
means no buffering just one can execute*/
unsigned int is_sycl_compile; /* [1] SYCL compile; [0] OpenCL compile*/

bool streaming_control_info_available;
} acl_accel_def_t;

/* An ACL system definition.
Expand Down
6 changes: 6 additions & 0 deletions include/acl_hal.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ typedef struct {
/// Allocate USM shared memory
void *(*shared_alloc)(cl_device_id device, size_t size, size_t alignment,
mem_properties_t *properties, int *error);

void (*simulation_streaming_kernel_start)(unsigned int physical_device_id,
const std::string &kernel_name);
void (*simulation_streaming_kernel_done)(unsigned int physical_device_id,
const std::string &kernel_name,
unsigned int &finish_counter);
} acl_hal_t;

/// Linked list of MMD library names to load.
Expand Down
29 changes: 29 additions & 0 deletions include/acl_hal_mmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ typedef struct {
unsigned long long size; /* size of this memory */
} aocl_mmd_memory_info_t;

// Interface to simulator to describe streaming kernel arguments that are
// excluded from the invocation image. Streaming arguments are passed to the
// simulator by calling aocl_mmd_simulation_streaming_kernel_args(), before
// writing the kernel invocation image containing non-streaming arguments.
//
// One instance describes one streaming argument of one kernel invocation.
struct aocl_mmd_streaming_kernel_arg_info_t {
// unique identifier for the bus-functional model (BFM); the runtime fills
// this with the argument's streaming interface name
std::string name;
// argument value, stored as a private copy of the argument's raw bytes
std::vector<char> value;
};

// MMD Version checking
// Since MMD version changes only with major releases it is safe to assume
// this is a float with at most one decimal
Expand Down Expand Up @@ -120,6 +131,24 @@ typedef struct {
int *error);

double mmd_version;

// Passes streaming kernel argument names and values to simulator.
void (*aocl_mmd_simulation_streaming_kernel_args)(
int handle,
const std::vector<aocl_mmd_streaming_kernel_arg_info_t> &streaming_args);

// Submits streaming kernel control start signal to simulator.
void (*aocl_mmd_simulation_streaming_kernel_start)(
int handle, const std::string &kernel_name);

// Queries streaming kernel control done signal from simulator.
// Reports the non-negative number of finished kernel invocations through
// finish_counter.
//
// It is the responsibility of the simulator to ensure that any kernel
// invocations that finish *while* this function is invoked are properly
// accounted and returned in a subsequent invocation of this function.
void (*aocl_mmd_simulation_streaming_kernel_done)(
int handle, const std::string &kernel_name, unsigned int &finish_counter);
} acl_mmd_dispatch_t;

typedef struct {
Expand Down
6 changes: 6 additions & 0 deletions include/acl_kernel_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include "acl_hal.h"
#include "acl_types.h"

#include <optional>
#include <string>
#include <vector>

#ifdef __cplusplus
extern "C" {
#endif
Expand All @@ -35,6 +39,8 @@ typedef struct {
acl_kernel_if_addr_range *accel_perf_mon;
unsigned int *accel_num_printfs;

std::vector<std::optional<std::string>> streaming_control_kernel_names;

// Track potential hangs
time_ns last_kern_update;

Expand Down
2 changes: 2 additions & 0 deletions include/acl_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "acl.h"
#include "acl_device_binary.h"
#include "acl_hal.h"
#include "acl_hal_mmd.h"
#include "acl_icd_dispatch.h"

#if defined(__cplusplus)
Expand Down Expand Up @@ -446,6 +447,7 @@ typedef struct acl_kernel_invocation_wrapper_t {
acl_dev_kernel_invocation_image_t *image;
acl_dev_kernel_invocation_image_t image_storage; // What image points to.

std::vector<aocl_mmd_streaming_kernel_arg_info_t> streaming_args;
} acl_kernel_invocation_wrapper_t;

typedef struct {
Expand Down
46 changes: 46 additions & 0 deletions src/acl_auto_configure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,24 @@ static bool read_device_global_mem_defs(
return result;
}

// Parses the streaming-argument section of a single kernel argument.
//
// An unsigned flag is read first via read_uint_counters(); any non-zero value
// means streaming information is present. In that case the interface name is
// read next via read_string_counters() into a freshly reset
// streaming_arg_info.
//
// config_str / curr_pos / counters are forwarded unchanged to the read
// helpers. streaming_arg_info_available is always written, even when reading
// the flag fails. streaming_arg_info is only written when the flag was read
// successfully and is non-zero.
//
// Returns the status of the last read that was attempted.
static bool read_streaming_kernel_arg_info(
    const std::string &config_str, std::string::size_type &curr_pos,
    bool &streaming_arg_info_available,
    acl_streaming_kernel_arg_info &streaming_arg_info,
    std::vector<int> &counters) noexcept {
  unsigned int available_flag = 0;
  const bool flag_read_ok =
      read_uint_counters(config_str, curr_pos, available_flag, counters);
  streaming_arg_info_available = (available_flag != 0);

  // Nothing more to parse if the flag could not be read or is cleared.
  if (!flag_read_ok || !streaming_arg_info_available) {
    return flag_read_ok;
  }

  // Start from a clean struct so no stale interface name survives.
  streaming_arg_info = acl_streaming_kernel_arg_info{};
  return read_string_counters(config_str, curr_pos,
                              streaming_arg_info.interface_name, counters);
}

static bool read_kernel_args(const std::string &config_str,
const bool kernel_arg_info_available,
std::string::size_type &curr_pos,
Expand Down Expand Up @@ -597,6 +615,14 @@ static bool read_kernel_args(const std::string &config_str,
type_name = "";
}

bool streaming_arg_info_available = false;
acl_streaming_kernel_arg_info streaming_arg_info;
if (result && counters.back() > 0) {
result = read_streaming_kernel_arg_info(config_str, curr_pos,
streaming_arg_info_available,
streaming_arg_info, counters);
}

/*****************************************************************
Since the introduction of autodiscovery forwards-compatibility,
new entries for each kernel argument section start here.
Expand All @@ -619,6 +645,8 @@ static bool read_kernel_args(const std::string &config_str,
args[j].host_accessible = host_accessible;
args[j].pipe_channel_id = pipe_channel_id;
args[j].buffer_location = buffer_location;
args[j].streaming_arg_info_available = streaming_arg_info_available;
args[j].streaming_arg_info = streaming_arg_info;
}
// forward compatibility: bypassing remaining fields at the end of
// arguments section
Expand All @@ -635,6 +663,18 @@ static bool read_kernel_args(const std::string &config_str,
return result;
}

// Parses the streaming-control flag of a kernel description.
//
// A single unsigned value is read via read_uint_counters(); any non-zero
// value sets streaming_control_info_available to true. The output flag is
// written regardless of whether the read succeeded.
//
// Returns the status reported by read_uint_counters().
static bool
read_streaming_kernel_control_info(const std::string &config_str,
                                   std::string::size_type &curr_pos,
                                   bool &streaming_control_info_available,
                                   std::vector<int> &counters) noexcept {
  unsigned int available_flag = 0;
  const bool read_ok =
      read_uint_counters(config_str, curr_pos, available_flag, counters);
  streaming_control_info_available = (available_flag != 0);
  return read_ok;
}

static bool read_accel_defs(const std::string &config_str,
std::string::size_type &curr_pos,
const bool kernel_arg_info_available,
Expand Down Expand Up @@ -872,6 +912,12 @@ static bool read_accel_defs(const std::string &config_str,
accel[i].is_sycl_compile, counters);
}

if (result && counters.back() > 0) {
result = read_streaming_kernel_control_info(
config_str, curr_pos, accel[i].streaming_control_info_available,
counters);
}

// forward compatibility: bypassing remaining fields at the end of kernel
// description section
while (result && counters.size() > 0 &&
Expand Down
33 changes: 32 additions & 1 deletion src/acl_hal_mmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ int acl_hal_mmd_set_profile_start_count(unsigned int physical_device_id,
int acl_hal_mmd_set_profile_stop_count(unsigned int physical_device_id,
unsigned int accel_id, uint64_t value);

// Forward declarations of the streaming kernel control entry points. Both
// forward their arguments to the matching function in the device's MMD
// dispatch table.
void acl_hal_mmd_simulation_streaming_kernel_start(
unsigned int physical_device_id, const std::string &kernel_name);
void acl_hal_mmd_simulation_streaming_kernel_done(
unsigned int physical_device_id, const std::string &kernel_name,
unsigned int &finish_counter);

static size_t acl_kernel_if_read(acl_bsp_io *io, dev_addr_t src, char *dest,
size_t size);
static size_t acl_kernel_if_write(acl_bsp_io *io, dev_addr_t dest,
Expand Down Expand Up @@ -343,7 +349,9 @@ static acl_hal_t acl_hal_mmd = {
acl_hal_mmd_close_devices, // close_devices
acl_hal_mmd_host_alloc, // host_alloc
acl_hal_mmd_free, // free
acl_hal_mmd_shared_alloc // shared_alloc
acl_hal_mmd_shared_alloc, // shared_alloc
acl_hal_mmd_simulation_streaming_kernel_start, // simulation_streaming_kernel_start
acl_hal_mmd_simulation_streaming_kernel_done, // simulation_streaming_kernel_done
};

// This will contain the device physical id to tell us which device across all
Expand Down Expand Up @@ -1876,6 +1884,14 @@ void acl_hal_mmd_copy_globalmem_to_globalmem(cl_event event, const void *src,
// Launches a kernel invocation on the given physical device.
//
// If the wrapper carries any streaming arguments, they are handed to the MMD
// through aocl_mmd_simulation_streaming_kernel_args() first. The invocation
// is then submitted through acl_kernel_if_launch_kernel().
void acl_hal_mmd_launch_kernel(unsigned int physical_device_id,
                               acl_kernel_invocation_wrapper_t *wrapper) {
  acl_assert_locked();

  if (!wrapper->streaming_args.empty()) {
    auto &device = device_info[physical_device_id];
    device.mmd_dispatch->aocl_mmd_simulation_streaming_kernel_args(
        device.handle, wrapper->streaming_args);
  }

  acl_kernel_if_launch_kernel(&kern[physical_device_id], wrapper);
}

Expand Down Expand Up @@ -2816,3 +2832,18 @@ unsigned acl_convert_mmd_capabilities(unsigned mmd_capabilities) {
}
return capability;
}

// Forwards a streaming kernel start request for kernel_name to the MMD
// dispatch table of the given physical device, using that device's handle.
void acl_hal_mmd_simulation_streaming_kernel_start(
    unsigned int physical_device_id, const std::string &kernel_name) {
  auto &device = device_info[physical_device_id];
  device.mmd_dispatch->aocl_mmd_simulation_streaming_kernel_start(
      device.handle, kernel_name);
}

// Forwards a streaming kernel done query for kernel_name to the MMD dispatch
// table of the given physical device, using that device's handle.
// finish_counter is passed through by reference, so whatever the MMD writes
// into it is visible to the caller.
void acl_hal_mmd_simulation_streaming_kernel_done(
    unsigned int physical_device_id, const std::string &kernel_name,
    unsigned int &finish_counter) {
  auto &device = device_info[physical_device_id];
  device.mmd_dispatch->aocl_mmd_simulation_streaming_kernel_done(
      device.handle, kernel_name, finish_counter);
}
35 changes: 24 additions & 11 deletions src/acl_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,10 @@ static cl_int l_enqueue_kernel_with_type(
const cl_event *event_wait_list, cl_event *event, cl_command_type type);
static void l_get_arg_offset_and_size(cl_kernel kernel, cl_uint arg_index,
size_t *start_idx_ret, size_t *size_ret);
static cl_int
l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
char *buf, cl_uint *num_bytes,
acl_mem_migrate_t *memory_migration);
static cl_int l_copy_and_adjust_arguments_for_device(
cl_kernel kernel, cl_device_id device, char *buf, cl_uint *num_bytes,
acl_mem_migrate_t *memory_migration,
std::vector<aocl_mmd_streaming_kernel_arg_info_t> &streaming_args);

static void l_abort_use_of_wrapper(acl_kernel_invocation_wrapper_t *wrapper);

Expand Down Expand Up @@ -2178,7 +2178,7 @@ static cl_int l_enqueue_kernel_with_type(

kernel_arg_bytes = (cl_uint)l_copy_and_adjust_arguments_for_device(
kernel, device, &(invocation->arg_value[0]), &kernel_arg_bytes,
&memory_migration);
&memory_migration, serialization_wrapper->streaming_args);

assert(kernel_arg_bytes <= kernel->arg_value_size);

Expand Down Expand Up @@ -2242,7 +2242,7 @@ static cl_int l_enqueue_kernel_with_type(

status = l_copy_and_adjust_arguments_for_device(
kernel, device, &(invocation->arg_value[0]), &kernel_arg_bytes,
&memory_migration);
&memory_migration, wrapper->streaming_args);

if (status != CL_SUCCESS) {
ERR_RET(status, context, "Argument error");
Expand Down Expand Up @@ -2738,10 +2738,10 @@ int acl_num_non_null_mem_args(cl_kernel kernel) {
//
// Returns number of bytes written to the device-side buffer in num_bytes.
// Returns failure if memory could not be reserved on the device.
static cl_int
l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
char *buf, cl_uint *num_bytes,
acl_mem_migrate_t *memory_migration) {
static cl_int l_copy_and_adjust_arguments_for_device(
cl_kernel kernel, cl_device_id device, char *buf, cl_uint *num_bytes,
acl_mem_migrate_t *memory_migration,
std::vector<aocl_mmd_streaming_kernel_arg_info_t> &streaming_args) {
// indices into the host and device arg value buffer arrays.
size_t host_idx = 0;
size_t device_idx = 0;
Expand Down Expand Up @@ -2770,6 +2770,9 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
next_local[aspace.aspace_id] +=
l_round_up_for_alignment(aspace.static_demand);
}

streaming_args.clear();

#ifdef MEM_DEBUG_MSG
printf("kernel args\n");
#endif
Expand All @@ -2785,7 +2788,17 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device,
// Exclude kernel argument value from device-side buffer by default.
cl_uint buf_incr = 0;

if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
if (arg_info->streaming_arg_info_available) {
#ifdef MEM_DEBUG_MSG
printf("streaming");
#endif
// Copy argument value to a separate buffer since it may be modified with
// clSetKernelArg() after kernel is enqueued but before it is launched.
const char *const arg_value = &kernel->arg_value[host_idx];
streaming_args.emplace_back(aocl_mmd_streaming_kernel_arg_info_t{
arg_info->streaming_arg_info.interface_name,
std::vector<char>(arg_value, arg_value + arg_info->size)});
} else if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) {
#ifdef MEM_DEBUG_MSG
printf("local");
#endif
Expand Down
Loading