Skip to content

Commit f89b759

Browse files
committed
Add device globals to the device autodiscovery definition
---------------------------------------------------------- The autodiscovery string specifies how many device globals there are on the device. For each device global, it specifies how many attributes it has (currently 3). The 3 attributes are: device global name, address, and size. The device global names are used at runtime to look up the address of a device global when its name is given to clEnqueueReadGlobalVariableINTEL or clEnqueueWriteGlobalVariableINTEL.
1 parent bcb414e commit f89b759

File tree

5 files changed

+137
-75
lines changed

5 files changed

+137
-75
lines changed

include/acl.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <array>
88
#include <assert.h>
99
#include <string>
10+
#include <unordered_map>
1011
#include <vector>
1112

1213
#include <CL/cl_ext.h>
@@ -230,8 +231,6 @@ typedef struct {
230231
fast_launch_depth; /* How many kernels can be buffered on the device, 0
231232
means no buffering just one can execute*/
232233
unsigned int is_sycl_compile; /* [1] SYCL compile; [0] OpenCL compile*/
233-
unsigned int device_global_address; /* Address of kernel's device global*/
234-
unsigned int device_global_size; /* Size of address space of device global used by this kernel*/
235234
} acl_accel_def_t;
236235

237236
/* An ACL system definition.
@@ -480,6 +479,12 @@ typedef class acl_device_program_info_t *acl_device_program_info;
480479
*/
481480
#define ACL_MEM_CAPABILITY_P2P (1 << 3)
482481

482+
typedef struct acl_device_global_mem_def_t {
483+
std::string name;
484+
unsigned int address;
485+
unsigned int size;
486+
} acl_device_global_mem_def_t;
487+
483488
// Part of acl_device_def_t where members are populated from the information
484489
// in the autodiscovery string. This will get updated every time the device
485490
// is programmed with a new device binary as the new binary would contain a
@@ -498,6 +503,11 @@ typedef struct acl_device_def_autodiscovery_t {
498503
std::array<acl_system_global_mem_def_t, ACL_MAX_GLOBAL_MEM> global_mem_defs;
499504

500505
std::vector<acl_hostpipe_info_t> acl_hostpipe_info;
506+
507+
// device global definition
508+
unsigned int num_device_global;
509+
std::unordered_map<std::string, acl_device_global_mem_def_t>
510+
device_global_mem_defs;
501511
} acl_device_def_autodiscovery_t;
502512

503513
typedef struct acl_device_def_t {

src/acl_auto_configure.cpp

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ static bool read_uint_counters(const std::string &str,
9696
UNREFERENCED_PARAMETER(e);
9797
return false;
9898
}
99+
99100
return true;
100101
}
101102

@@ -493,6 +494,61 @@ bool acl_load_device_def_from_str(const std::string &config_str,
493494
counters);
494495
}
495496

497+
// Read device global information
498+
unsigned int num_device_global = 0;
499+
if (result && counters.back() > 0) {
500+
result =
501+
read_uint_counters(config_str, curr_pos, num_device_global, counters);
502+
devdef.num_device_global = num_device_global;
503+
504+
for (auto i = 0U; result && (i < num_device_global);
505+
i++) { // device_global_memories
506+
// read total number of fields in global_memories
507+
int total_fields_device_global = 0;
508+
if (counters.back() > 0) {
509+
result = read_int_counters(config_str, curr_pos,
510+
total_fields_device_global, counters);
511+
}
512+
513+
counters.emplace_back(total_fields_device_global);
514+
515+
// read device global name
516+
std::string device_global_name;
517+
if (result && counters.back() > 0) {
518+
result = read_string_counters(config_str, curr_pos, device_global_name,
519+
counters);
520+
}
521+
522+
// read device global address
523+
unsigned int dev_global_addr = 0; // Default
524+
if (result && counters.back() > 0) {
525+
result =
526+
read_uint_counters(config_str, curr_pos, dev_global_addr, counters);
527+
}
528+
// read device global address size
529+
unsigned int dev_global_size = 0; // Default
530+
if (result && counters.back() > 0) {
531+
result =
532+
read_uint_counters(config_str, curr_pos, dev_global_size, counters);
533+
}
534+
535+
acl_device_global_mem_def_t dev_global_def = {
536+
device_global_name, dev_global_addr, dev_global_size};
537+
devdef.device_global_mem_defs[device_global_name] = dev_global_def;
538+
539+
// forward compatibility: bypassing remaining fields at the end of global
540+
// memory
541+
while (result && counters.size() > 0 &&
542+
counters.back() > 0) { // total_fields_device_global>0
543+
std::string tmp;
544+
result =
545+
result && read_string_counters(config_str, curr_pos, tmp, counters);
546+
check_section_counters(counters);
547+
}
548+
counters.pop_back(); // removing total_fields_device_global
549+
} // device_global_memories
550+
}
551+
496552
// forward compatibility: bypassing remaining fields at the end of device
497553
// description section
498554
while (result && counters.size() > 0 &&
@@ -873,30 +929,6 @@ bool acl_load_device_def_from_str(const std::string &config_str,
873929
devdef.accel[i].is_sycl_compile, counters);
874930
}
875931

876-
devdef.accel[i].device_global_address =
877-
0; // Initializing for backward compatability
878-
std::cerr << result << std::endl;
879-
std::cerr << (counters.back() > 0) << std::endl;
880-
if (result && counters.back() > 0) {
881-
std::cerr << "read dev global address" << std::endl;
882-
result = read_uint_counters(config_str, curr_pos,
883-
devdef.accel[i].device_global_address, counters);
884-
}else {
885-
std::cerr << "read dev global address fail" << std::endl;
886-
}
887-
888-
889-
devdef.accel[i].device_global_size =
890-
0; // Initializing for backward compatability
891-
if (result && counters.back() > 0) {
892-
std::cerr << "read dev global size" << std::endl;
893-
result = read_uint_counters(config_str, curr_pos,
894-
devdef.accel[i].device_global_size, counters);
895-
}else {
896-
std::cerr << "read dev global size fail" << std::endl;
897-
898-
}
899-
900932
// forward compatibility: bypassing remaining fields at the end of kernel
901933
// description section
902934
while (result && counters.size() > 0 &&

src/acl_mem.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,12 @@ CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL(
421421
}
422422

423423
// dev_addr_t dev_global_address =
424-
uintptr_t dev_global_address = kernel->accel_def->device_global_address;
425-
assert(kernel->accel_def->device_global_address == 4096); // TODO: remove when merging
426-
// uintptr_t dev_global_address = 0x4000000;
427-
// TODO: add checks for whether the copy will be out of bound for device global
424+
// uintptr_t dev_global_address =
425+
// kernel->dev_bin->get_devdef().autodiscovery_def.device_global_mem_defs[name];
426+
// uintptr_t dev_global_address = kernel->accel_def->device_global_address;
427+
uintptr_t dev_global_address = 0x4000000;
428+
// TODO: add checks for whether the copy will be out of bound for device
429+
// global
428430
void *dev_global_ptr =
429431
(void *)(dev_global_address + offset * 8); // 1 unit of offset is 8 bits
430432
status = set_kernel_arg_mem_pointer_without_checks(kernel, 0, dev_global_ptr);

test/acl_auto_configure_test.cpp

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ TEST(auto_configure, simple) {
3636
#define VERSIONIDSTRINGIFY(x) #x
3737
#define VERSIONIDTOSTR(x) VERSIONIDSTRINGIFY(x)
3838
#define DEVICE_FIELDS " 23"
39+
#define DEVICE_FIELDS_DEV_GLOBAL " 32"
3940
#define DEVICE_FIELDS_OLD " 18"
4041
#define BOARDNAME "de4_gen2x4_swdimm"
4142
#define BOARDNAME2 "pcie385_a7"
@@ -96,31 +97,32 @@ TEST(auto_configure, simple) {
9697
#define IS_SYCL_COMPILE " 1"
9798
#define IS_NOT_SYCL_COMPILE " 0"
9899

99-
// device global information
100-
#define KERNEL_DEVICE_GLOBAL_ADDRESS " 4096"
101-
#define KERNEL_DEVICE_GLOBAL_SIZE " 2048"
100+
// device global
101+
#define NUM_DEV_GLOBAL " 2"
102+
#define NUM_DEV_GLOBAL_FIELD " 3" // containing dev_globa_name, address, size
103+
#define DEV_GLOBAL_1 \
104+
" kernel15_dev_global 4096 2048" // in format of dev_globa_name, address, size
105+
#define DEV_GLOBAL_2 " kernel15_dev_global2 2048 1024"
102106

103107
int parsed;
104108
std::string err_str;
105109
std::string autodiscovery = std::string(
106-
VERSIONIDTOSTR(ACL_AUTO_CONFIGURE_VERSIONID)
107-
DEVICE_FIELDS RANDOM_HASH
108-
" " BOARDNAME IS_NOT_BIG_ENDIAN MEM HOSTPIPE KERNEL_ARG_INFO_NONE
109-
" 1 84 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON // 84 = number of kernel field
110-
KERNEL_WORKGROUP_VARIANT KERNEL_WORKITEM_VARIANT
111-
KERNEL_NUM_VECTOR_LANES1 KERNEL_PROFILE_SCANCHAIN_LENGTH
112-
ARGS_LOCAL_GLOBAL_LONG_PROF KERNEL_PRINTF_FORMATSTRINGS
113-
LD_1024 KERNEL_REQD_WORK_GROUP_SIZE_NONE
114-
KERNEL_MAX_WORK_GROUP_SIZE_NONE
115-
KERNEL_MAX_GLOBAL_WORK_DIM_NONE
116-
KERNEL_USES_GLOBAL_WORK_OFFSET_ENABLED
117-
IS_SYCL_COMPILE KERNEL_DEVICE_GLOBAL_ADDRESS KERNEL_DEVICE_GLOBAL_SIZE);
118-
std::cerr << autodiscovery << std::endl;
119-
ACL_LOCKED(
120-
parsed = acl_load_device_def_from_str(
121-
autodiscovery,
122-
m_device_def.autodiscovery_def, err_str));
123-
std::cerr << err_str << std::endl;
110+
VERSIONIDTOSTR(ACL_AUTO_CONFIGURE_VERSIONID)
111+
DEVICE_FIELDS_DEV_GLOBAL RANDOM_HASH
112+
" " BOARDNAME IS_NOT_BIG_ENDIAN MEM HOSTPIPE KERNEL_ARG_INFO_NONE
113+
NUM_DEV_GLOBAL NUM_DEV_GLOBAL_FIELD DEV_GLOBAL_1 NUM_DEV_GLOBAL_FIELD
114+
DEV_GLOBAL_2
115+
" 1 82 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON
116+
KERNEL_WORKGROUP_VARIANT KERNEL_WORKITEM_VARIANT
117+
KERNEL_NUM_VECTOR_LANES1 KERNEL_PROFILE_SCANCHAIN_LENGTH
118+
ARGS_LOCAL_GLOBAL_LONG_PROF KERNEL_PRINTF_FORMATSTRINGS
119+
LD_1024 KERNEL_REQD_WORK_GROUP_SIZE_NONE
120+
KERNEL_MAX_WORK_GROUP_SIZE_NONE
121+
KERNEL_MAX_GLOBAL_WORK_DIM_NONE
122+
KERNEL_USES_GLOBAL_WORK_OFFSET_ENABLED
123+
IS_SYCL_COMPILE);
124+
ACL_LOCKED(parsed = acl_load_device_def_from_str(
125+
autodiscovery, m_device_def.autodiscovery_def, err_str));
124126
CHECK_EQUAL(1, parsed);
125127

126128
CHECK_EQUAL(1, m_device_def.autodiscovery_def.num_global_mem_systems);
@@ -267,9 +269,26 @@ TEST(auto_configure, simple) {
267269
CHECK_EQUAL(0,
268270
(int)m_device_def.autodiscovery_def.accel[0].max_work_group_size);
269271
CHECK_EQUAL(1, (int)m_device_def.autodiscovery_def.accel[0].is_sycl_compile);
270-
CHECK_EQUAL(4096, (int)m_device_def.autodiscovery_def.accel[0].device_global_address);
271-
CHECK_EQUAL(2048, (int)m_device_def.autodiscovery_def.accel[0].device_global_size);
272272

273+
CHECK_EQUAL(2, (int)m_device_def.autodiscovery_def.num_device_global);
274+
CHECK(m_device_def.autodiscovery_def.device_global_mem_defs.find(
275+
"kernel15_dev_global") !=
276+
m_device_def.autodiscovery_def.device_global_mem_defs.end());
277+
CHECK(m_device_def.autodiscovery_def.device_global_mem_defs.find(
278+
"kernel15_dev_global2") !=
279+
m_device_def.autodiscovery_def.device_global_mem_defs.end());
280+
CHECK_EQUAL(4096, m_device_def.autodiscovery_def
281+
.device_global_mem_defs["kernel15_dev_global"]
282+
.address);
283+
CHECK_EQUAL(2048, m_device_def.autodiscovery_def
284+
.device_global_mem_defs["kernel15_dev_global"]
285+
.size);
286+
CHECK_EQUAL(2048, m_device_def.autodiscovery_def
287+
.device_global_mem_defs["kernel15_dev_global2"]
288+
.address);
289+
CHECK_EQUAL(1024, m_device_def.autodiscovery_def
290+
.device_global_mem_defs["kernel15_dev_global2"]
291+
.size);
273292

274293
// Check a second parsing.
275294
// It should allocate a new string for the name.
@@ -470,8 +489,8 @@ TEST(auto_configure, many_ok_forward_compatibility) {
470489
ACL_AUTO_CONFIGURE_VERSIONID) " 28 "
471490
"sample40byterandomhash000000000000000000 "
472491
"a10gx 0 1 15 DDR 2 1 6 0 2147483648 100 "
473-
"100 100 100 200 200 200 200 0 0 0 0 400 "
474-
"400 400 400 400 47 "
492+
"100 100 100 200 200 200 200 0 0 0 0 2 "
493+
"1 1 1 400 47 "
475494
"40 external_sort_stage_0 0 128 1 0 0 1 0 "
476495
"1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 0 "
477496
"0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 800 "
@@ -1185,7 +1204,7 @@ TEST(auto_configure, hostpipe) {
11851204
"200 "
11861205
"2 9 host_to_dev 1 0 32 32768 300 300 300 "
11871206
"300 dev_to_host 0 1 32 32768 300 300 300 "
1188-
"300 400 400 400 400 400 0 "
1207+
"300 400 1 3 400 400 0 "
11891208
"1 29 foo 0 128 1 0 0 1 0 1 0 0 0 0 0 0 1 "
11901209
"1 1 3 1 1 1 3 1 800 800 800 800 800 900 "
11911210
"900"

test/acl_globals_test.cpp

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -198,25 +198,24 @@ static std::vector<acl_accel_def_t> acltest_complex_system_device0_accel = {
198198
{},
199199
{32768, 0, 0},
200200
1},
201-
{14, // id
202-
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]), // mem
203-
acltest_kernels[14], // iface
204-
acltest_laspace_info, // local_aspaces
205-
{0, 0, 0}, // compile_work_group_size
206-
0, // is_workgroup_invariant
207-
0, // is_workitem_invariant
208-
1, // num_vector_lanes
209-
0, // profiling_words_to_readback
210-
32768, // max_work_group_size
211-
3, // max_global_work_dim
212-
{}, // printf_format_info
213-
{32768, 0, 0}, // max_work_group_size_arr
214-
1, // uses_global_work_offset
215-
0, // fast_launch_depth
216-
1, // is_sycl_compile
217-
4096, // device_global_address
218-
2048, // device_global_size
219-
},
201+
{
202+
14, // id
203+
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]), // mem
204+
acltest_kernels[14], // iface
205+
acltest_laspace_info, // local_aspaces
206+
{0, 0, 0}, // compile_work_group_size
207+
0, // is_workgroup_invariant
208+
0, // is_workitem_invariant
209+
1, // num_vector_lanes
210+
0, // profiling_words_to_readback
211+
32768, // max_work_group_size
212+
3, // max_global_work_dim
213+
{}, // printf_format_info
214+
{32768, 0, 0}, // max_work_group_size_arr
215+
1, // uses_global_work_offset
216+
0, // fast_launch_depth
217+
1, // is_sycl_compile
218+
},
220219
{1,
221220
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[1]),
222221
acltest_kernels[1],

0 commit comments

Comments
 (0)