Skip to content

Commit bcb414e

Browse files
committed
Per kernel autodiscovery change
----------------------------------------- Currently, each kernel receives the address and size of its device global. This is potentially undesirable; a better design would be to store device global information at the autodiscovery device level instead, and have each kernel query that information at runtime.
1 parent e45add1 commit bcb414e

File tree

5 files changed

+64
-21
lines changed

5 files changed

+64
-21
lines changed

include/acl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ typedef struct {
230230
fast_launch_depth; /* How many kernels can be buffered on the device, 0
231231
means no buffering just one can execute*/
232232
unsigned int is_sycl_compile; /* [1] SYCL compile; [0] OpenCL compile*/
233+
unsigned int device_global_address; /* Address of kernel's device global*/
234+
unsigned int device_global_size; /* Size of address space of device global used by this kernel*/
233235
} acl_accel_def_t;
234236

235237
/* An ACL system definition.

src/acl_auto_configure.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,30 @@ bool acl_load_device_def_from_str(const std::string &config_str,
873873
devdef.accel[i].is_sycl_compile, counters);
874874
}
875875

876+
devdef.accel[i].device_global_address =
877+
0; // Initializing for backward compatibility
878+
std::cerr << result << std::endl;
879+
std::cerr << (counters.back() > 0) << std::endl;
880+
if (result && counters.back() > 0) {
881+
std::cerr << "read dev global address" << std::endl;
882+
result = read_uint_counters(config_str, curr_pos,
883+
devdef.accel[i].device_global_address, counters);
884+
}else {
885+
std::cerr << "read dev global address fail" << std::endl;
886+
}
887+
888+
889+
devdef.accel[i].device_global_size =
890+
0; // Initializing for backward compatibility
891+
if (result && counters.back() > 0) {
892+
std::cerr << "read dev global size" << std::endl;
893+
result = read_uint_counters(config_str, curr_pos,
894+
devdef.accel[i].device_global_size, counters);
895+
}else {
896+
std::cerr << "read dev global size fail" << std::endl;
897+
898+
}
899+
876900
// forward compatibility: bypassing remaining fields at the end of kernel
877901
// description section
878902
while (result && counters.size() > 0 &&

src/acl_mem.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,10 @@ CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL(
421421
}
422422

423423
// dev_addr_t dev_global_address =
424-
// kernel->dev_bin->get_devdef().autodiscovery_def.?
425-
uintptr_t dev_global_address = 0x4000000;
424+
uintptr_t dev_global_address = kernel->accel_def->device_global_address;
425+
assert(kernel->accel_def->device_global_address == 4096); // TODO: remove when merging
426+
// uintptr_t dev_global_address = 0x4000000;
427+
// TODO: add checks for whether the copy will be out of bound for device global
426428
void *dev_global_ptr =
427429
(void *)(dev_global_address + offset * 8); // 1 unit of offset is 8 bits
428430
status = set_kernel_arg_mem_pointer_without_checks(kernel, 0, dev_global_ptr);

test/acl_auto_configure_test.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,24 +96,31 @@ TEST(auto_configure, simple) {
9696
#define IS_SYCL_COMPILE " 1"
9797
#define IS_NOT_SYCL_COMPILE " 0"
9898

99+
// device global information
100+
#define KERNEL_DEVICE_GLOBAL_ADDRESS " 4096"
101+
#define KERNEL_DEVICE_GLOBAL_SIZE " 2048"
102+
99103
int parsed;
100104
std::string err_str;
101-
ACL_LOCKED(
102-
parsed = acl_load_device_def_from_str(
103-
std::string(
105+
std::string autodiscovery = std::string(
104106
VERSIONIDTOSTR(ACL_AUTO_CONFIGURE_VERSIONID)
105107
DEVICE_FIELDS RANDOM_HASH
106108
" " BOARDNAME IS_NOT_BIG_ENDIAN MEM HOSTPIPE KERNEL_ARG_INFO_NONE
107-
" 1 82 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON
109+
" 1 84 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON // 84 = number of kernel field
108110
KERNEL_WORKGROUP_VARIANT KERNEL_WORKITEM_VARIANT
109111
KERNEL_NUM_VECTOR_LANES1 KERNEL_PROFILE_SCANCHAIN_LENGTH
110112
ARGS_LOCAL_GLOBAL_LONG_PROF KERNEL_PRINTF_FORMATSTRINGS
111113
LD_1024 KERNEL_REQD_WORK_GROUP_SIZE_NONE
112114
KERNEL_MAX_WORK_GROUP_SIZE_NONE
113115
KERNEL_MAX_GLOBAL_WORK_DIM_NONE
114116
KERNEL_USES_GLOBAL_WORK_OFFSET_ENABLED
115-
IS_SYCL_COMPILE),
117+
IS_SYCL_COMPILE KERNEL_DEVICE_GLOBAL_ADDRESS KERNEL_DEVICE_GLOBAL_SIZE);
118+
std::cerr << autodiscovery << std::endl;
119+
ACL_LOCKED(
120+
parsed = acl_load_device_def_from_str(
121+
autodiscovery,
116122
m_device_def.autodiscovery_def, err_str));
123+
std::cerr << err_str << std::endl;
117124
CHECK_EQUAL(1, parsed);
118125

119126
CHECK_EQUAL(1, m_device_def.autodiscovery_def.num_global_mem_systems);
@@ -260,6 +267,9 @@ TEST(auto_configure, simple) {
260267
CHECK_EQUAL(0,
261268
(int)m_device_def.autodiscovery_def.accel[0].max_work_group_size);
262269
CHECK_EQUAL(1, (int)m_device_def.autodiscovery_def.accel[0].is_sycl_compile);
270+
CHECK_EQUAL(4096, (int)m_device_def.autodiscovery_def.accel[0].device_global_address);
271+
CHECK_EQUAL(2048, (int)m_device_def.autodiscovery_def.accel[0].device_global_size);
272+
263273

264274
// Check a second parsing.
265275
// It should allocate a new string for the name.

test/acl_globals_test.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -198,20 +198,25 @@ static std::vector<acl_accel_def_t> acltest_complex_system_device0_accel = {
198198
{},
199199
{32768, 0, 0},
200200
1},
201-
{14,
202-
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]),
203-
acltest_kernels[14],
204-
acltest_laspace_info,
205-
{0, 0, 0},
206-
0,
207-
0,
208-
1,
209-
0,
210-
32768,
211-
3,
212-
{},
213-
{32768, 0, 0},
214-
1},
201+
{14, // id
202+
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]), // mem
203+
acltest_kernels[14], // iface
204+
acltest_laspace_info, // local_aspaces
205+
{0, 0, 0}, // compile_work_group_size
206+
0, // is_workgroup_invariant
207+
0, // is_workitem_invariant
208+
1, // num_vector_lanes
209+
0, // profiling_words_to_readback
210+
32768, // max_work_group_size
211+
3, // max_global_work_dim
212+
{}, // printf_format_info
213+
{32768, 0, 0}, // max_work_group_size_arr
214+
1, // uses_global_work_offset
215+
0, // fast_launch_depth
216+
1, // is_sycl_compile
217+
4096, // device_global_address
218+
2048, // device_global_size
219+
},
215220
{1,
216221
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[1]),
217222
acltest_kernels[1],

0 commit comments

Comments
 (0)