Skip to content

Commit 22cbb66

Browse files
committed
Perform a memory copy for simulation buffer located in incorrect global memory address range
1 parent 17d8f6b commit 22cbb66

File tree

3 files changed

+90
-2
lines changed

3 files changed

+90
-2
lines changed

include/acl_mem.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ int acl_submit_mem_transfer_device_op(cl_event event);
3232

3333
int acl_submit_migrate_mem_device_op(cl_event event);
3434

35+
// Simulator-only helper: if `mem` lives in the global memory region and its
// block allocation lies outside the usable range of global memory `mem_id`
// on device `physical_device_id`, allocate it again and copy the contents
// from the old address to the new one.
// Returns 1 when no move was needed or the move was carried out, and 0 when
// the physical buffer allocation failed.
int acl_realloc_buffer_for_simulator(cl_mem mem,
36+
const unsigned int physical_device_id,
37+
const unsigned int mem_id);
38+
3539
// Actually execute the memory transfer device operation.
3640
// In the normal case source and destination are different, in which case
3741
// the HAL is called and the transfer is non-blocking.

src/acl_kernel.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2917,6 +2917,14 @@ static cl_int l_copy_and_adjust_arguments_for_device(
29172917
[needed_mem_id]);
29182918
#endif
29192919

2920+
int env_override = 0;
2921+
(void)acl_get_offline_device_user_setting(&env_override);
2922+
if (env_override == ACL_CONTEXT_MPSIM) {
2923+
if (!acl_realloc_buffer_for_simulator(mem_obj, needed_physical_id,
2924+
needed_mem_id)) {
2925+
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
2926+
}
2927+
}
29202928
// copy the address of the reserved allocation into the invocation
29212929
// image:
29222930
const void *mem_addr =

src/acl_mem.cpp

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4224,8 +4224,6 @@ static void l_get_working_range(const acl_block_allocation_t *block_allocation,
42244224
acl_assert_locked();
42254225

42264226
if (block_allocation->region == &(acl_platform.global_mem)) {
4227-
int env_override = 0;
4228-
(void)acl_get_offline_device_user_setting(&env_override);
42294227
const auto *global_mem_defs = &(acl_platform.device[physical_device_id]
42304228
.def.autodiscovery_def.global_mem_defs);
42314229

@@ -6583,6 +6581,84 @@ void acl_copy_device_buffers_from_host_after_programming(
65836581
}
65846582
}
65856583

6584+
// Move a buffer to a fresh allocation when its current block allocation
// falls outside the usable address range of the selected global memory.
//
// mem                - buffer to inspect and possibly relocate.
// physical_device_id - index into acl_platform.device[].
// mem_id             - index into that device's global_mem_defs[].
//
// Returns 1 if nothing had to be done or the buffer was relocated and its
// contents copied; returns 0 if acl_do_physical_buffer_allocation() failed.
int acl_realloc_buffer_for_simulator(cl_mem mem,
6585+
const unsigned int physical_device_id,
6586+
const unsigned int mem_id) {
6587+
// Only reallocate and migrate if mem resides in global memory
6588+
if (mem->block_allocation->region == &(acl_platform.global_mem)) {
6589+
int mem_on_host;
6590+
void *old_mem_address;
6591+
void *new_mem_address;
6592+
6593+
// Usable address range of the requested global memory on this device.
acl_addr_range_t global_mem_range =
6594+
acl_platform.device[physical_device_id]
6595+
.def.autodiscovery_def.global_mem_defs[mem_id]
6596+
.get_usable_range();
6597+
6598+
// Save the old address; mem_on_host is passed by pointer and is
// presumably filled in by this call (it is not initialized here).
6599+
old_mem_address = l_get_address_of_writable_copy(mem, physical_device_id,
6600+
&mem_on_host, CL_FALSE);
6601+
6602+
// The mem migration is only needed if the buffer is bound to the device
6603+
// before the global memory range is confirmed (i.e., before reprogram),
6604+
// and the address range assumed before reprogram differs from the actual.
6605+
// Therefore, migrate only when all of the following hold:
6606+
// 1. allocation is NOT deferred (if deferred, auto migration will happen)
6607+
// 2. buffer is NOT on the host and no host-pointer copy is pending
6608+
// 3. buffer appears to be "at the destination" (its block allocation is
//    the reserved allocation for this device and memory)
6609+
// 4. block allocation is entirely outside the global memory range
// NOTE(review): if range.next is one past the last byte (as the `>=` test
// against global_mem_range.next suggests), a block ending exactly at
// global_mem_range.begin is not treated as outside - confirm whether
// `<=` was intended in the last comparison.
6610+
if (!mem->allocation_deferred &&
6611+
!(mem->mem_cpy_host_ptr_pending || mem_on_host) &&
6612+
(mem->block_allocation ==
6613+
mem->reserved_allocations[physical_device_id][mem_id]) &&
6614+
(mem->block_allocation->range.begin >= global_mem_range.next ||
6615+
mem->block_allocation->range.next < global_mem_range.begin)) {
6616+
6617+
// mem_id should match mem->mem_id if the block allocation is the same as
6618+
// the reserved allocation (not checked when NDEBUG disables assert)
6619+
assert(mem->mem_id == mem_id);
6620+
6621+
// Okay to set this to NULL, memory tracked in mem->block_allocation
6622+
mem->reserved_allocations[physical_device_id][mem_id] = NULL;
6623+
// We will reallocate the block, so unlink it from its region's list first
6624+
acl_block_allocation_t **block_ptr =
6625+
&(mem->block_allocation->region->first_block);
6626+
acl_block_allocation_t *block = *block_ptr;
6627+
assert(block != NULL); // Should be at least one block
6628+
while (*block_ptr) {
6629+
if (block == mem->block_allocation) {
6630+
*block_ptr = block->next_block_in_region;
6631+
break;
6632+
}
6633+
// Advance to the next block in the region
6634+
block_ptr = &(block->next_block_in_region);
6635+
block = *block_ptr;
6636+
}
// Allocate again; on failure the reserved slot cleared above stays NULL
// and the block stays unlinked.
6637+
if (!acl_do_physical_buffer_allocation(physical_device_id, mem)) {
6638+
return 0;
6639+
}
6640+
6641+
// Presumably the allocation call above repopulated this reserved slot;
// it is dereferenced here without a NULL check - TODO confirm.
new_mem_address =
6642+
mem->reserved_allocations[physical_device_id][mem_id]->range.begin;
6643+
const acl_hal_t *const hal = acl_get_hal();
6644+
6645+
#ifdef MEM_DEBUG_MSG
6646+
printf("reallocating mem obj for simulation after getting global mem "
6647+
"info, device %u ([0]%zx -> [0]%zx) ",
6648+
physical_device_id,
6649+
(size_t)(ACL_STRIP_PHYSICAL_ID(old_mem_address)),
6650+
(size_t)(ACL_STRIP_PHYSICAL_ID(new_mem_address)));
6651+
#endif
6652+
6653+
// Do a blocking copy; this is for simulation only so performance is
6654+
// probably not a huge concern. Any result of the call is not inspected.
6655+
hal->copy_globalmem_to_globalmem(0, old_mem_address, new_mem_address,
6656+
mem->size);
6657+
}
6658+
}
6659+
return 1;
6660+
}
6661+
65866662
static void acl_print_all_mem_in_region(acl_mem_region_t *region);
65876663
void acl_print_all_mem(void) {
65886664
acl_assert_locked();

0 commit comments

Comments
 (0)