@@ -4224,8 +4224,6 @@ static void l_get_working_range(const acl_block_allocation_t *block_allocation,
4224
4224
acl_assert_locked ();
4225
4225
4226
4226
if (block_allocation->region == &(acl_platform.global_mem )) {
4227
- int env_override = 0 ;
4228
- (void )acl_get_offline_device_user_setting (&env_override);
4229
4227
const auto *global_mem_defs = &(acl_platform.device [physical_device_id]
4230
4228
.def .autodiscovery_def .global_mem_defs );
4231
4229
@@ -6583,6 +6581,84 @@ void acl_copy_device_buffers_from_host_after_programming(
6583
6581
}
6584
6582
}
6585
6583
6584
+ int acl_realloc_buffer_for_simulator (cl_mem mem,
6585
+ const unsigned int physical_device_id,
6586
+ const unsigned int mem_id) {
6587
+ // Only reallocate and migrate if mem resides in global memory
6588
+ if (mem->block_allocation ->region == &(acl_platform.global_mem )) {
6589
+ int mem_on_host;
6590
+ void *old_mem_address;
6591
+ void *new_mem_address;
6592
+
6593
+ acl_addr_range_t global_mem_range =
6594
+ acl_platform.device [physical_device_id]
6595
+ .def .autodiscovery_def .global_mem_defs [mem_id]
6596
+ .get_usable_range ();
6597
+
6598
+ // Save old address
6599
+ old_mem_address = l_get_address_of_writable_copy (mem, physical_device_id,
6600
+ &mem_on_host, CL_FALSE);
6601
+
6602
+ // The mem migration is only needed if the buffer is binded to the device
6603
+ // before global memory range is confirmed (i.e., before reprogram), and
6604
+ // assumed address range before reprogram is different from actual
6605
+ // Therefore, check if:
6606
+ // 1. allocation is deferred (if so auto migration will happen)
6607
+ // 2. buffer is on host
6608
+ // 3. buffer appears to be "at the destination"
6609
+ // 4. block allocation is outside the global memory range
6610
+ if (!mem->allocation_deferred &&
6611
+ !(mem->mem_cpy_host_ptr_pending || mem_on_host) &&
6612
+ (mem->block_allocation ==
6613
+ mem->reserved_allocations [physical_device_id][mem_id]) &&
6614
+ (mem->block_allocation ->range .begin >= global_mem_range.next ||
6615
+ mem->block_allocation ->range .next < global_mem_range.begin )) {
6616
+
6617
+ // mem_id should align if block allocation is the same as reserved
6618
+ // allocation
6619
+ assert (mem->mem_id == mem_id);
6620
+
6621
+ // Okay to set this to NULL, memory tracked in mem->block_allocation
6622
+ mem->reserved_allocations [physical_device_id][mem_id] = NULL ;
6623
+ // We will reallocate block, so remove it from linked list first
6624
+ acl_block_allocation_t **block_ptr =
6625
+ &(mem->block_allocation ->region ->first_block );
6626
+ acl_block_allocation_t *block = *block_ptr;
6627
+ assert (block != NULL ); // Should be at least one block
6628
+ while (*block_ptr) {
6629
+ if (block == mem->block_allocation ) {
6630
+ *block_ptr = block->next_block_in_region ;
6631
+ break ;
6632
+ }
6633
+ // Advance to the next block in the region
6634
+ block_ptr = &(block->next_block_in_region );
6635
+ block = *block_ptr;
6636
+ }
6637
+ if (!acl_do_physical_buffer_allocation (physical_device_id, mem)) {
6638
+ return 0 ;
6639
+ }
6640
+
6641
+ new_mem_address =
6642
+ mem->reserved_allocations [physical_device_id][mem_id]->range .begin ;
6643
+ const acl_hal_t *const hal = acl_get_hal ();
6644
+
6645
+ #ifdef MEM_DEBUG_MSG
6646
+ printf (" reallocating mem obj for simulation after getting global mem "
6647
+ " info, device %u ([0]%zx -> [0]%zx) " ,
6648
+ physical_device_id,
6649
+ (size_t )(ACL_STRIP_PHYSICAL_ID (old_mem_address)),
6650
+ (size_t )(ACL_STRIP_PHYSICAL_ID (new_mem_address)));
6651
+ #endif
6652
+
6653
+ // do blocking copy, this is for simulation only so performance is
6654
+ // probably not a huge concern
6655
+ hal->copy_globalmem_to_globalmem (0 , old_mem_address, new_mem_address,
6656
+ mem->size );
6657
+ }
6658
+ }
6659
+ return 1 ;
6660
+ }
6661
+
6586
6662
static void acl_print_all_mem_in_region (acl_mem_region_t *region);
6587
6663
void acl_print_all_mem (void ) {
6588
6664
acl_assert_locked ();
0 commit comments