@@ -4224,8 +4224,6 @@ static void l_get_working_range(const acl_block_allocation_t *block_allocation,
4224
4224
acl_assert_locked ();
4225
4225
4226
4226
if (block_allocation->region == &(acl_platform.global_mem )) {
4227
- int env_override = 0 ;
4228
- (void )acl_get_offline_device_user_setting (&env_override);
4229
4227
const auto *global_mem_defs = &(acl_platform.device [physical_device_id]
4230
4228
.def .autodiscovery_def .global_mem_defs );
4231
4229
@@ -4725,7 +4723,7 @@ cl_int l_enqueue_mem_transfer(cl_command_queue command_queue, cl_bool blocking,
4725
4723
if (src_buffer->flags & CL_MEM_HOST_WRITE_ONLY ||
4726
4724
src_buffer->flags & CL_MEM_HOST_NO_ACCESS) {
4727
4725
ERR_RET (CL_INVALID_OPERATION, context,
4728
- " clEnqeueueReadBuffer cannot be called on a buffer "
4726
+ " clEnqueueReadBuffer cannot be called on a buffer "
4729
4727
" created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS" );
4730
4728
}
4731
4729
break ;
@@ -4743,7 +4741,7 @@ cl_int l_enqueue_mem_transfer(cl_command_queue command_queue, cl_bool blocking,
4743
4741
if (dst_buffer->flags & CL_MEM_HOST_READ_ONLY ||
4744
4742
dst_buffer->flags & CL_MEM_HOST_NO_ACCESS) {
4745
4743
ERR_RET (CL_INVALID_OPERATION, context,
4746
- " clEnqeueueWriteBuffer cannot be called on a buffer "
4744
+ " clEnqueueWriteBuffer cannot be called on a buffer "
4747
4745
" created with CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS" );
4748
4746
}
4749
4747
break ;
@@ -6583,6 +6581,99 @@ void acl_copy_device_buffers_from_host_after_programming(
6583
6581
}
6584
6582
}
6585
6583
6584
+ // Simulator does not have any global memory interface information before
6585
+ // reprogram, the runtime initializes device def to have the same global
6586
+ // memory address range obtained from a autodiscovery string predefined in
6587
+ // acl_shipped_board_cfgs.h
6588
+ // When a buffer is created with the buffer location property specifying a
6589
+ // global memory whose address range lies beyond the range defined in the
6590
+ // default autodiscovery string, and is written before the device reprogram,
6591
+ // the write will bind the buffer to the wrong address range, causing issues
6592
+ // when running the kernel
6593
+ // The following function do a memory copy for the buffers binded to the
6594
+ // wrong address range to the right one after the global memory information
6595
+ // becomes available and before the kernel launch
6596
+ // Returns 1 on success and 0 on failure
6597
+ int acl_realloc_buffer_for_simulator (cl_mem mem,
6598
+ const unsigned int physical_device_id,
6599
+ const unsigned int mem_id) {
6600
+ // Only reallocate and migrate if mem resides in global memory
6601
+ if (mem->block_allocation ->region != &(acl_platform.global_mem )) {
6602
+ return 1 ;
6603
+ }
6604
+
6605
+ const acl_addr_range_t global_mem_range =
6606
+ acl_platform.device [physical_device_id]
6607
+ .def .autodiscovery_def .global_mem_defs [mem_id]
6608
+ .get_usable_range ();
6609
+
6610
+ // Save old address
6611
+ int mem_on_host;
6612
+ void *const old_mem_address = l_get_address_of_writable_copy (
6613
+ mem, physical_device_id, &mem_on_host, CL_FALSE);
6614
+
6615
+ // The mem copy is only needed if the buffer is bound to the device
6616
+ // before global memory range is confirmed (i.e., before reprogram), and
6617
+ // assumed address range before reprogram is different from actual
6618
+ // Therefore, check if:
6619
+ // 1. allocation is deferred (if so auto migration will happen)
6620
+ // 2. buffer is on host
6621
+ // 3. buffer appears to be "at the destination"
6622
+ // 4. block allocation is outside the global memory range
6623
+ if (!mem->allocation_deferred &&
6624
+ !(mem->mem_cpy_host_ptr_pending || mem_on_host) &&
6625
+ (mem->block_allocation ==
6626
+ mem->reserved_allocations [physical_device_id][mem_id]) &&
6627
+ (ACL_STRIP_PHYSICAL_ID (mem->block_allocation ->range .begin ) >=
6628
+ global_mem_range.next ||
6629
+ ACL_STRIP_PHYSICAL_ID (mem->block_allocation ->range .next ) <
6630
+ global_mem_range.begin )) {
6631
+
6632
+ // mem_id should align if block allocation is the same as reserved
6633
+ // allocation
6634
+ assert (mem->mem_id == mem_id);
6635
+
6636
+ // Okay to set this to NULL, memory tracked in mem->block_allocation
6637
+ mem->reserved_allocations [physical_device_id][mem_id] = NULL ;
6638
+ // We will reallocate block, so remove it from linked list first
6639
+ acl_block_allocation_t **block_ptr =
6640
+ &(mem->block_allocation ->region ->first_block );
6641
+ // try to find the mem->block_allocation in the linked list, error if
6642
+ // the block is not found before reaching the end of list
6643
+ while (true ) {
6644
+ acl_block_allocation_t *const block = *block_ptr;
6645
+ assert (block != NULL );
6646
+ if (block == mem->block_allocation ) {
6647
+ *block_ptr = block->next_block_in_region ;
6648
+ break ;
6649
+ }
6650
+ // Advance to the next block in the region
6651
+ block_ptr = &(block->next_block_in_region );
6652
+ }
6653
+ // Reallocate buffer range
6654
+ if (!acl_do_physical_buffer_allocation (physical_device_id, mem)) {
6655
+ return 0 ;
6656
+ }
6657
+
6658
+ void *const new_mem_address =
6659
+ mem->reserved_allocations [physical_device_id][mem_id]->range .begin ;
6660
+ const acl_hal_t *const hal = acl_get_hal ();
6661
+
6662
+ #ifdef MEM_DEBUG_MSG
6663
+ printf (" reallocating mem obj for simulation after getting global mem "
6664
+ " info, device %u ([0]%zx -> [0]%zx) " ,
6665
+ physical_device_id, (size_t )(ACL_STRIP_PHYSICAL_ID (old_mem_address)),
6666
+ (size_t )(ACL_STRIP_PHYSICAL_ID (new_mem_address)));
6667
+ #endif
6668
+
6669
+ // do blocking copy, this is for simulation only so performance is
6670
+ // probably not a huge concern
6671
+ hal->copy_globalmem_to_globalmem (0 , old_mem_address, new_mem_address,
6672
+ mem->size );
6673
+ }
6674
+ return 1 ;
6675
+ }
6676
+
6586
6677
static void acl_print_all_mem_in_region (acl_mem_region_t *region);
6587
6678
void acl_print_all_mem (void ) {
6588
6679
acl_assert_locked ();
0 commit comments