@@ -4723,7 +4723,7 @@ cl_int l_enqueue_mem_transfer(cl_command_queue command_queue, cl_bool blocking,
4723
4723
if (src_buffer->flags & CL_MEM_HOST_WRITE_ONLY ||
4724
4724
src_buffer->flags & CL_MEM_HOST_NO_ACCESS) {
4725
4725
ERR_RET (CL_INVALID_OPERATION, context,
4726
- " clEnqeueueReadBuffer cannot be called on a buffer "
4726
+ " clEnqueueReadBuffer cannot be called on a buffer "
4727
4727
" created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS" );
4728
4728
}
4729
4729
break ;
@@ -4741,7 +4741,7 @@ cl_int l_enqueue_mem_transfer(cl_command_queue command_queue, cl_bool blocking,
4741
4741
if (dst_buffer->flags & CL_MEM_HOST_READ_ONLY ||
4742
4742
dst_buffer->flags & CL_MEM_HOST_NO_ACCESS) {
4743
4743
ERR_RET (CL_INVALID_OPERATION, context,
4744
- " clEnqeueueWriteBuffer cannot be called on a buffer "
4744
+ " clEnqueueWriteBuffer cannot be called on a buffer "
4745
4745
" created with CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS" );
4746
4746
}
4747
4747
break ;
@@ -6581,80 +6581,98 @@ void acl_copy_device_buffers_from_host_after_programming(
6581
6581
}
6582
6582
}
6583
6583
6584
+ // Simulator does not have any global memory interface information before
6585
+ // reprogram, so the runtime initializes device def to have the same global
6586
+ // memory address range obtained from an autodiscovery string predefined in
6587
+ // acl_shipped_board_cfgs.h
6588
+ // When a buffer is created with the buffer location property specifying a
6589
+ // global memory whose address range lies beyond the range defined in the
6590
+ // default autodiscovery string, and is written before the device reprogram,
6591
+ // the write will bind the buffer to the wrong address range, causing issues
6592
+ // when running the kernel
6593
+ // The following function does a memory copy for the buffers bound to the
6594
+ // wrong address range to the right one after the global memory information
6595
+ // becomes available and before the kernel launch
6596
+ // Returns 1 on success and 0 on failure
6584
6597
int acl_realloc_buffer_for_simulator (cl_mem mem,
6585
6598
const unsigned int physical_device_id,
6586
6599
const unsigned int mem_id) {
6587
6600
// Only reallocate and migrate if mem resides in global memory
6588
- if (mem->block_allocation ->region = = &(acl_platform.global_mem )) {
6589
- int mem_on_host ;
6590
- void *old_mem_address;
6591
- void *new_mem_address;
6592
-
6593
- acl_addr_range_t global_mem_range =
6594
- acl_platform. device [physical_device_id]
6595
- . def . autodiscovery_def . global_mem_defs [mem_id]
6596
- . get_usable_range ();
6597
-
6598
- // Save old address
6599
- old_mem_address = l_get_address_of_writable_copy (mem, physical_device_id,
6600
- &mem_on_host, CL_FALSE);
6601
-
6602
- // The mem migration is only needed if the buffer is binded to the device
6603
- // before global memory range is confirmed (i.e., before reprogram), and
6604
- // assumed address range before reprogram is different from actual
6605
- // Therefore, check if:
6606
- // 1. allocation is deferred (if so auto migration will happen)
6607
- // 2. buffer is on host
6608
- // 3. buffer appears to be "at the destination"
6609
- // 4. block allocation is outside the global memory range
6610
- if (!mem-> allocation_deferred &&
6611
- !(mem-> mem_cpy_host_ptr_pending || mem_on_host) &&
6612
- (mem-> block_allocation ==
6613
- mem->reserved_allocations [physical_device_id][mem_id]) &&
6614
- (mem->block_allocation -> range . begin >= global_mem_range. next ||
6615
- mem->block_allocation -> range . next < global_mem_range. begin )) {
6616
-
6617
- // mem_id should align if block allocation is the same as reserved
6618
- // allocation
6619
- assert (mem->mem_id == mem_id);
6620
-
6621
- // Okay to set this to NULL, memory tracked in mem->block_allocation
6622
- mem-> reserved_allocations [physical_device_id][ mem_id] = NULL ;
6623
- // We will reallocate block, so remove it from linked list first
6624
- acl_block_allocation_t **block_ptr =
6625
- &(mem-> block_allocation -> region -> first_block );
6626
- acl_block_allocation_t *block = *block_ptr;
6627
- assert (block ! = NULL ); // Should be at least one block
6628
- while (*block_ptr) {
6629
- if (block == mem-> block_allocation ) {
6630
- *block_ptr = block-> next_block_in_region ;
6631
- break ;
6632
- }
6633
- // Advance to the next block in the region
6634
- block_ptr = &(block-> next_block_in_region ) ;
6635
- block = *block_ptr ;
6636
- }
6637
- if (! acl_do_physical_buffer_allocation (physical_device_id, mem)) {
6638
- return 0 ;
6601
+ if (mem->block_allocation ->region ! = &(acl_platform.global_mem )) {
6602
+ return 1 ;
6603
+ }
6604
+
6605
+ int mem_on_host;
6606
+ void *old_mem_address;
6607
+ void *new_mem_address;
6608
+
6609
+ acl_addr_range_t global_mem_range =
6610
+ acl_platform. device [physical_device_id]
6611
+ . def . autodiscovery_def . global_mem_defs [mem_id]
6612
+ . get_usable_range ();
6613
+
6614
+ // Save old address
6615
+ old_mem_address = l_get_address_of_writable_copy ( mem, physical_device_id,
6616
+ &mem_on_host, CL_FALSE);
6617
+
6618
+ // The mem copy is only needed if the buffer is binded to the device
6619
+ // before global memory range is confirmed (i.e., before reprogram), and
6620
+ // assumed address range before reprogram is different from actual
6621
+ // Therefore, check if:
6622
+ // 1. allocation is deferred (if so auto migration will happen)
6623
+ // 2. buffer is on host
6624
+ // 3. buffer appears to be "at the destination"
6625
+ // 4. block allocation is outside the global memory range
6626
+ if (! mem->allocation_deferred &&
6627
+ ! (mem->mem_cpy_host_ptr_pending || mem_on_host) &&
6628
+ ( mem->block_allocation ==
6629
+ mem-> reserved_allocations [physical_device_id][mem_id]) &&
6630
+ ( ACL_STRIP_PHYSICAL_ID (mem-> block_allocation -> range . begin ) >=
6631
+ global_mem_range. next ||
6632
+ ACL_STRIP_PHYSICAL_ID (mem->block_allocation -> range . next ) <
6633
+ global_mem_range. begin )) {
6634
+
6635
+ // mem_id should align if block allocation is the same as reserved
6636
+ // allocation
6637
+ assert (mem-> mem_id == mem_id);
6638
+
6639
+ // Okay to set this to NULL, memory tracked in mem->block_allocation
6640
+ mem-> reserved_allocations [physical_device_id][mem_id] = NULL ;
6641
+ // We will reallocate block, so remove it from linked list first
6642
+ acl_block_allocation_t **block_ptr =
6643
+ &(mem-> block_allocation -> region -> first_block ) ;
6644
+ // try to find the mem->block_allocation in the linked list, error if
6645
+ // the block is not found before reaching the end of list
6646
+ while ( true ) {
6647
+ acl_block_allocation_t * const block = *block_ptr ;
6648
+ assert ( block != NULL ) ;
6649
+ if (block == mem-> block_allocation ) {
6650
+ *block_ptr = block-> next_block_in_region ;
6651
+ break ;
6639
6652
}
6653
+ // Advance to the next block in the region
6654
+ block_ptr = &(block->next_block_in_region );
6655
+ }
6656
+ // Reallocate buffer range
6657
+ if (!acl_do_physical_buffer_allocation (physical_device_id, mem)) {
6658
+ return 0 ;
6659
+ }
6640
6660
6641
- new_mem_address =
6642
- mem->reserved_allocations [physical_device_id][mem_id]->range .begin ;
6643
- const acl_hal_t *const hal = acl_get_hal ();
6661
+ new_mem_address =
6662
+ mem->reserved_allocations [physical_device_id][mem_id]->range .begin ;
6663
+ const acl_hal_t *const hal = acl_get_hal ();
6644
6664
6645
6665
#ifdef MEM_DEBUG_MSG
6646
- printf (" reallocating mem obj for simulation after getting global mem "
6647
- " info, device %u ([0]%zx -> [0]%zx) " ,
6648
- physical_device_id,
6649
- (size_t )(ACL_STRIP_PHYSICAL_ID (old_mem_address)),
6650
- (size_t )(ACL_STRIP_PHYSICAL_ID (new_mem_address)));
6666
+ printf (" reallocating mem obj for simulation after getting global mem "
6667
+ " info, device %u ([0]%zx -> [0]%zx) " ,
6668
+ physical_device_id, (size_t )(ACL_STRIP_PHYSICAL_ID (old_mem_address)),
6669
+ (size_t )(ACL_STRIP_PHYSICAL_ID (new_mem_address)));
6651
6670
#endif
6652
6671
6653
- // do blocking copy, this is for simulation only so performance is
6654
- // probably not a huge concern
6655
- hal->copy_globalmem_to_globalmem (0 , old_mem_address, new_mem_address,
6656
- mem->size );
6657
- }
6672
+ // do blocking copy, this is for simulation only so performance is
6673
+ // probably not a huge concern
6674
+ hal->copy_globalmem_to_globalmem (0 , old_mem_address, new_mem_address,
6675
+ mem->size );
6658
6676
}
6659
6677
return 1 ;
6660
6678
}
0 commit comments