Skip to content

Commit 6b013eb

Browse files
committed
Add buffer location property
The SYCL runtime calls clEnqueueWriteBuffer before clSetKernelArg, which causes the buffer to be allocated on the device before the runtime knows where the memory should be placed. As a result, when the kernel is later invoked, the memory has to be copied from the device's default global memory to the buffer location specified in the kernel. This is an additional memory copy operation. This extension does not interfere with the other way of setting the buffer location (i.e. through clSetKernelArg). This property exists for integration with the SYCL runtime; it is not intended for pure OpenCL users. If OpenCL users wish to use this property, they have to make sure the buffer location passed into clCreateBufferWithPropertiesINTEL matches the one defined in the kernel function interface; otherwise the extra memory copy issue will remain. When resizing reserved allocations, we now have the information needed to allocate the minimum amount of space required according to the property passed in.
1 parent 8b8ed35 commit 6b013eb

File tree

4 files changed

+38
-8
lines changed

4 files changed

+38
-8
lines changed

include/CL/cl_ext_intel.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,11 @@ typedef cl_bitfield cl_mem_alloc_flags_intel;
441441
typedef cl_uint cl_mem_info_intel;
442442

443443
/* cl_mem_alloc_info_intel */
444-
#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
445-
#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
446-
#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
447-
#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
444+
#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
445+
#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
446+
#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
447+
#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
448+
#define CL_MEM_ALLOC_BUFFER_LOCATION_INTEL 0x419E
448449
/* Enum value 0x419F is reserved for future queries. */
449450

450451
typedef cl_uint cl_unified_shared_memory_type_intel;

src/acl_mem.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
416416
cl_bool context_has_device_with_physical_mem;
417417
unsigned int idevice;
418418
cl_uint bank_id = 0;
419+
unsigned int tmp_mem_id = 0;
419420
acl_lock();
420421

421422
#ifdef MEM_DEBUG_MSG
@@ -431,6 +432,9 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
431432
}
432433
bank_id = (cl_uint) * (properties + 1);
433434
} break;
435+
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
436+
tmp_mem_id = (unsigned int)*(properties + 1);
437+
} break;
434438
default: {
435439
UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties");
436440
}
@@ -553,6 +557,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
553557
UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
554558
"Could not allocate a cl_mem object");
555559
}
560+
mem->mem_id = tmp_mem_id;
556561

557562
mem->block_allocation = new_block;
558563
mem->block_allocation->mem_obj = mem;
@@ -784,7 +789,6 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
784789
mem->context = context;
785790
mem->flags = flags;
786791
mem->size = size;
787-
mem->mem_id = 0;
788792

789793
mem->bank_id = 0;
790794
if (is_SOC_device()) {
@@ -1245,7 +1249,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA(
12451249

12461250
mem->context = context;
12471251
mem->flags = sub_flags;
1248-
mem->mem_id = 0;
1252+
mem->mem_id = buffer->mem_id;
12491253

12501254
if (is_SOC_device()) {
12511255
// HPS DDR is system managed for SoC.
@@ -4408,6 +4412,10 @@ void acl_resize_reserved_allocations_for_device(cl_mem mem,
44084412
unsigned int num_global_mem_systems =
44094413
def.autodiscovery_def.num_global_mem_systems;
44104414

4415+
// The buffer's mem_id may name a memory system beyond the count reported by
4416+
// autodiscovery, so reserve at least mem_id + 1 memory systems.
4417+
num_global_mem_systems = std::max(num_global_mem_systems, mem->mem_id + 1);
4418+
44114419
// For the simulation flow we don't know how many memory systems will exist
44124420
// until we load the .aocx, which may not happen until somewhat later.
44134421
// Reserving space is quite cheap, so reserve space for many memory systems.

src/acl_usm.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,8 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device,
255255
cl_int status;
256256

257257
// Use cl_mem for convenience
258-
cl_mem usm_device_buffer =
259-
clCreateBufferIntelFPGA(context, CL_MEM_READ_WRITE, size, NULL, &status);
258+
cl_mem usm_device_buffer = clCreateBufferWithPropertiesINTEL(
259+
context, NULL, CL_MEM_READ_WRITE, size, NULL, &status);
260260
if (status != CL_SUCCESS) {
261261
UNLOCK_BAIL_INFO(status, context, "Failed to allocate device memory");
262262
}

test/acl_mem_test.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2800,6 +2800,27 @@ TEST(acl_mem, case_205751_overlapping_alloc) {
28002800
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(c));
28012801
}
28022802

2803+
TEST(acl_mem, buffer_location_property) {
2804+
ACL_LOCKED(acl_print_debug_msg("begin buffer_location_property\n"));
2805+
// Test assumes more than 1 global memory space
2806+
// Allocate one buffer (a) of size bank_size with buffer location 0 and bind it
2807+
// to the device. Expect both the allocation and the bind to succeed.
2808+
cl_mem a;
2809+
cl_int status = CL_SUCCESS;
2810+
size_t total_size = ACL_RANGE_SIZE(
2811+
m_device[0]->def.autodiscovery_def.global_mem_defs[0].range);
2812+
size_t bank_size = total_size / 2;
2813+
size_t small_size = bank_size / 1024;
2814+
2815+
cl_mem_properties_intel props[] = {CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, 0, 0};
2816+
a = clCreateBufferWithPropertiesINTEL(m_context, props, 0, bank_size, 0,
2817+
&status);
2818+
CHECK_EQUAL(CL_SUCCESS, status);
2819+
ACL_LOCKED(CHECK_EQUAL(acl_bind_buffer_to_device(m_cq->device, a), 1));
2820+
2821+
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(a));
2822+
}
2823+
28032824
MT_TEST(acl_mem, map_buf_bad_flags) {
28042825
ACL_LOCKED(acl_print_debug_msg("begin buf_bad_flags\n"));
28052826
cl_int status = CL_SUCCESS;

0 commit comments

Comments
 (0)