Skip to content

Commit 8a49068

Browse files
committed
Test both read and write device global
1 parent d154cc7 commit 8a49068

File tree

3 files changed

+172
-38
lines changed

3 files changed

+172
-38
lines changed

include/acl_mem.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ cl_bool acl_is_sub_or_parent_buffer(cl_mem mem);
8989
void CL_CALLBACK acl_free_allocation_after_event_completion(
9090
cl_event event, cl_int event_command_exec_status, void *callback_data);
9191

92+
// Event callback used to tear down the temporaries of a device-global
// read/write once the associated event fires.
// callback_data is treated as an array of three pointers:
//   [0] USM device pointer, freed with clMemFreeINTEL(event->context, ...)
//   [1] cl_kernel, released with clReleaseKernel
//   [2] cl_event, released with clReleaseEvent
// Each slot is skipped when it is null; callback_data itself is then freed
// with acl_free.
void CL_CALLBACK acl_dev_global_cleanup(cl_event event,
93+
cl_int event_command_exec_status,
94+
void *callback_data);
95+
9296
#ifdef __GNUC__
9397
#pragma GCC visibility pop
9498
#endif

src/acl_mem.cpp

Lines changed: 132 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -410,15 +410,93 @@ ACL_EXPORT
410410
// CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL() {
411411
// Reads `size` bytes of the device global identified by `name` into `ptr`.
//
// The read is implemented as two commands on `command_queue`:
//   1. a copy kernel (created from `program`/`name`) that copies the device
//      global into a temporary USM device allocation, and
//   2. a USM memcpy from that temporary allocation into `ptr`, which depends
//      on the kernel's event.
//
// Parameters:
//   command_queue            queue both commands are enqueued on
//   program, name            used to create the copy kernel
//   blocking_read            forwarded to clEnqueueMemcpyINTEL as its
//                            blocking flag
//   size                     number of bytes to read
//   offset                   offset into the device global (see NOTE below)
//   ptr                      destination of the read
//   num_events_in_wait_list,
//   event_wait_list          dependencies of the copy kernel
//   event                    optional; receives the memcpy's event
//
// Returns CL_SUCCESS, or the first error reported by any step. On failure no
// event is handed back to the caller.
//
// Resource ownership: the kernel, the temporary allocation and the kernel's
// event are released here when the call blocks, and by
// acl_dev_global_cleanup when it does not.
CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL(
    cl_command_queue command_queue, cl_program program, const char *name,
    cl_bool blocking_read, size_t size, size_t offset, void *ptr,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *event) {
  // command_queue is dereferenced below, so reject a null handle up front.
  if (!command_queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  cl_int status = CL_SUCCESS;
  cl_kernel kernel = clCreateKernelIntelFPGA(program, name, &status);
  if (status != CL_SUCCESS) {
    return status;
  }

  void *tmp_dev_ptr = NULL;
  // Three-slot record consumed by acl_dev_global_cleanup:
  // [0] temporary device pointer, [1] kernel, [2] event to release (or NULL).
  void **callback_data = NULL;

  // Undo everything acquired so far. Only safe while nothing has been
  // enqueued, i.e. while no command can still be using these resources.
  auto fail_before_enqueue = [&](cl_int error) -> cl_int {
    if (callback_data) {
      acl_free(callback_data);
    }
    if (tmp_dev_ptr) {
      clMemFreeINTEL(command_queue->context, tmp_dev_ptr);
    }
    clReleaseKernel(kernel);
    return error;
  };

  // TODO: get the device global address from autodiscovery instead of this
  // hard-coded placeholder:
  // dev_addr_t dev_global_address =
  //     kernel->dev_bin->get_devdef().autodiscovery_def.?
  uintptr_t dev_global_address = 0x4000000;
  // NOTE(review): the offset is scaled by 8 ("1 unit of offset is 8 bits").
  // If `offset` is already a byte offset and device addresses are byte
  // addresses, this scaling is wrong -- confirm before relying on non-zero
  // offsets.
  void *dev_global_ptr = (void *)(dev_global_address + offset * 8);
  status = set_kernel_arg_mem_pointer_without_checks(kernel, 0, dev_global_ptr);
  if (status != CL_SUCCESS) {
    return fail_before_enqueue(status);
  }

  // The kernel copies the device global into this temporary USM device
  // allocation first.
  tmp_dev_ptr = clDeviceMemAllocINTEL(command_queue->context,
                                      command_queue->device, NULL, size, 1,
                                      &status);
  if (status != CL_SUCCESS) {
    return fail_before_enqueue(status);
  }
  if (!tmp_dev_ptr) {
    return fail_before_enqueue(CL_MEM_OBJECT_ALLOCATION_FAILURE);
  }

  status = clSetKernelArgMemPointerINTEL(kernel, 1, tmp_dev_ptr);
  if (status != CL_SUCCESS) {
    return fail_before_enqueue(status);
  }

  // Size kernel argument.
  status = clSetKernelArg(kernel, 2, sizeof(size_t), (const void *)(&size));
  if (status != CL_SUCCESS) {
    return fail_before_enqueue(status);
  }

  // Allocate the cleanup record before enqueuing anything, so that running
  // out of host memory cannot leave queued commands without an owner for
  // their resources.
  callback_data = (void **)acl_malloc(sizeof(void *) * 3);
  if (!callback_data) {
    return fail_before_enqueue(CL_OUT_OF_HOST_MEMORY);
  }
  callback_data[0] = (void *)(tmp_dev_ptr);
  callback_data[1] = (void *)(kernel);
  callback_data[2] = NULL;

  cl_event kernel_event = NULL;
  status = clEnqueueTask(command_queue, kernel, num_events_in_wait_list,
                         event_wait_list, &kernel_event);
  if (status != CL_SUCCESS) {
    return fail_before_enqueue(status);
  }

  // Copy from the temporary device memory into the user provided pointer.
  // A local event is used so that a NULL `event` argument is supported.
  cl_event copy_event = NULL;
  status = clEnqueueMemcpyINTEL(command_queue, blocking_read, ptr, tmp_dev_ptr,
                                size, 1, &kernel_event, &copy_event);
  if (status != CL_SUCCESS) {
    // The copy kernel is already queued and may still write to tmp_dev_ptr,
    // so the allocation and kernel must outlive it: defer their cleanup to
    // the kernel's completion instead of freeing them here.
    if (clSetEventCallback(kernel_event, CL_COMPLETE, acl_dev_global_cleanup,
                           (void *)callback_data) != CL_SUCCESS) {
      // Nothing can be freed safely; leak the device resources rather than
      // free memory that is in use.
      acl_free(callback_data);
    }
    // Assumes the queue holds its own reference to a pending event
    // -- TODO confirm.
    clReleaseEvent(kernel_event);
    return status;
  }

  if (blocking_read) {
    // The memcpy has completed, and it depended on the kernel, so nothing is
    // in flight any more. Release everything, reporting the first failure.
    acl_free(callback_data);
    const cl_int release_results[] = {
        clReleaseEvent(kernel_event),
        clMemFreeINTEL(command_queue->context, tmp_dev_ptr),
        clReleaseKernel(kernel)};
    for (cl_int release_status : release_results) {
      if (release_status != CL_SUCCESS) {
        clReleaseEvent(copy_event);
        return release_status;
      }
    }
  } else {
    // Clean up resources after the memcpy event finishes.
    callback_data[2] = (void *)(kernel_event);
    status = clSetEventCallback(copy_event, CL_COMPLETE,
                                acl_dev_global_cleanup, (void *)callback_data);
    if (status != CL_SUCCESS) {
      // Both commands are queued and may still use tmp_dev_ptr and the
      // kernel, so only the handles owned by this function are dropped.
      acl_free(callback_data);
      clReleaseEvent(kernel_event);
      clReleaseEvent(copy_event);
      return status;
    }
  }

  if (event) {
    *event = copy_event;
  } else {
    // Caller does not want the event. Assumes the queue holds its own
    // reference until the command completes -- TODO confirm.
    clReleaseEvent(copy_event);
  }
  return CL_SUCCESS;
}
423501

424502
ACL_EXPORT
@@ -452,6 +530,7 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
452530
if (status != CL_SUCCESS) {
453531
return status;
454532
}
533+
455534
// if (to_dev_event->execution_status != CL_COMPLETE) {
456535
// return CL_INVALID_OPERATION;
457536
// }
@@ -473,11 +552,10 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
473552
// dev_addr_t dev_global_address =
474553
// kernel->dev_bin->get_devdef().autodiscovery_def.?
475554
uintptr_t dev_global_address = 0x4000000;
476-
void *dev_global_ptr2 =
555+
void *dev_global_ptr =
477556
(void *)(dev_global_address + offset * 8); // 1 unit of offset is 8 bits
478-
status =
479-
set_kernel_arg_mem_pointer_without_checks(kernel, 1, dev_global_ptr2);
480-
// status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr2);
557+
status = set_kernel_arg_mem_pointer_without_checks(kernel, 1, dev_global_ptr);
558+
// status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr);
481559
if (status != CL_SUCCESS) {
482560
return status;
483561
}
@@ -499,24 +577,54 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
499577

500578
if (blocking_write) {
501579
status = clWaitForEvents(1, event);
580+
if (status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
581+
return status;
582+
}
583+
status = clMemFreeINTEL(command_queue->context, src_dev_ptr);
584+
if (status != CL_SUCCESS) {
585+
return status;
586+
}
587+
status = clReleaseKernel(kernel);
588+
if (status != CL_SUCCESS) {
589+
return status;
590+
}
591+
} else {
592+
// Clean up resources after event finishes
593+
void **callback_data = (void **)acl_malloc(sizeof(void *) * 3);
594+
if (!callback_data) {
595+
return CL_OUT_OF_HOST_MEMORY;
596+
}
597+
callback_data[0] = (void *)(src_dev_ptr);
598+
callback_data[1] = (void *)(kernel);
599+
clSetEventCallback(*event, CL_COMPLETE, acl_dev_global_cleanup,
600+
(void *)callback_data);
502601
}
503602

504-
if (blocking_write &&
505-
status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
506-
return status;
507-
}
603+
return CL_SUCCESS;
604+
}
508605

509-
// Free allocated device memory
510-
status = clMemFreeINTEL(command_queue->context, src_dev_ptr);
511-
if (status != CL_SUCCESS) {
512-
return status;
606+
// Event callback that releases the temporaries of a device-global transfer.
//
// Parameters:
//   event                      event the callback was registered on; its
//                              context is used to free the USM allocation
//   event_command_exec_status  unused; the same cleanup runs for any status
//   callback_data              array of three pointers allocated by the
//                              registrant and freed here with acl_free:
//                                [0] USM device pointer to free
//                                [1] kernel to release
//                                [2] temporary event to release
//
// All three slots are read, so the registrant must initialise every one of
// them; a null slot is skipped.
void CL_CALLBACK acl_dev_global_cleanup(cl_event event,
                                        cl_int event_command_exec_status,
                                        void *callback_data) {
  // Explicitly mark the parameter as unused to avoid compiler warnings.
  (void)event_command_exec_status;

  // Nothing was recorded, so there is nothing to release.
  if (!callback_data) {
    return;
  }
  void **callback_ptrs = (void **)callback_data;

  acl_lock();
  if (callback_ptrs[0]) {
    clMemFreeINTEL(event->context, callback_ptrs[0]);
  }
  if (callback_ptrs[1]) {
    clReleaseKernel(((cl_kernel)callback_ptrs[1]));
  }
  if (callback_ptrs[2]) {
    clReleaseEvent(((cl_event)callback_ptrs[2]));
  }
  acl_free(callback_data);
  acl_unlock();
}
521629

522630
ACL_EXPORT

test/acl_usm_test.cpp

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,13 +1246,7 @@ MT_TEST(acl_usm, read_device_global) {
12461246
char resultbuf[strsize];
12471247
cl_int status;
12481248

1249-
// Don't translate device addresses in the test HAL because we already
1250-
// will be passing in "device" memory that are actually in the host
1251-
// address space of the test executable. Ugh.
1252-
acltest_hal_emulate_device_mem = false;
1253-
12541249
cl_event write_event = 0;
1255-
cl_event copy_event = 0;
12561250
cl_event read_event = 0;
12571251

12581252
// Prepare host memory
@@ -1266,18 +1260,46 @@ MT_TEST(acl_usm, read_device_global) {
12661260
// CHECK(src_ptr != NULL);
12671261

12681262
syncThreads();
1263+
// Write to device global
1264+
status = clEnqueueWriteGlobalVariableINTEL(
1265+
m_cq, m_program, "kernel15_dev_global", CL_FALSE, strsize, 0, src_ptr, 0,
1266+
NULL, &write_event);
1267+
CHECK_EQUAL(CL_SUCCESS, status);
12691268

1270-
// Function of interest
1269+
// Read from device global, with dependence on write event
12711270
status = clEnqueueReadGlobalVariableINTEL(
1272-
m_cq, m_program, "kernel15_dev_global", CL_FALSE, strsize, 0, src_ptr, 0,
1273-
NULL, &copy_event);
1271+
m_cq, m_program, "kernel15_dev_global", CL_FALSE, strsize, 0, src_ptr, 1,
1272+
&write_event, &read_event);
12741273
CHECK_EQUAL(CL_SUCCESS, status);
1275-
int activation_id = copy_event->cmd.info.ndrange_kernel.invocation_wrapper
1276-
->image->activation_id;
1277-
acltest_call_kernel_update_callback(activation_id, CL_RUNNING);
1278-
acltest_call_kernel_update_callback(activation_id, CL_COMPLETE);
1274+
1275+
// Manually set "write device global" event done
1276+
int write_activation_id = write_event->cmd.info.ndrange_kernel
1277+
.invocation_wrapper->image->activation_id;
1278+
acltest_call_kernel_update_callback(write_activation_id, CL_RUNNING);
1279+
acltest_call_kernel_update_callback(write_activation_id, CL_COMPLETE);
1280+
1281+
// Nudge the scheduler to take above finish into account
1282+
acl_lock();
1283+
// If nothing's blocking, then complete right away
1284+
acl_idle_update(m_cq->context);
1285+
acl_unlock();
1286+
1287+
// The event returned from read device global is not the copy kernel launch
1288+
// event. Therefore, we first need to get the event that it depends on, then
1289+
// manually set it to complete
1290+
auto last_event = read_event->depend_on.end();
1291+
last_event--;
1292+
cl_event read_copy_kernel_event = *last_event;
1293+
int read_activation_id = read_copy_kernel_event->cmd.info.ndrange_kernel
1294+
.invocation_wrapper->image->activation_id;
1295+
acltest_call_kernel_update_callback(read_activation_id, CL_RUNNING);
1296+
acltest_call_kernel_update_callback(read_activation_id, CL_COMPLETE);
1297+
// Now the usm copy operation will execute
1298+
1299+
// Block on all event completion
12791300
CHECK_EQUAL(CL_SUCCESS, clFinish(m_cq));
1280-
CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(copy_event));
1301+
CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(write_event));
1302+
CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(read_event));
12811303

12821304
// Host pointer example
12831305
free(src_ptr);

0 commit comments

Comments
 (0)