@@ -410,15 +410,93 @@ ACL_EXPORT
410
410
// CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL() {
411
411
// Enqueues a read of a device global variable into `ptr`.
//
// The read is staged through a temporary device USM buffer:
//   1. A kernel created from `program` under `name` is launched with
//      arg 0 = device-global address, arg 1 = temporary buffer,
//      arg 2 = `size`.
//   2. A USM memcpy (dependent on the kernel's event) copies the temporary
//      buffer into `ptr`.
//
// Parameters:
//   command_queue            queue the kernel and the copy are enqueued on;
//                            its context/device are used for the temporary
//                            allocation.
//   program, name            used to create the copy kernel.
//   blocking_read            forwarded to clEnqueueMemcpyINTEL; when true the
//                            temporaries are released before returning,
//                            otherwise they are released by
//                            acl_dev_global_cleanup once the copy completes.
//   size, offset             bytes to copy / offset added to the base address.
//   ptr                      destination of the final copy.
//   num_events_in_wait_list,
//   event_wait_list          dependencies of the kernel launch.
//   event                    optional; receives the event of the final copy.
//                            May be NULL, in which case an internal event is
//                            used to schedule the deferred cleanup.
//
// Returns CL_SUCCESS, or the first error reported by any of the underlying
// calls. On every error path the kernel, the temporary buffer, the
// intermediate event and the callback bookkeeping are released.
CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL(
    cl_command_queue command_queue, cl_program program, const char *name,
    cl_bool blocking_read, size_t size, size_t offset, void *ptr,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *event) {
  cl_int status = CL_SUCCESS;
  cl_kernel kernel = nullptr;
  void *tmp_dev_ptr = nullptr;
  cl_event tmp_event = nullptr;
  void **callback_data = nullptr;

  // Releases whatever this call has created so far and returns the first
  // release failure (CL_SUCCESS if none). Must not be called once ownership
  // has been handed to acl_dev_global_cleanup.
  auto release_resources = [&]() -> cl_int {
    cl_int first_error = CL_SUCCESS;
    auto note = [&first_error](cl_int result) {
      if (result != CL_SUCCESS && first_error == CL_SUCCESS) {
        first_error = result;
      }
    };
    if (tmp_event) {
      note(clReleaseEvent(tmp_event));
      tmp_event = nullptr;
    }
    if (tmp_dev_ptr) {
      note(clMemFreeINTEL(command_queue->context, tmp_dev_ptr));
      tmp_dev_ptr = nullptr;
    }
    if (kernel) {
      note(clReleaseKernel(kernel));
      kernel = nullptr;
    }
    if (callback_data) {
      acl_free(callback_data);
      callback_data = nullptr;
    }
    return first_error;
  };

  kernel = clCreateKernelIntelFPGA(program, name, &status);
  if (status != CL_SUCCESS) {
    return status;
  }

  // TODO: get the device global address from autodiscovery instead:
  // dev_addr_t dev_global_address =
  //     kernel->dev_bin->get_devdef().autodiscovery_def.?
  uintptr_t dev_global_address = 0x4000000;
  void *dev_global_ptr = reinterpret_cast<void *>(
      dev_global_address + offset * 8); // 1 unit of offset is 8 bits
  status = set_kernel_arg_mem_pointer_without_checks(kernel, 0, dev_global_ptr);
  if (status != CL_SUCCESS) {
    release_resources();
    return status;
  }

  // Copy device global memory to a temporary device USM pointer first.
  tmp_dev_ptr =
      clDeviceMemAllocINTEL(command_queue->context, command_queue->device,
                            nullptr, size, 1, &status);
  if (status == CL_SUCCESS && !tmp_dev_ptr) {
    status = CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }
  if (status != CL_SUCCESS) {
    release_resources();
    return status;
  }

  status = clSetKernelArgMemPointerINTEL(kernel, 1, tmp_dev_ptr);
  if (status != CL_SUCCESS) {
    release_resources();
    return status;
  }

  // Set size kernel arg.
  status = clSetKernelArg(kernel, 2, sizeof(size_t),
                          static_cast<const void *>(&size));
  if (status != CL_SUCCESS) {
    release_resources();
    return status;
  }

  // For a non-blocking read the cleanup is deferred to an event callback.
  // Reserve its bookkeeping before anything is enqueued so that running out
  // of host memory cannot strand in-flight commands.
  if (!blocking_read) {
    callback_data = static_cast<void **>(acl_malloc(sizeof(void *) * 3));
    if (!callback_data) {
      release_resources();
      return CL_OUT_OF_HOST_MEMORY;
    }
  }

  status = clEnqueueTask(command_queue, kernel, num_events_in_wait_list,
                         event_wait_list, &tmp_event);
  if (status != CL_SUCCESS) {
    release_resources();
    return status;
  }

  // The caller may legally pass event == NULL; a non-blocking read still
  // needs an event to hang the cleanup callback on.
  cl_event local_event = nullptr;
  cl_event *completion_event = event;
  if (!blocking_read && !completion_event) {
    completion_event = &local_event;
  }

  // Copy from the temporary device memory into the user provided pointer.
  status = clEnqueueMemcpyINTEL(command_queue, blocking_read, ptr, tmp_dev_ptr,
                                size, 1, &tmp_event, completion_event);
  if (status != CL_SUCCESS) {
    // The kernel was already enqueued and may still be writing to
    // tmp_dev_ptr; let it finish before the buffer is freed.
    clWaitForEvents(1, &tmp_event);
    release_resources();
    return status;
  }

  if (blocking_read) {
    // The copy has completed, so the temporaries can go away immediately.
    return release_resources();
  }

  // Clean up resources after the copy event finishes.
  callback_data[0] = tmp_dev_ptr;
  callback_data[1] = static_cast<void *>(kernel);
  callback_data[2] = static_cast<void *>(tmp_event);
  status = clSetEventCallback(*completion_event, CL_COMPLETE,
                              acl_dev_global_cleanup,
                              static_cast<void *>(callback_data));
  if (status == CL_SUCCESS) {
    // Ownership of the temporaries now belongs to the callback (which may
    // already have run); only the internally created event is still ours.
    if (completion_event == &local_event) {
      clReleaseEvent(local_event);
    }
    return CL_SUCCESS;
  }

  // The cleanup could not be deferred: wait for the copy and release the
  // temporaries here instead of leaking them.
  const cl_int wait_status = clWaitForEvents(1, completion_event);
  const cl_int release_status = release_resources();
  if (completion_event == &local_event) {
    clReleaseEvent(local_event);
  }
  return wait_status != CL_SUCCESS ? wait_status : release_status;
}
423
501
424
502
ACL_EXPORT
@@ -452,6 +530,7 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
452
530
if (status != CL_SUCCESS) {
453
531
return status;
454
532
}
533
+
455
534
// if (to_dev_event->execution_status != CL_COMPLETE) {
456
535
// return CL_INVALID_OPERATION;
457
536
// }
@@ -473,11 +552,10 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
473
552
// dev_addr_t dev_global_address =
474
553
// kernel->dev_bin->get_devdef().autodiscovery_def.?
475
554
uintptr_t dev_global_address = 0x4000000 ;
476
- void *dev_global_ptr2 =
555
+ void *dev_global_ptr =
477
556
(void *)(dev_global_address + offset * 8 ); // 1 unit of offset is 8 bits
478
- status =
479
- set_kernel_arg_mem_pointer_without_checks (kernel, 1 , dev_global_ptr2);
480
- // status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr2);
557
+ status = set_kernel_arg_mem_pointer_without_checks (kernel, 1 , dev_global_ptr);
558
+ // status = clSetKernelArgMemPointerINTEL(kernel, 1, dev_global_ptr);
481
559
if (status != CL_SUCCESS) {
482
560
return status;
483
561
}
@@ -499,24 +577,54 @@ CL_API_ENTRY cl_int clEnqueueWriteGlobalVariableINTEL(
499
577
500
578
if (blocking_write) {
501
579
status = clWaitForEvents (1 , event);
580
+ if (status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
581
+ return status;
582
+ }
583
+ status = clMemFreeINTEL (command_queue->context , src_dev_ptr);
584
+ if (status != CL_SUCCESS) {
585
+ return status;
586
+ }
587
+ status = clReleaseKernel (kernel);
588
+ if (status != CL_SUCCESS) {
589
+ return status;
590
+ }
591
+ } else {
592
+ // Clean up resources after event finishes
593
+ void **callback_data = (void **)acl_malloc (sizeof (void *) * 3 );
594
+ if (!callback_data) {
595
+ return CL_OUT_OF_HOST_MEMORY;
596
+ }
597
+ callback_data[0 ] = (void *)(src_dev_ptr);
598
+ callback_data[1 ] = (void *)(kernel);
599
+ clSetEventCallback (*event, CL_COMPLETE, acl_dev_global_cleanup,
600
+ (void *)callback_data);
502
601
}
503
602
504
- if (blocking_write &&
505
- status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
506
- return status;
507
- }
603
+ return CL_SUCCESS;
604
+ }
508
605
509
- // Free allocated device memory
510
- status = clMemFreeINTEL (command_queue->context , src_dev_ptr);
511
- if (status != CL_SUCCESS) {
512
- return status;
606
+ void CL_CALLBACK acl_dev_global_cleanup (cl_event event,
607
+ cl_int event_command_exec_status,
608
+ void *callback_data) {
609
+ void **callback_ptrs =
610
+ (void **)callback_data; // callback_ptrs[0] is usm device pointer
611
+ // callback_ptrs[1] kernel to be released
612
+ // callback_ptrs[2] temporary event to be released
613
+ event_command_exec_status =
614
+ event_command_exec_status; // Avoiding Windows warning.
615
+ event = event;
616
+ acl_lock ();
617
+ if (callback_ptrs[0 ]) {
618
+ clMemFreeINTEL (event->context , callback_ptrs[0 ]);
513
619
}
514
- // status = clReleaseKernel(kernel);
515
- // if (status != CL_SUCCESS) {
516
- // return status;
517
- // }
518
-
519
- return CL_SUCCESS;
620
+ if (callback_ptrs[1 ]) {
621
+ clReleaseKernel (((cl_kernel)callback_ptrs[1 ]));
622
+ }
623
+ if (callback_ptrs[2 ]) {
624
+ clReleaseEvent (((cl_event)callback_ptrs[2 ]));
625
+ }
626
+ acl_free (callback_data);
627
+ acl_unlock ();
520
628
}
521
629
522
630
ACL_EXPORT
0 commit comments