@@ -3968,7 +3968,7 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
  // set MEM_MIGRATE2.1 to COMPLETE +
  // set MEM_MIGRATE2.2 to RUNNING +
  // set MEM_MIGRATE2.2 to COMPLETE +
-  // submit KERNEL2 to device = 5
+  // submit KERNEL2 to device = 10
  CHECK_EQUAL(offset + 15, m_devlog.num_ops);

  // Should have copied the memory over.
@@ -4332,6 +4332,272 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
  CHECK_EQUAL(CL_SUCCESS, clReleaseCommandQueue(cq2));
}

+TEST(acl_kernel_reprogram_scheduler, device_global_reprogram) {
+  // In this test, we force the device to contain a reprogram
+  // device global. The device is first reprogrammed eagerly by the
+  // clCreateProgramWithBinary call, which sets last_bin and
+  // loaded_bin. We reset both to null again to emulate a HW device
+  // that has the binary on the board but has not yet been
+  // reprogrammed during execution.
+  // The kernel is launched twice. The first launch should trigger
+  // a reprogram because of the device global, even though the
+  // random hash matches; the second launch should not, since the
+  // device has already been reprogrammed during execution.
+
+  // Force device to contain device global
+  m_device->def.autodiscovery_def.device_global_mem_defs.insert(
+      {"dev_glob1",
+       {/* address */ 1024,
+        /* size */ 1024,
+        /* host_access */ ACL_DEVICE_GLOBAL_HOST_ACCESS_READ_WRITE,
+        /* init_mode */ ACL_DEVICE_GLOBAL_INIT_MODE_REPROGRAM,
+        /* implement_in_csr */ false}});
+
+  // Initial eager reprogram
+  int offset = m_devlog.num_ops;
+  CHECK_EQUAL(3, offset);
+  // Just the initial program load.
+  CHECK_EQUAL(m_first_dev_bin, m_device->last_bin);
+  CHECK_EQUAL(m_first_dev_bin, m_device->loaded_bin);
+
+  // Pretend execution starts now
+  m_device->last_bin->unload_content();
+  m_device->last_bin = nullptr;
+  m_device->loaded_bin->unload_content();
+  m_device->loaded_bin = nullptr;
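+  // With last_bin and loaded_bin cleared, the scheduler no longer
+  // believes a binary has been programmed during this execution.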
+
+  acl_device_program_info_t *dp0 = check_dev_prog(m_program0);
+  m_context->reprogram_buf_read_callback = read_mem_callback;
+  m_context->reprogram_buf_write_callback = write_mem_callback;
+
+  // A device side buffer
+  cl_int status = CL_INVALID_VALUE;
+  cl_mem mem = clCreateBuffer(m_context, CL_MEM_READ_WRITE, 2048, 0, &status);
+  CHECK_EQUAL(CL_SUCCESS, status);
+  CHECK(mem);
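+  // Fill the host backing store and the device-side allocation with
+  // distinct sentinel bytes.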
+  memset(mem->host_mem.aligned_ptr, 'X', mem->size);
+  memset(mem->block_allocation->range.begin, 'x', mem->size);
+
+  CHECK_EQUAL(1, m_context->device_buffers_have_backing_store);
+  CHECK_EQUAL(0, mem->block_allocation->region->is_host_accessible);
+  CHECK_EQUAL(0, mem->writable_copy_on_host);
+
+  cl_kernel k = get_kernel(m_program0);
+  cl_event ue1 = get_user_event();
+  cl_event ue2 = get_user_event();
+  cl_event k_e1 = 0;
+  cl_event k_e2 = 0;
+
+  // Launch the kernel for the first time
+  CHECK_EQUAL(CL_SUCCESS, clSetKernelArg(k, 0, sizeof(cl_mem), &mem));
+  CHECK_EQUAL(CL_SUCCESS, clSetKernelArg(k, 1, sizeof(cl_mem), &mem));
+  CHECK_EQUAL(CL_SUCCESS, clEnqueueTask(m_cq, k, 1, &ue1, &k_e1));
+  CHECK_EQUAL(CL_COMMAND_TASK, k_e1->cmd.type);
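+  // The enqueued kernel uses a binary whose random hash matches the one
+  // already on the device; only the device global should force a reprogram.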
+  CHECK(m_device->def.autodiscovery_def.binary_rand_hash ==
+        k_e1->cmd.info.ndrange_kernel.dev_bin->get_devdef()
+            .autodiscovery_def.binary_rand_hash);
+
+  // last_bin and loaded_bin should still be in a reset state
+  CHECK(m_device->last_bin == nullptr);
+  CHECK(m_device->loaded_bin == nullptr);
+
+  acl_print_debug_msg("Forcing user event completion for first kernel\n");
+  CHECK_EQUAL(CL_SUCCESS, clSetUserEventStatus(ue1, CL_COMPLETE));
+  CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(ue1));
+
+  // Should have recorded that we loaded the program.
+  CHECK_EQUAL(&(dp0->device_binary), m_device->last_bin);
+  CHECK_EQUAL(&(dp0->device_binary), m_device->loaded_bin);
+
+  // submit device global forced REPROGRAM +
+  // set REPROGRAM to RUNNING +
+  // set REPROGRAM to COMPLETE +
+  // set MEM_MIGRATE 1 to RUNNING +
+  // set MEM_MIGRATE 1 to COMPLETE +
+  // set MEM_MIGRATE 2 to RUNNING +
+  // set MEM_MIGRATE 2 to COMPLETE +
+  // submit KERNEL = 8
+  CHECK_EQUAL(offset + 8, m_devlog.num_ops);
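+  // The first three log entries (offset == 3) are the initial eager
+  // programming; entries [3..5] are the forced reprogram transitions.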
+  const acl_device_op_t *op0submit = &(m_devlog.before[3]);
+  const acl_device_op_t *op0running = &(m_devlog.before[4]);
+  const acl_device_op_t *op0complete = &(m_devlog.before[5]);
+
+  // Device global forced reprogram
+  CHECK_EQUAL(ACL_DEVICE_OP_REPROGRAM, op0submit->info.type);
+  CHECK_EQUAL(0, op0submit->id);
+  CHECK(op0submit->info.event);
+  CHECK_EQUAL(CL_SUBMITTED, op0submit->status);
+  CHECK_EQUAL(0, op0submit->info.num_printf_bytes_pending);
+  CHECK_EQUAL(1, op0submit->first_in_group);
+  CHECK_EQUAL(0, op0submit->last_in_group);
+
+  CHECK_EQUAL(ACL_DEVICE_OP_REPROGRAM, op0running->info.type);
+  CHECK_EQUAL(0, op0running->id);
+  CHECK(op0running->info.event);
+  CHECK_EQUAL(CL_RUNNING, op0running->status);
+  CHECK_EQUAL(0, op0running->info.num_printf_bytes_pending);
+  CHECK_EQUAL(1, op0running->first_in_group);
+  CHECK_EQUAL(0, op0running->last_in_group);
+
+  CHECK_EQUAL(ACL_DEVICE_OP_REPROGRAM, op0complete->info.type);
+  CHECK_EQUAL(0, op0complete->id);
+  CHECK(op0complete->info.event);
+  CHECK_EQUAL(CL_COMPLETE, op0complete->status);
+  CHECK_EQUAL(0, op0complete->info.num_printf_bytes_pending);
+  CHECK_EQUAL(1, op0complete->first_in_group);
+  CHECK_EQUAL(0, op0complete->last_in_group);
+
+  // The device is still programmed with the same program.
+  CHECK_EQUAL(&(dp0->device_binary), m_device->last_bin);
+  CHECK_EQUAL(&(dp0->device_binary), m_device->loaded_bin);
+
+  const acl_device_op_t *op1submit = &(m_devlog.before[10]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op1submit->info.type);
+  CHECK_EQUAL(k_e1, op1submit->info.event);
+  CHECK_EQUAL(CL_SUBMITTED, op1submit->status);
+  CHECK_EQUAL(0, op1submit->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op1submit->first_in_group); // reprogram is first
+  CHECK_EQUAL(1, op1submit->last_in_group);
+
+  // The user-level event is linked to the kernel device op now.
+  CHECK_EQUAL(op1submit->id, k_e1->current_device_op->id);
+
+  // Pretend to start the kernel
+  acl_print_debug_msg("Say kernel is running\n");
+  ACL_LOCKED(
+      acl_receive_kernel_update(k_e1->current_device_op->id, CL_RUNNING));
+  CHECK_EQUAL(CL_RUNNING, k_e1->current_device_op->execution_status);
+
+  ACL_LOCKED(acl_idle_update(m_context));
+
+  // Now we have a "running" transition
+  CHECK_EQUAL(offset + 9, m_devlog.num_ops);
+  const acl_device_op_t *op1running = &(m_devlog.after[11]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op1running->info.type);
+  CHECK_EQUAL(k_e1, op1running->info.event);
+  CHECK_EQUAL(CL_RUNNING, op1running->status);
+  CHECK_EQUAL(0, op1running->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op1running->first_in_group);
+  CHECK_EQUAL(1, op1running->last_in_group);
+
+  // The running status was propagated up to the user-level event.
+  CHECK_EQUAL(CL_RUNNING, k_e1->execution_status);
+
+  acl_print_debug_msg("Say kernel is complete\n");
+  ACL_LOCKED(
+      acl_receive_kernel_update(k_e1->current_device_op->id, CL_COMPLETE));
+  CHECK_EQUAL(CL_COMPLETE, k_e1->current_device_op->execution_status);
+
+  ACL_LOCKED(acl_idle_update(m_context));
+  // Now we have a "complete" transition
+  CHECK_EQUAL(offset + 10, m_devlog.num_ops);
+  const acl_device_op_t *op1complete = &(m_devlog.after[12]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op1complete->info.type);
+  CHECK_EQUAL(k_e1, op1complete->info.event);
+  CHECK_EQUAL(CL_COMPLETE, op1complete->status);
+  CHECK_EQUAL(0, op1complete->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op1complete->first_in_group);
+  CHECK_EQUAL(1, op1complete->last_in_group);
+
+  // Completion timestamp has propagated up to the user level event.
+  CHECK_EQUAL(
+      acl_platform.device_op_queue.op[op1complete->id].timestamp[CL_COMPLETE],
+      k_e1->timestamp[CL_COMPLETE]);
+
+  // Completion wipes out the downlink.
+  CHECK_EQUAL(0, k_e1->current_device_op);
+
+  // Launch the kernel for the second time
+  CHECK_EQUAL(CL_SUCCESS, clEnqueueTask(m_cq, k, 1, &ue2, &k_e2));
+  CHECK_EQUAL(CL_COMMAND_TASK, k_e2->cmd.type);
+  CHECK(m_device->def.autodiscovery_def.binary_rand_hash ==
+        k_e2->cmd.info.ndrange_kernel.dev_bin->get_devdef()
+            .autodiscovery_def.binary_rand_hash);
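+  // The hash matches again, but this time no reprogram is expected: the
+  // device has already been reprogrammed during this execution.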
+
+  acl_print_debug_msg("Forcing user event completion for second kernel\n");
+  CHECK_EQUAL(CL_SUCCESS, clSetUserEventStatus(ue2, CL_COMPLETE));
+  CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(ue2));
+
+  // Should still have the same program loaded
+  CHECK_EQUAL(&(dp0->device_binary), m_device->last_bin);
+  CHECK_EQUAL(&(dp0->device_binary), m_device->loaded_bin);
+
+  // set MEM_MIGRATE 1 to RUNNING +
+  // set MEM_MIGRATE 1 to COMPLETE +
+  // set MEM_MIGRATE 2 to RUNNING +
+  // set MEM_MIGRATE 2 to COMPLETE +
+  // submit KERNEL = 5
+  CHECK_EQUAL(offset + 15, m_devlog.num_ops);
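+  // offset + 10 ops were logged by the end of the first launch; the five
+  // ops above (no reprogram this time) bring the total to offset + 15.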
+  const acl_device_op_t *op2submit = &(m_devlog.before[17]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op2submit->info.type);
+  CHECK_EQUAL(k_e2, op2submit->info.event);
+  CHECK_EQUAL(CL_SUBMITTED, op2submit->status);
+  CHECK_EQUAL(0, op2submit->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op2submit->first_in_group); // mem migration is first
+  CHECK_EQUAL(1, op2submit->last_in_group);
+
+  // The user-level event is linked to the kernel device op now.
+  CHECK_EQUAL(op2submit->id, k_e2->current_device_op->id);
+
+  // Pretend to start the kernel
+  acl_print_debug_msg("Say kernel is running\n");
+  ACL_LOCKED(
+      acl_receive_kernel_update(k_e2->current_device_op->id, CL_RUNNING));
+  CHECK_EQUAL(CL_RUNNING, k_e2->current_device_op->execution_status);
+
+  ACL_LOCKED(acl_idle_update(m_context));
+
+  // Now we have a "running" transition
+  CHECK_EQUAL(offset + 16, m_devlog.num_ops);
+  const acl_device_op_t *op2running = &(m_devlog.after[18]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op2running->info.type);
+  CHECK_EQUAL(k_e2, op2running->info.event);
+  CHECK_EQUAL(CL_RUNNING, op2running->status);
+  CHECK_EQUAL(0, op2running->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op2running->first_in_group);
+  CHECK_EQUAL(1, op2running->last_in_group);
+
+  // The running status was propagated up to the user-level event.
+  CHECK_EQUAL(CL_RUNNING, k_e2->execution_status);
+
+  acl_print_debug_msg("Say kernel is complete\n");
+  ACL_LOCKED(
+      acl_receive_kernel_update(k_e2->current_device_op->id, CL_COMPLETE));
+  CHECK_EQUAL(CL_COMPLETE, k_e2->current_device_op->execution_status);
+
+  ACL_LOCKED(acl_idle_update(m_context));
+  // Now we have a "complete" transition
+  CHECK_EQUAL(offset + 17, m_devlog.num_ops);
+  const acl_device_op_t *op2complete = &(m_devlog.after[19]);
+  CHECK_EQUAL(ACL_DEVICE_OP_KERNEL, op2complete->info.type);
+  CHECK_EQUAL(k_e2, op2complete->info.event);
+  CHECK_EQUAL(CL_COMPLETE, op2complete->status);
+  CHECK_EQUAL(0, op2complete->info.num_printf_bytes_pending);
+  CHECK_EQUAL(0, op2complete->first_in_group);
+  CHECK_EQUAL(1, op2complete->last_in_group);
+
+  // Completion timestamp has propagated up to the user level event.
+  CHECK_EQUAL(
+      acl_platform.device_op_queue.op[op2complete->id].timestamp[CL_COMPLETE],
+      k_e2->timestamp[CL_COMPLETE]);
+
+  // Completion wipes out the downlink.
+  CHECK_EQUAL(0, k_e2->current_device_op);
+
+  // And let go.
+  // (Don't check for CL_INVALID_EVENT on a second release of each of
+  // these events because the events might be reused.)
+  CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(mem));
+  CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(k_e1));
+  CHECK_EQUAL(CL_SUCCESS, clReleaseEvent(k_e2));
+  CHECK_EQUAL(CL_SUCCESS, clReleaseKernel(k));
+
+  // Clean up device global
+  m_device->def.autodiscovery_def.device_global_mem_defs.clear();
+
+  acl_print_debug_msg("DONE!\n");
+}
+
TEST(acl_kernel_reprogram_scheduler, use_host_buf_as_arg) {
  // Must be able to use a host-side buffer as a kernel argument.
  cl_int status = 0;