@@ -3962,7 +3962,7 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
3962
3962
// set MEM_MIGRATE2.1 to COMPLETE +
3963
3963
// set MEM_MIGRATE2.2 to RUNNING +
3964
3964
// set MEM_MIGRATE2.2 to COMPLETE +
3965
- // submit KERNEL2 to device = 5
3965
+ // submit KERNEL2 to device = 10
3966
3966
CHECK_EQUAL (offset + 15 , m_devlog.num_ops );
3967
3967
3968
3968
// Should have copied the memory over.
@@ -4326,6 +4326,180 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
4326
4326
CHECK_EQUAL (CL_SUCCESS, clReleaseCommandQueue (cq2));
4327
4327
}
4328
4328
4329
+ TEST (acl_kernel_reprogram_scheduler, device_global_reprogram) {
4330
+ // In this test, we force the device to contain a device global whose
4331
+ // init mode is REPROGRAM. The device is first reprogrammed eagerly
4332
+ // due to the clCreateProgramWithBinary call; then, when the
4333
+ // kernel is enqueued, another reprogram should be scheduled
4334
+ // even though the device is already programmed with the right
4335
+ // binary, due to the presence of the device global.
4336
+
4337
+ // Add a device global with REPROGRAM init mode to the device definition.
4338
+ m_device->def .autodiscovery_def .device_global_mem_defs .insert (
4339
+ {" dev_glob1" ,
4340
+ {/* address */ 1024 ,
4341
+ /* size */ 1024 ,
4342
+ /* host_access */ ACL_DEVICE_GLOBAL_HOST_ACCESS_READ_WRITE,
4343
+ /* init_mode */ ACL_DEVICE_GLOBAL_INIT_MODE_REPROGRAM,
4344
+ /* implement_in_csr */ false }});
4345
+
4346
+ // Initial eager reprogram: 3 ops are already in the device op log.
4347
+ int offset = m_devlog.num_ops ;
4348
+ CHECK_EQUAL (3 , offset);
4349
+
4350
+ acl_device_program_info_t *dp0 = check_dev_prog (m_program0);
4351
+
4352
+ m_context->reprogram_buf_read_callback = read_mem_callback;
4353
+ m_context->reprogram_buf_write_callback = write_mem_callback;
4354
+
4355
+ // A device side buffer, used below for both kernel arguments.
4356
+ cl_int status = CL_INVALID_VALUE;
4357
+ cl_mem mem = clCreateBuffer (m_context, CL_MEM_READ_WRITE, 2048 , 0 , &status);
4358
+ CHECK_EQUAL (CL_SUCCESS, status);
4359
+ CHECK (mem);
4360
+ memset (mem->host_mem .aligned_ptr , ' X' , mem->size );
4361
+ memset (mem->block_allocation ->range .begin , ' x' , mem->size );
4362
+
4363
+ CHECK_EQUAL (1 , m_context->device_buffers_have_backing_store );
4364
+ CHECK_EQUAL (0 , mem->block_allocation ->region ->is_host_accessible );
4365
+ CHECK_EQUAL (0 , mem->writable_copy_on_host );
4366
+
4367
+ cl_kernel k = get_kernel (m_program0);
4368
+
4369
+ // Just the initial program load.
4370
+ CHECK_EQUAL (m_first_dev_bin, m_device->last_bin );
4371
+ CHECK_EQUAL (m_first_dev_bin, m_device->loaded_bin );
4372
+
4373
+ cl_event ue = get_user_event ();
4374
+ cl_event k_e = 0 ;
4375
+
4376
+ CHECK_EQUAL (CL_SUCCESS, clSetKernelArg (k, 0 , sizeof (cl_mem), &mem));
4377
+ CHECK_EQUAL (CL_SUCCESS, clSetKernelArg (k, 1 , sizeof (cl_mem), &mem));
4378
+ CHECK_EQUAL (CL_SUCCESS, clEnqueueTask (m_cq, k, 1 , &ue, &k_e));
4379
+ CHECK_EQUAL (CL_COMMAND_TASK, k_e->cmd .type );
4380
+
4381
+ // The kernel still waits on the user event, so only initial programming has occurred.
4382
+ // It has 3 transitions logged: SUBMITTED, RUNNING, COMPLETE.
4383
+ CHECK_EQUAL (m_first_dev_bin, m_device->last_bin );
4384
+ CHECK_EQUAL (m_first_dev_bin, m_device->loaded_bin );
4385
+
4386
+ acl_print_debug_msg (" Forcing user event completion\n " );
4387
+ CHECK_EQUAL (CL_SUCCESS, clSetUserEventStatus (ue, CL_COMPLETE));
4388
+ CHECK_EQUAL (CL_SUCCESS, clReleaseEvent (ue));
4389
+
4390
+ // Should have recorded that we loaded the program.
4391
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->last_bin );
4392
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->loaded_bin );
4393
+
4394
+ // submit device global forced REPROGRAM +
4395
+ // set REPROGRAM to RUNNING +
4396
+ // set REPROGRAM to COMPLETE +
4397
+ // set MEM_MIGRATE 1 to RUNNING +
4398
+ // set MEM_MIGRATE 1 to COMPLETE +
4399
+ // set MEM_MIGRATE 2 to RUNNING +
4400
+ // set MEM_MIGRATE 2 to COMPLETE +
4401
+ // submit KERNEL = 8
4402
+ CHECK_EQUAL (offset+8 , m_devlog.num_ops );
4403
+ const acl_device_op_t *op0submit = &(m_devlog.before [3 ]); // entries 3..5 follow the 3 initial ops (offset == 3)
4404
+ const acl_device_op_t *op0running = &(m_devlog.before [4 ]);
4405
+ const acl_device_op_t *op0complete = &(m_devlog.before [5 ]);
4406
+
4407
+ // Device global forced reprogram: check its SUBMITTED, RUNNING and COMPLETE entries.
4408
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0submit->info .type );
4409
+ CHECK_EQUAL (0 , op0submit->id );
4410
+ CHECK (op0submit->info .event );
4411
+ CHECK_EQUAL (CL_SUBMITTED, op0submit->status );
4412
+ CHECK_EQUAL (0 , op0submit->info .num_printf_bytes_pending );
4413
+ CHECK_EQUAL (1 , op0submit->first_in_group );
4414
+ CHECK_EQUAL (0 , op0submit->last_in_group );
4415
+
4416
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0running->info .type );
4417
+ CHECK_EQUAL (0 , op0running->id );
4418
+ CHECK (op0running->info .event );
4419
+ CHECK_EQUAL (CL_RUNNING, op0running->status );
4420
+ CHECK_EQUAL (0 , op0running->info .num_printf_bytes_pending );
4421
+ CHECK_EQUAL (1 , op0running->first_in_group );
4422
+ CHECK_EQUAL (0 , op0running->last_in_group );
4423
+
4424
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0complete->info .type );
4425
+ CHECK_EQUAL (0 , op0complete->id );
4426
+ CHECK (op0complete->info .event );
4427
+ CHECK_EQUAL (CL_COMPLETE, op0complete->status );
4428
+ CHECK_EQUAL (0 , op0complete->info .num_printf_bytes_pending );
4429
+ CHECK_EQUAL (1 , op0complete->first_in_group );
4430
+ CHECK_EQUAL (0 , op0complete->last_in_group );
4431
+
4432
+ // The device is still programmed with the same program.
4433
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->last_bin );
4434
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->loaded_bin );
4435
+
4436
+ const acl_device_op_t *op1submit = &(m_devlog.before [10 ]); // entry 10 is the 8th new op: submit KERNEL
4437
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op1submit->info .type );
4438
+ CHECK_EQUAL (k_e, op1submit->info .event );
4439
+ CHECK_EQUAL (CL_SUBMITTED, op1submit->status );
4440
+ CHECK_EQUAL (0 , op1submit->info .num_printf_bytes_pending );
4441
+ CHECK_EQUAL (0 , op1submit->first_in_group ); // reprogram is first
4442
+ CHECK_EQUAL (1 , op1submit->last_in_group );
4443
+
4444
+ // The user-level event is linked to the kernel device op now.
4445
+ CHECK_EQUAL (op1submit->id , k_e->current_device_op ->id );
4446
+
4447
+ // Pretend to start the kernel
4448
+ acl_print_debug_msg (" Say kernel is running\n " );
4449
+ ACL_LOCKED (acl_receive_kernel_update (k_e->current_device_op ->id , CL_RUNNING));
4450
+ CHECK_EQUAL (CL_RUNNING, k_e->current_device_op ->execution_status );
4451
+
4452
+ ACL_LOCKED (acl_idle_update (m_context));
4453
+
4454
+ // Now we have a "running" transition, logged as one more op (entry 11).
4455
+ CHECK_EQUAL (offset+9 , m_devlog.num_ops );
4456
+ const acl_device_op_t *op2a = &(m_devlog.after [11 ]);
4457
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op2a->info .type );
4458
+ CHECK_EQUAL (k_e, op2a->info .event );
4459
+ CHECK_EQUAL (CL_RUNNING, op2a->status );
4460
+ CHECK_EQUAL (0 , op2a->info .num_printf_bytes_pending );
4461
+ CHECK_EQUAL (0 , op2a->first_in_group );
4462
+ CHECK_EQUAL (1 , op2a->last_in_group );
4463
+
4464
+ // The running status was propagated up to the user-level event.
4465
+ CHECK_EQUAL (CL_RUNNING, k_e->execution_status );
4466
+
4467
+ acl_print_debug_msg (" Say kernel is complete\n " );
4468
+ ACL_LOCKED (
4469
+ acl_receive_kernel_update (k_e->current_device_op ->id , CL_COMPLETE));
4470
+ CHECK_EQUAL (CL_COMPLETE, k_e->current_device_op ->execution_status );
4471
+
4472
+ ACL_LOCKED (acl_idle_update (m_context));
4473
+ // Now we have a "complete" transition, logged as one more op (entry 12).
4474
+ CHECK_EQUAL (offset+10 , m_devlog.num_ops );
4475
+ const acl_device_op_t *op3a = &(m_devlog.after [12 ]);
4476
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op3a->info .type );
4477
+ CHECK_EQUAL (k_e, op3a->info .event );
4478
+ CHECK_EQUAL (CL_COMPLETE, op3a->status );
4479
+ CHECK_EQUAL (0 , op3a->info .num_printf_bytes_pending );
4480
+ CHECK_EQUAL (0 , op3a->first_in_group );
4481
+ CHECK_EQUAL (1 , op3a->last_in_group );
4482
+
4483
+ // Completion timestamp has propagated up to the user level event.
4484
+ CHECK_EQUAL (acl_platform.device_op_queue .op [op3a->id ].timestamp [CL_COMPLETE],
4485
+ k_e->timestamp [CL_COMPLETE]);
4486
+
4487
+ // Completion wipes out the downlink.
4488
+ CHECK_EQUAL (0 , k_e->current_device_op );
4489
+
4490
+ // And let go.
4491
+ // (Don't check for CL_INVALID_EVENT on a second release of each of
4492
+ // these events because the events might be reused.)
4493
+ CHECK_EQUAL (CL_SUCCESS, clReleaseMemObject (mem));
4494
+ CHECK_EQUAL (CL_SUCCESS, clReleaseEvent (k_e));
4495
+ CHECK_EQUAL (CL_SUCCESS, clReleaseKernel (k));
4496
+
4497
+ // Clean up the device global that was added at the top of this test.
4498
+ m_device->def .autodiscovery_def .device_global_mem_defs .clear ();
4499
+
4500
+ acl_print_debug_msg (" DONE!\n " );
4501
+ }
4502
+
4329
4503
TEST (acl_kernel_reprogram_scheduler, use_host_buf_as_arg) {
4330
4504
// Must be able to use a host-side buffer as a kernel argument.
4331
4505
cl_int status = 0 ;
0 commit comments