@@ -1303,6 +1303,159 @@ void acl_kernel_if_launch_kernel(acl_kernel_if *kern,
1303
1303
activation_id);
1304
1304
}
1305
1305
1306
+ static bool acl_kernel_if_update_status_done (
1307
+ acl_kernel_if *kern, const unsigned int accel_id, const int activation_id,
1308
+ std::optional<unsigned int > &printf_size) {
1309
+ // Read the accelerator's status register
1310
+ unsigned int csr = 0 ;
1311
+ acl_kernel_cra_read (kern, accel_id, KERNEL_OFFSET_CSR, &csr);
1312
+
1313
+ // Ignore non-status bits.
1314
+ // Required by Option 3 wrappers which now have a version info in
1315
+ // top 16 bits.
1316
+ csr = ACL_KERNEL_READ_BIT_RANGE (csr, KERNEL_CSR_LAST_STATUS_BIT, 0 );
1317
+
1318
+ // Check for updated status bits
1319
+ if (0 == (csr & KERNEL_CSR_STATUS_BITS_MASK)) {
1320
+ return false ;
1321
+ }
1322
+
1323
+ // Clear the status bits that we read
1324
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d reporting status %x.\n " ,
1325
+ accel_id, csr);
1326
+
1327
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 1 ) {
1328
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is done.\n " , accel_id);
1329
+ }
1330
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 1 ) {
1331
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is stalled.\n " , accel_id);
1332
+ }
1333
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_UNSTALL) == 1 ) {
1334
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is unstalled.\n " ,
1335
+ accel_id);
1336
+ }
1337
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 1 ) {
1338
+ ACL_KERNEL_IF_DEBUG_MSG (
1339
+ kern, " :: Accelerator %d ready for temporal profile readback.\n " ,
1340
+ accel_id);
1341
+ }
1342
+
1343
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 0 &&
1344
+ ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 0 &&
1345
+ ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 0 ) {
1346
+ return false ;
1347
+ }
1348
+
1349
+ // read the printf buffer size from the kernel cra, just after the
1350
+ // kernel arguments
1351
+ if (kern->accel_num_printfs [accel_id] > 0 ) {
1352
+ printf_size = 0 ;
1353
+ acl_kernel_cra_read (kern, accel_id, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
1354
+ &*printf_size);
1355
+ assert (*printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
1356
+ ACL_KERNEL_IF_DEBUG_MSG (kern,
1357
+ " :: Accelerator %d printf buffer size is %d.\n " ,
1358
+ accel_id, *printf_size);
1359
+
1360
+ // kernel is stalled because the printf buffer is full
1361
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 1 ) {
1362
+ // clear interrupt
1363
+ unsigned int new_csr = 0 ;
1364
+ acl_kernel_cra_read (kern, accel_id, KERNEL_OFFSET_CSR, &new_csr);
1365
+ ACL_KERNEL_CLEAR_BIT (new_csr, KERNEL_CSR_STALLED);
1366
+
1367
+ ACL_KERNEL_IF_DEBUG_MSG (kern,
1368
+ " :: Calling acl_process_printf_buffer_fn with "
1369
+ " activation_id=%d and printf_size=%u.\n " ,
1370
+ activation_id, *printf_size);
1371
+ // update status, which will dump the printf buffer, set
1372
+ // debug_dump_printf = 0
1373
+ acl_process_printf_buffer_fn (activation_id, (int )*printf_size, 0 );
1374
+
1375
+ ACL_KERNEL_IF_DEBUG_MSG (
1376
+ kern, " :: Accelerator %d new csr is %x.\n " , accel_id,
1377
+ ACL_KERNEL_READ_BIT_RANGE (new_csr, KERNEL_CSR_LAST_STATUS_BIT, 0 ));
1378
+
1379
+ acl_kernel_cra_write (kern, accel_id, KERNEL_OFFSET_CSR, new_csr);
1380
+ return false ;
1381
+ }
1382
+ }
1383
+
1384
+ // Start profile counter readback if profile interrupt and not done
1385
+ if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) != 0 &&
1386
+ ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 0 ) {
1387
+ ACL_KERNEL_IF_DEBUG_MSG (
1388
+ kern, " :: Issuing profile reset command:: Accelerator %d.\n " , accel_id);
1389
+
1390
+ // Reset temporal profiling counter
1391
+ int status;
1392
+ unsigned int ctrl_val;
1393
+ status = acl_kernel_cra_read (kern, accel_id, KERNEL_OFFSET_CSR, &ctrl_val);
1394
+ if (status) {
1395
+ ACL_KERNEL_IF_DEBUG_MSG (
1396
+ kern, " :: Got bad status reading CSR ctrl reg:: Accelerator %d.\n " ,
1397
+ accel_id);
1398
+ }
1399
+ ACL_KERNEL_SET_BIT (ctrl_val, KERNEL_CSR_PROFILE_TEMPORAL_RESET);
1400
+ status = acl_kernel_cra_write (kern, accel_id, KERNEL_OFFSET_CSR, ctrl_val);
1401
+ if (status) {
1402
+ ACL_KERNEL_IF_DEBUG_MSG (
1403
+ kern, " :: Got bad status writing CSR ctrl reg:: Accelerator %d.\n " ,
1404
+ accel_id);
1405
+ }
1406
+
1407
+ if (activation_id < 0 ) {
1408
+ // This is an autorun kernel
1409
+ acl_process_autorun_profiler_scan_chain (kern->physical_device_id ,
1410
+ accel_id);
1411
+ } else {
1412
+ acl_kernel_profile_fn (activation_id);
1413
+ }
1414
+ return false ;
1415
+ }
1416
+
1417
+ return true ;
1418
+ }
1419
+
1420
+ static void acl_kernel_if_update_status_finish (
1421
+ acl_kernel_if *kern, const unsigned int accel_id, const int activation_id,
1422
+ const std::optional<unsigned int > &printf_size) {
1423
+ #ifdef TEST_PROFILING_HARDWARE
1424
+ // Test readback of fake profile data using the acl_hal_mmd function that
1425
+ // would be called from the acl runtime.
1426
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: testing profile hardware on accel_id=%u.\n " ,
1427
+ accel_id);
1428
+
1429
+ uint64_t data[10 ];
1430
+
1431
+ acl_hal_mmd_get_profile_data (kern->physical_device_id , accel_id, data, 6 );
1432
+
1433
+ acl_hal_mmd_reset_profile_counters (kern->physical_device_id , accel_id);
1434
+
1435
+ acl_hal_mmd_get_profile_data (kern->physical_device_id , accel_id, data, 6 );
1436
+ #endif
1437
+
1438
+ // Just clear the "done" bit. The "go" bit should already have been
1439
+ // cleared, but this is harmless anyway.
1440
+ // Since csr version 19, done bit is cleared when finish counter is read.
1441
+ // Since csr version 2022.3, done bit needs to be cleared explicitly.
1442
+ if (!kern->streaming_control_kernel_names [accel_id] &&
1443
+ (kern->csr_version == CSR_VERSION_ID_18_1 ||
1444
+ kern->csr_version >= CSR_VERSION_ID_2022_3)) {
1445
+ unsigned int dum;
1446
+ acl_kernel_cra_write (kern, accel_id, KERNEL_OFFSET_CSR, 0 );
1447
+ acl_kernel_cra_read (kern, accel_id, KERNEL_OFFSET_CSR, &dum);
1448
+ }
1449
+
1450
+ if (printf_size) {
1451
+ ACL_KERNEL_IF_DEBUG_MSG (kern,
1452
+ " :: Calling acl_process_printf_buffer_fn with "
1453
+ " activation_id=%d and printf_size=%u.\n " ,
1454
+ activation_id, *printf_size);
1455
+ acl_process_printf_buffer_fn (activation_id, (int )*printf_size, 0 );
1456
+ }
1457
+ }
1458
+
1306
1459
// Called when we receive a kernel status interrupt. Cycle through all of
1307
1460
// the running accelerators and check for updated status.
1308
1461
void acl_kernel_if_update_status (acl_kernel_if *kern) {
@@ -1327,126 +1480,30 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
1327
1480
const uintptr_t segment_pre_irq = kern->cur_segment ;
1328
1481
1329
1482
// Check which accelerators are done and update their status appropriately
1330
- for (unsigned int k = 0 ; k < kern->num_accel ; ++k ) {
1483
+ for (unsigned int accel_id = 0 ; accel_id < kern->num_accel ; ++accel_id ) {
1331
1484
int next_queue_back;
1332
1485
unsigned int finish_counter = 0 ;
1333
1486
1334
- if (kern->accel_queue_back [k] == (int )kern->accel_invoc_queue_depth [k] - 1 )
1487
+ if (kern->accel_queue_back [accel_id] ==
1488
+ (int )kern->accel_invoc_queue_depth [accel_id] - 1 )
1335
1489
next_queue_back = 0 ;
1336
1490
else
1337
- next_queue_back = kern->accel_queue_back [k ] + 1 ;
1491
+ next_queue_back = kern->accel_queue_back [accel_id ] + 1 ;
1338
1492
1339
1493
// Skip idle kernel
1340
- if (kern->accel_job_ids [k ][next_queue_back] < 0 ) {
1494
+ if (kern->accel_job_ids [accel_id ][next_queue_back] < 0 ) {
1341
1495
// If this is the autorun profiling kernel, we want to read back profiling
1342
1496
// data from it, so don't 'continue' (this kernel is always 'idle').
1343
- if (k != (unsigned )kern->autorun_profiling_kernel_id ) {
1344
- continue ;
1345
- }
1346
- }
1347
-
1348
- // Read the accelerator's status register
1349
- unsigned int csr = 0 ;
1350
- acl_kernel_cra_read (kern, k, KERNEL_OFFSET_CSR, &csr);
1351
-
1352
- // Ignore non-status bits.
1353
- // Required by Option 3 wrappers which now have a version info in
1354
- // top 16 bits.
1355
- csr = ACL_KERNEL_READ_BIT_RANGE (csr, KERNEL_CSR_LAST_STATUS_BIT, 0 );
1356
-
1357
- // Check for updated status bits
1358
- if (0 == (csr & KERNEL_CSR_STATUS_BITS_MASK))
1359
- continue ;
1360
-
1361
- // Clear the status bits that we read
1362
- ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d reporting status %x.\n " , k,
1363
- csr);
1364
-
1365
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 1 ) {
1366
- ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is done.\n " , k);
1367
- }
1368
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 1 ) {
1369
- ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is stalled.\n " , k);
1370
- }
1371
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_UNSTALL) == 1 ) {
1372
- ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d is unstalled.\n " , k);
1373
- }
1374
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 1 ) {
1375
- ACL_KERNEL_IF_DEBUG_MSG (
1376
- kern, " :: Accelerator %d ready for temporal profile readback.\n " , k);
1377
- }
1378
-
1379
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 0 &&
1380
- ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 0 &&
1381
- ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 0 )
1382
- continue ;
1383
-
1384
- int activation_id = kern->accel_job_ids [k][next_queue_back];
1385
-
1386
- // read the printf buffer size from the kernel cra, just after the
1387
- // kernel arguments
1388
- unsigned int printf_size = 0 ;
1389
- if (kern->accel_num_printfs [k] > 0 ) {
1390
- acl_kernel_cra_read (kern, k, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
1391
- &printf_size);
1392
- assert (printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
1393
- ACL_KERNEL_IF_DEBUG_MSG (kern,
1394
- " :: Accelerator %d printf buffer size is %d.\n " ,
1395
- k, printf_size);
1396
-
1397
- // kernel is stalled because the printf buffer is full
1398
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_STALLED) == 1 ) {
1399
- // clear interrupt
1400
- unsigned int new_csr = 0 ;
1401
- acl_kernel_cra_read (kern, k, KERNEL_OFFSET_CSR, &new_csr);
1402
- ACL_KERNEL_CLEAR_BIT (new_csr, KERNEL_CSR_STALLED);
1403
-
1404
- ACL_KERNEL_IF_DEBUG_MSG (kern,
1405
- " :: Calling acl_process_printf_buffer_fn with "
1406
- " activation_id=%d and printf_size=%u.\n " ,
1407
- activation_id, printf_size);
1408
- // update status, which will dump the printf buffer, set
1409
- // debug_dump_printf = 0
1410
- acl_process_printf_buffer_fn (activation_id, (int )printf_size, 0 );
1411
-
1412
- ACL_KERNEL_IF_DEBUG_MSG (
1413
- kern, " :: Accelerator %d new csr is %x.\n " , k,
1414
- ACL_KERNEL_READ_BIT_RANGE (new_csr, KERNEL_CSR_LAST_STATUS_BIT, 0 ));
1415
-
1416
- acl_kernel_cra_write (kern, k, KERNEL_OFFSET_CSR, new_csr);
1497
+ if (accel_id != (unsigned )kern->autorun_profiling_kernel_id ) {
1417
1498
continue ;
1418
1499
}
1419
1500
}
1420
1501
1421
- // Start profile counter readback if profile interrupt and not done
1422
- if (ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) != 0 &&
1423
- ACL_KERNEL_READ_BIT (csr, KERNEL_CSR_DONE) == 0 ) {
1424
- ACL_KERNEL_IF_DEBUG_MSG (
1425
- kern, " :: Issuing profile reset command:: Accelerator %d.\n " , k);
1426
-
1427
- // Reset temporal profiling counter
1428
- int status;
1429
- unsigned int ctrl_val;
1430
- status = acl_kernel_cra_read (kern, k, KERNEL_OFFSET_CSR, &ctrl_val);
1431
- if (status) {
1432
- ACL_KERNEL_IF_DEBUG_MSG (
1433
- kern, " :: Got bad status reading CSR ctrl reg:: Accelerator %d.\n " ,
1434
- k);
1435
- }
1436
- ACL_KERNEL_SET_BIT (ctrl_val, KERNEL_CSR_PROFILE_TEMPORAL_RESET);
1437
- status = acl_kernel_cra_write (kern, k, KERNEL_OFFSET_CSR, ctrl_val);
1438
- if (status) {
1439
- ACL_KERNEL_IF_DEBUG_MSG (
1440
- kern, " :: Got bad status writing CSR ctrl reg:: Accelerator %d.\n " ,
1441
- k);
1442
- }
1502
+ const int activation_id = kern->accel_job_ids [accel_id][next_queue_back];
1503
+ std::optional<unsigned int > printf_size;
1443
1504
1444
- if (activation_id < 0 ) {
1445
- // This is an autorun kernel
1446
- acl_process_autorun_profiler_scan_chain (kern->physical_device_id , k);
1447
- } else {
1448
- acl_kernel_profile_fn (activation_id);
1449
- }
1505
+ if (!acl_kernel_if_update_status_done (kern, accel_id, activation_id,
1506
+ printf_size)) {
1450
1507
continue ;
1451
1508
}
1452
1509
@@ -1456,51 +1513,20 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
1456
1513
// Only expect single completion for older csr version
1457
1514
finish_counter = 1 ;
1458
1515
} else {
1459
- acl_kernel_cra_read (kern, k , KERNEL_OFFSET_FINISH_COUNTER,
1516
+ acl_kernel_cra_read (kern, accel_id , KERNEL_OFFSET_FINISH_COUNTER,
1460
1517
&finish_counter);
1461
- ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d has %d finishes.\n " , k,
1462
- finish_counter);
1518
+ ACL_KERNEL_IF_DEBUG_MSG (kern, " :: Accelerator %d has %d finishes.\n " ,
1519
+ accel_id, finish_counter);
1463
1520
}
1464
1521
1465
1522
for (unsigned int i = 0 ; i < finish_counter; i++) {
1466
- activation_id = kern->accel_job_ids [k ][next_queue_back];
1523
+ const int activation_id = kern->accel_job_ids [accel_id ][next_queue_back];
1467
1524
1468
1525
// Tell the host library this job is done
1469
- kern->accel_job_ids [k][next_queue_back] = -1 ;
1470
-
1471
- #ifdef TEST_PROFILING_HARDWARE
1472
- // Test readback of fake profile data using the acl_hal_mmd function that
1473
- // would be called from the acl runtime.
1474
- ACL_KERNEL_IF_DEBUG_MSG (
1475
- kern, " :: testing profile hardware on accel_id=%u.\n " , k);
1476
-
1477
- uint64_t data[10 ];
1478
-
1479
- acl_hal_mmd_get_profile_data (kern->physical_device_id , k, data, 6 );
1526
+ kern->accel_job_ids [accel_id][next_queue_back] = -1 ;
1480
1527
1481
- acl_hal_mmd_reset_profile_counters (kern->physical_device_id , k);
1482
-
1483
- acl_hal_mmd_get_profile_data (kern->physical_device_id , k, data, 6 );
1484
- #endif
1485
-
1486
- // Just clear the "done" bit. The "go" bit should already have been
1487
- // cleared, but this is harmless anyway.
1488
- // Since csr version 19, done bit is cleared when finish counter is read.
1489
- // Since csr version 2022.3, done bit needs to be cleared explicitly.
1490
- if (kern->csr_version == CSR_VERSION_ID_18_1 ||
1491
- kern->csr_version >= CSR_VERSION_ID_2022_3) {
1492
- unsigned int dum;
1493
- acl_kernel_cra_write (kern, k, KERNEL_OFFSET_CSR, 0 );
1494
- acl_kernel_cra_read (kern, k, KERNEL_OFFSET_CSR, &dum);
1495
- }
1496
-
1497
- if (kern->accel_num_printfs [k] > 0 ) {
1498
- ACL_KERNEL_IF_DEBUG_MSG (kern,
1499
- " :: Calling acl_process_printf_buffer_fn with "
1500
- " activation_id=%d and printf_size=%u.\n " ,
1501
- activation_id, printf_size);
1502
- acl_process_printf_buffer_fn (activation_id, (int )printf_size, 0 );
1503
- }
1528
+ acl_kernel_if_update_status_finish (kern, accel_id, activation_id,
1529
+ printf_size);
1504
1530
1505
1531
// Executing the following update after reading from performance
1506
1532
// and efficiency monitors will clobber the throughput reported by
@@ -1509,16 +1535,16 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
1509
1535
// ports before setting CL_COMPLETE adds to the apparent kernel time.
1510
1536
//
1511
1537
acl_kernel_if_update_fn (activation_id, CL_COMPLETE);
1512
- kern->accel_queue_back [k ] = next_queue_back;
1538
+ kern->accel_queue_back [accel_id ] = next_queue_back;
1513
1539
1514
- if (kern->accel_queue_back [k ] ==
1515
- (int )kern->accel_invoc_queue_depth [k ] - 1 )
1540
+ if (kern->accel_queue_back [accel_id ] ==
1541
+ (int )kern->accel_invoc_queue_depth [accel_id ] - 1 )
1516
1542
next_queue_back = 0 ;
1517
1543
else
1518
- next_queue_back = kern->accel_queue_back [k ] + 1 ;
1544
+ next_queue_back = kern->accel_queue_back [accel_id ] + 1 ;
1519
1545
1520
- if (kern->accel_job_ids [k ][next_queue_back] > -1 ) {
1521
- acl_kernel_if_update_fn (kern->accel_job_ids [k ][next_queue_back],
1546
+ if (kern->accel_job_ids [accel_id ][next_queue_back] > -1 ) {
1547
+ acl_kernel_if_update_fn (kern->accel_job_ids [accel_id ][next_queue_back],
1522
1548
CL_RUNNING);
1523
1549
}
1524
1550
}
0 commit comments