Skip to content

Commit 569c6c0

Browse files
committed
kernel_if: factor out CSR-dependent status handling into separate functions
1 parent 3187bc1 commit 569c6c0

File tree

1 file changed

+179
-152
lines changed

1 file changed

+179
-152
lines changed

src/acl_kernel_if.cpp

Lines changed: 179 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,158 @@ void acl_kernel_if_launch_kernel(acl_kernel_if *kern,
13031303
activation_id);
13041304
}
13051305

1306+
// Reads the CSR of accelerator `accel_id` and services any status event that
// does not require completion processing by the caller.
//
// Parameters:
//   kern          - kernel interface owning the accelerator.
//   accel_id      - index of the accelerator whose CSR is inspected.
//   activation_id - job id forwarded to the printf/profile callbacks; a
//                   negative value is treated as an autorun kernel in the
//                   profile path.
//   printf_size   - out-parameter. Set to the value read from
//                   KERNEL_OFFSET_PRINTF_BUFFER_SIZE when the accelerator has
//                   printfs and at least one of the DONE / STALLED /
//                   PROFILE_TEMPORAL_STATUS bits is set; left untouched
//                   otherwise.
//
// Returns false when there is nothing further for the caller to do:
//   * no status bit is set in the CSR,
//   * none of DONE / STALLED / PROFILE_TEMPORAL_STATUS is set,
//   * the accelerator has printfs and is STALLED (the printf buffer is
//     processed and the STALLED bit is cleared here), or
//   * PROFILE_TEMPORAL_STATUS is set without DONE (the temporal profile
//     counter is reset and the profile callback is invoked here).
// Returns true in every other case.
static bool acl_kernel_if_update_status_done(
    acl_kernel_if *kern, const unsigned int accel_id, const int activation_id,
    std::optional<unsigned int> &printf_size) {
  // Read the accelerator's status register
  unsigned int csr = 0;
  acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_CSR, &csr);

  // Ignore non-status bits.
  // Required by Option 3 wrappers which now have a version info in
  // top 16 bits.
  csr = ACL_KERNEL_READ_BIT_RANGE(csr, KERNEL_CSR_LAST_STATUS_BIT, 0);

  // Check for updated status bits
  if (0 == (csr & KERNEL_CSR_STATUS_BITS_MASK)) {
    return false;
  }

  // Report the status bits that we read (nothing is cleared at this point)
  ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d reporting status %x.\n",
                          accel_id, csr);

  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 1) {
    ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is done.\n", accel_id);
  }
  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 1) {
    ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is stalled.\n", accel_id);
  }
  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_UNSTALL) == 1) {
    ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is unstalled.\n",
                            accel_id);
  }
  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 1) {
    ACL_KERNEL_IF_DEBUG_MSG(
        kern, ":: Accelerator %d ready for temporal profile readback.\n",
        accel_id);
  }

  // Only DONE, STALLED and PROFILE_TEMPORAL_STATUS are acted upon below.
  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 0 &&
      ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 0 &&
      ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 0) {
    return false;
  }

  // read the printf buffer size from the kernel cra, just after the
  // kernel arguments
  if (kern->accel_num_printfs[accel_id] > 0) {
    printf_size.emplace(0);
    acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
                        &*printf_size);
    assert(*printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
    ACL_KERNEL_IF_DEBUG_MSG(kern,
                            ":: Accelerator %d printf buffer size is %d.\n",
                            accel_id, *printf_size);

    // kernel is stalled because the printf buffer is full
    if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 1) {
      // clear interrupt: re-read the full CSR (including non-status bits) so
      // that only the STALLED bit changes when it is written back below
      unsigned int new_csr = 0;
      acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_CSR, &new_csr);
      ACL_KERNEL_CLEAR_BIT(new_csr, KERNEL_CSR_STALLED);

      ACL_KERNEL_IF_DEBUG_MSG(kern,
                              ":: Calling acl_process_printf_buffer_fn with "
                              "activation_id=%d and printf_size=%u.\n",
                              activation_id, *printf_size);
      // update status, which will dump the printf buffer, set
      // debug_dump_printf = 0
      acl_process_printf_buffer_fn(activation_id, (int)*printf_size, 0);

      ACL_KERNEL_IF_DEBUG_MSG(
          kern, ":: Accelerator %d new csr is %x.\n", accel_id,
          ACL_KERNEL_READ_BIT_RANGE(new_csr, KERNEL_CSR_LAST_STATUS_BIT, 0));

      // The CSR is written back only after the printf buffer was processed.
      acl_kernel_cra_write(kern, accel_id, KERNEL_OFFSET_CSR, new_csr);
      return false;
    }
  }

  // Start profile counter readback if profile interrupt and not done
  if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) != 0 &&
      ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 0) {
    ACL_KERNEL_IF_DEBUG_MSG(
        kern, ":: Issuing profile reset command:: Accelerator %d.\n", accel_id);

    // Reset temporal profiling counter.
    // ctrl_val is zero-initialized so that a failed read (which is only
    // logged) never results in an indeterminate value being modified and
    // written to the CSR.
    unsigned int ctrl_val = 0;
    int status =
        acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_CSR, &ctrl_val);
    if (status) {
      ACL_KERNEL_IF_DEBUG_MSG(
          kern, ":: Got bad status reading CSR ctrl reg:: Accelerator %d.\n",
          accel_id);
    }
    ACL_KERNEL_SET_BIT(ctrl_val, KERNEL_CSR_PROFILE_TEMPORAL_RESET);
    status = acl_kernel_cra_write(kern, accel_id, KERNEL_OFFSET_CSR, ctrl_val);
    if (status) {
      ACL_KERNEL_IF_DEBUG_MSG(
          kern, ":: Got bad status writing CSR ctrl reg:: Accelerator %d.\n",
          accel_id);
    }

    if (activation_id < 0) {
      // This is an autorun kernel
      acl_process_autorun_profiler_scan_chain(kern->physical_device_id,
                                              accel_id);
    } else {
      acl_kernel_profile_fn(activation_id);
    }
    return false;
  }

  return true;
}
1419+
1420+
// CSR-side housekeeping for one completed invocation on accelerator
// `accel_id`: optionally exercises the profiling hardware (test builds only),
// clears the "done" bit on CSR versions that need it, and hands the printf
// buffer to the printf callback when a buffer size was captured.
//
// Parameters:
//   kern          - kernel interface owning the accelerator.
//   accel_id      - index of the accelerator being serviced.
//   activation_id - job id forwarded to acl_process_printf_buffer_fn.
//   printf_size   - printf buffer size to process; when empty, no printf
//                   processing is performed.
static void acl_kernel_if_update_status_finish(
    acl_kernel_if *kern, const unsigned int accel_id, const int activation_id,
    const std::optional<unsigned int> &printf_size) {
#ifdef TEST_PROFILING_HARDWARE
  // Test readback of fake profile data using the acl_hal_mmd function that
  // would be called from the acl runtime.
  ACL_KERNEL_IF_DEBUG_MSG(kern, ":: testing profile hardware on accel_id=%u.\n",
                          accel_id);

  uint64_t profile_words[10];

  // Read, reset, then read again.
  acl_hal_mmd_get_profile_data(kern->physical_device_id, accel_id,
                               profile_words, 6);
  acl_hal_mmd_reset_profile_counters(kern->physical_device_id, accel_id);
  acl_hal_mmd_get_profile_data(kern->physical_device_id, accel_id,
                               profile_words, 6);
#endif

  // Just clear the "done" bit. The "go" bit should already have been
  // cleared, but this is harmless anyway.
  // Since csr version 19, done bit is cleared when finish counter is read.
  // Since csr version 2022.3, done bit needs to be cleared explicitly.
  const bool needs_explicit_done_clear =
      kern->csr_version == CSR_VERSION_ID_18_1 ||
      kern->csr_version >= CSR_VERSION_ID_2022_3;
  if (needs_explicit_done_clear) {
    // NOTE(review): the value read back is discarded; presumably the read
    // only serves to make sure the write has landed - confirm.
    unsigned int discarded_readback;
    acl_kernel_cra_write(kern, accel_id, KERNEL_OFFSET_CSR, 0);
    acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_CSR,
                        &discarded_readback);
  }

  // Nothing left to do unless a printf buffer size was recorded.
  if (!printf_size.has_value()) {
    return;
  }

  const unsigned int buffer_bytes = printf_size.value();
  ACL_KERNEL_IF_DEBUG_MSG(kern,
                          ":: Calling acl_process_printf_buffer_fn with "
                          "activation_id=%d and printf_size=%u.\n",
                          activation_id, buffer_bytes);
  acl_process_printf_buffer_fn(activation_id, (int)buffer_bytes, 0);
}
1457+
13061458
// Called when we receive a kernel status interrupt. Cycle through all of
13071459
// the running accelerators and check for updated status.
13081460
void acl_kernel_if_update_status(acl_kernel_if *kern) {
@@ -1327,126 +1479,31 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
13271479
const uintptr_t segment_pre_irq = kern->cur_segment;
13281480

13291481
// Check which accelerators are done and update their status appropriately
1330-
for (unsigned int k = 0; k < kern->num_accel; ++k) {
1482+
for (unsigned int accel_id = 0; accel_id < kern->num_accel; ++accel_id) {
13311483
int next_queue_back;
13321484
unsigned int finish_counter = 0;
13331485

1334-
if (kern->accel_queue_back[k] == (int)kern->accel_invoc_queue_depth[k] - 1)
1486+
if (kern->accel_queue_back[accel_id] ==
1487+
(int)kern->accel_invoc_queue_depth[accel_id] - 1) {
13351488
next_queue_back = 0;
1336-
else
1337-
next_queue_back = kern->accel_queue_back[k] + 1;
1489+
} else {
1490+
next_queue_back = kern->accel_queue_back[accel_id] + 1;
1491+
}
13381492

13391493
// Skip idle kernel
1340-
if (kern->accel_job_ids[k][next_queue_back] < 0) {
1494+
if (kern->accel_job_ids[accel_id][next_queue_back] < 0) {
13411495
// If this is the autorun profiling kernel, we want to read back profiling
13421496
// data from it, so don't 'continue' (this kernel is always 'idle').
1343-
if (k != (unsigned)kern->autorun_profiling_kernel_id) {
1497+
if (accel_id != (unsigned)kern->autorun_profiling_kernel_id) {
13441498
continue;
13451499
}
13461500
}
13471501

1348-
// Read the accelerator's status register
1349-
unsigned int csr = 0;
1350-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_CSR, &csr);
1502+
const int activation_id = kern->accel_job_ids[accel_id][next_queue_back];
1503+
std::optional<unsigned int> printf_size;
13511504

1352-
// Ignore non-status bits.
1353-
// Required by Option 3 wrappers which now have a version info in
1354-
// top 16 bits.
1355-
csr = ACL_KERNEL_READ_BIT_RANGE(csr, KERNEL_CSR_LAST_STATUS_BIT, 0);
1356-
1357-
// Check for updated status bits
1358-
if (0 == (csr & KERNEL_CSR_STATUS_BITS_MASK))
1359-
continue;
1360-
1361-
// Clear the status bits that we read
1362-
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d reporting status %x.\n", k,
1363-
csr);
1364-
1365-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 1) {
1366-
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is done.\n", k);
1367-
}
1368-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 1) {
1369-
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is stalled.\n", k);
1370-
}
1371-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_UNSTALL) == 1) {
1372-
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d is unstalled.\n", k);
1373-
}
1374-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 1) {
1375-
ACL_KERNEL_IF_DEBUG_MSG(
1376-
kern, ":: Accelerator %d ready for temporal profile readback.\n", k);
1377-
}
1378-
1379-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 0 &&
1380-
ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 0 &&
1381-
ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) == 0)
1382-
continue;
1383-
1384-
int activation_id = kern->accel_job_ids[k][next_queue_back];
1385-
1386-
// read the printf buffer size from the kernel cra, just after the
1387-
// kernel arguments
1388-
unsigned int printf_size = 0;
1389-
if (kern->accel_num_printfs[k] > 0) {
1390-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
1391-
&printf_size);
1392-
assert(printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
1393-
ACL_KERNEL_IF_DEBUG_MSG(kern,
1394-
":: Accelerator %d printf buffer size is %d.\n",
1395-
k, printf_size);
1396-
1397-
// kernel is stalled because the printf buffer is full
1398-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_STALLED) == 1) {
1399-
// clear interrupt
1400-
unsigned int new_csr = 0;
1401-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_CSR, &new_csr);
1402-
ACL_KERNEL_CLEAR_BIT(new_csr, KERNEL_CSR_STALLED);
1403-
1404-
ACL_KERNEL_IF_DEBUG_MSG(kern,
1405-
":: Calling acl_process_printf_buffer_fn with "
1406-
"activation_id=%d and printf_size=%u.\n",
1407-
activation_id, printf_size);
1408-
// update status, which will dump the printf buffer, set
1409-
// debug_dump_printf = 0
1410-
acl_process_printf_buffer_fn(activation_id, (int)printf_size, 0);
1411-
1412-
ACL_KERNEL_IF_DEBUG_MSG(
1413-
kern, ":: Accelerator %d new csr is %x.\n", k,
1414-
ACL_KERNEL_READ_BIT_RANGE(new_csr, KERNEL_CSR_LAST_STATUS_BIT, 0));
1415-
1416-
acl_kernel_cra_write(kern, k, KERNEL_OFFSET_CSR, new_csr);
1417-
continue;
1418-
}
1419-
}
1420-
1421-
// Start profile counter readback if profile interrupt and not done
1422-
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_PROFILE_TEMPORAL_STATUS) != 0 &&
1423-
ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_DONE) == 0) {
1424-
ACL_KERNEL_IF_DEBUG_MSG(
1425-
kern, ":: Issuing profile reset command:: Accelerator %d.\n", k);
1426-
1427-
// Reset temporal profiling counter
1428-
int status;
1429-
unsigned int ctrl_val;
1430-
status = acl_kernel_cra_read(kern, k, KERNEL_OFFSET_CSR, &ctrl_val);
1431-
if (status) {
1432-
ACL_KERNEL_IF_DEBUG_MSG(
1433-
kern, ":: Got bad status reading CSR ctrl reg:: Accelerator %d.\n",
1434-
k);
1435-
}
1436-
ACL_KERNEL_SET_BIT(ctrl_val, KERNEL_CSR_PROFILE_TEMPORAL_RESET);
1437-
status = acl_kernel_cra_write(kern, k, KERNEL_OFFSET_CSR, ctrl_val);
1438-
if (status) {
1439-
ACL_KERNEL_IF_DEBUG_MSG(
1440-
kern, ":: Got bad status writing CSR ctrl reg:: Accelerator %d.\n",
1441-
k);
1442-
}
1443-
1444-
if (activation_id < 0) {
1445-
// This is an autorun kernel
1446-
acl_process_autorun_profiler_scan_chain(kern->physical_device_id, k);
1447-
} else {
1448-
acl_kernel_profile_fn(activation_id);
1449-
}
1505+
if (!acl_kernel_if_update_status_done(kern, accel_id, activation_id,
1506+
printf_size)) {
14501507
continue;
14511508
}
14521509

@@ -1456,51 +1513,20 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
14561513
// Only expect single completion for older csr version
14571514
finish_counter = 1;
14581515
} else {
1459-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_FINISH_COUNTER,
1516+
acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_FINISH_COUNTER,
14601517
&finish_counter);
1461-
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d has %d finishes.\n", k,
1462-
finish_counter);
1518+
ACL_KERNEL_IF_DEBUG_MSG(kern, ":: Accelerator %d has %d finishes.\n",
1519+
accel_id, finish_counter);
14631520
}
14641521

14651522
for (unsigned int i = 0; i < finish_counter; i++) {
1466-
activation_id = kern->accel_job_ids[k][next_queue_back];
1523+
const int activation_id = kern->accel_job_ids[accel_id][next_queue_back];
14671524

14681525
// Tell the host library this job is done
1469-
kern->accel_job_ids[k][next_queue_back] = -1;
1470-
1471-
#ifdef TEST_PROFILING_HARDWARE
1472-
// Test readback of fake profile data using the acl_hal_mmd function that
1473-
// would be called from the acl runtime.
1474-
ACL_KERNEL_IF_DEBUG_MSG(
1475-
kern, ":: testing profile hardware on accel_id=%u.\n", k);
1476-
1477-
uint64_t data[10];
1478-
1479-
acl_hal_mmd_get_profile_data(kern->physical_device_id, k, data, 6);
1480-
1481-
acl_hal_mmd_reset_profile_counters(kern->physical_device_id, k);
1526+
kern->accel_job_ids[accel_id][next_queue_back] = -1;
14821527

1483-
acl_hal_mmd_get_profile_data(kern->physical_device_id, k, data, 6);
1484-
#endif
1485-
1486-
// Just clear the "done" bit. The "go" bit should already have been
1487-
// cleared, but this is harmless anyway.
1488-
// Since csr version 19, done bit is cleared when finish counter is read.
1489-
// Since csr version 2022.3, done bit needs to be cleared explicitly.
1490-
if (kern->csr_version == CSR_VERSION_ID_18_1 ||
1491-
kern->csr_version >= CSR_VERSION_ID_2022_3) {
1492-
unsigned int dum;
1493-
acl_kernel_cra_write(kern, k, KERNEL_OFFSET_CSR, 0);
1494-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_CSR, &dum);
1495-
}
1496-
1497-
if (kern->accel_num_printfs[k] > 0) {
1498-
ACL_KERNEL_IF_DEBUG_MSG(kern,
1499-
":: Calling acl_process_printf_buffer_fn with "
1500-
"activation_id=%d and printf_size=%u.\n",
1501-
activation_id, printf_size);
1502-
acl_process_printf_buffer_fn(activation_id, (int)printf_size, 0);
1503-
}
1528+
acl_kernel_if_update_status_finish(kern, accel_id, activation_id,
1529+
printf_size);
15041530

15051531
// Executing the following update after reading from performance
15061532
// and efficiency monitors will clobber the throughput reported by
@@ -1509,16 +1535,17 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
15091535
// ports before setting CL_COMPLETE adds to the apparent kernel time.
15101536
//
15111537
acl_kernel_if_update_fn(activation_id, CL_COMPLETE);
1512-
kern->accel_queue_back[k] = next_queue_back;
1538+
kern->accel_queue_back[accel_id] = next_queue_back;
15131539

1514-
if (kern->accel_queue_back[k] ==
1515-
(int)kern->accel_invoc_queue_depth[k] - 1)
1540+
if (kern->accel_queue_back[accel_id] ==
1541+
(int)kern->accel_invoc_queue_depth[accel_id] - 1) {
15161542
next_queue_back = 0;
1517-
else
1518-
next_queue_back = kern->accel_queue_back[k] + 1;
1543+
} else {
1544+
next_queue_back = kern->accel_queue_back[accel_id] + 1;
1545+
}
15191546

1520-
if (kern->accel_job_ids[k][next_queue_back] > -1) {
1521-
acl_kernel_if_update_fn(kern->accel_job_ids[k][next_queue_back],
1547+
if (kern->accel_job_ids[accel_id][next_queue_back] > -1) {
1548+
acl_kernel_if_update_fn(kern->accel_job_ids[accel_id][next_queue_back],
15221549
CL_RUNNING);
15231550
}
15241551
}

0 commit comments

Comments
 (0)