Skip to content

Commit c71ad41

Browse files
ferasdSaeed Mahameed
authored andcommitted
net/mlx5: FW tracer, events handling
The tracer has one event, event 0x26, with two subtypes: - Subtype 0: Ownership change - Subtype 1: Traces available. An ownership change occurs in the following cases: 1- The owner releases its ownership; in this case, an event is sent to inform others to reattempt acquiring ownership. 2- Ownership was taken by a higher-priority tool; in this case the owner should understand that it lost ownership and go through the teardown flow. The second subtype indicates that there are traces in the trace buffer; in this case, the driver polls the tracer buffer for new traces, parses them and prepares the messages for printing. The HW starts tracing from the first address in the tracer buffer. The driver receives an event notifying it that a new trace block exists. The HW posts a timestamp event in the last 8B of every 256B block. Comparing that timestamp to the last handled timestamp indicates whether this is a new trace block. Once a new timestamp is detected, the entire block is considered valid. Block validation and parsing should be done after copying the current block to a different location, in order to avoid the block being overwritten during processing. Signed-off-by: Feras Daoud <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent e9cad2c commit c71ad41

File tree

4 files changed

+347
-10
lines changed

4 files changed

+347
-10
lines changed

drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c

Lines changed: 259 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -318,25 +318,244 @@ static void mlx5_tracer_read_strings_db(struct work_struct *work)
318318
return;
319319
}
320320

321-
static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
321+
static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
322322
{
323-
struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
324-
ownership_change_work);
325-
struct mlx5_core_dev *dev = tracer->dev;
323+
u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
324+
u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
326325
int err;
327326

328-
if (tracer->owner) {
329-
mlx5_fw_tracer_ownership_release(tracer);
327+
MLX5_SET(mtrc_ctrl, in, arm_event, 1);
328+
329+
err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
330+
MLX5_REG_MTRC_CTRL, 0, 1);
331+
if (err)
332+
mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
333+
}
334+
335+
/* Decode a single raw trace entry into @tracer_event.
 *
 * @tracer:       tracer context; supplies trc_ver and the string DB range
 *                (str_db.first_string_trace / str_db.num_string_trace).
 * @tracer_event: output; event_id, lost_event and type are always written,
 *                the timestamp_event or string_event member is filled in
 *                according to the decoded type.
 * @trace:        pointer to the raw trace entry to decode.
 *
 * Classification:
 *  - TRACER_EVENT_TYPE_TIMESTAMP: the three timestamp fields are assembled
 *    into one 64-bit value and the "unreliable" flag is derived from urts.
 *  - event_id within [first_string_trace, first_string_trace +
 *    num_string_trace]: string event.
 *  - anything else: TRACER_EVENT_TYPE_UNRECOGNIZED.
 */
static void poll_trace(struct mlx5_fw_tracer *tracer,
		       struct tracer_event *tracer_event, u64 *trace)
{
	u32 timestamp_low, timestamp_mid, timestamp_high, urts;

	tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
	tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);

	switch (tracer_event->event_id) {
	case TRACER_EVENT_TYPE_TIMESTAMP:
		tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
		urts = MLX5_GET(tracer_timestamp_event, trace, urts);
		/* The bit of urts that is tested depends on the tracer version */
		if (tracer->trc_ver == 0)
			tracer_event->timestamp_event.unreliable = !!(urts >> 2);
		else
			tracer_event->timestamp_event.unreliable = !!(urts & 1);

		timestamp_low = MLX5_GET(tracer_timestamp_event,
					 trace, timestamp7_0);
		timestamp_mid = MLX5_GET(tracer_timestamp_event,
					 trace, timestamp39_8);
		timestamp_high = MLX5_GET(tracer_timestamp_event,
					  trace, timestamp52_40);

		/* Stitch bits [52:40], [39:8] and [7:0] into one value */
		tracer_event->timestamp_event.timestamp =
				((u64)timestamp_high << 40) |
				((u64)timestamp_mid << 8) |
				(u64)timestamp_low;
		break;
	default:
		/* Both bounds must hold. With '||' the test was true for every
		 * event_id, so unknown events were decoded as string events
		 * and the UNRECOGNIZED branch could never be reached.
		 */
		if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
		    tracer_event->event_id <= tracer->str_db.first_string_trace +
					      tracer->str_db.num_string_trace) {
			tracer_event->type = TRACER_EVENT_TYPE_STRING;
			tracer_event->string_event.timestamp =
				MLX5_GET(tracer_string_event, trace, timestamp);
			tracer_event->string_event.string_param =
				MLX5_GET(tracer_string_event, trace, string_param);
			tracer_event->string_event.tmsn =
				MLX5_GET(tracer_string_event, trace, tmsn);
			tracer_event->string_event.tdsn =
				MLX5_GET(tracer_string_event, trace, tdsn);
		} else {
			tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
		}
		break;
	}
}
383+
384+
/* Return the timestamp carried by the trace entry at @ts_event, or 0 when
 * that entry is not a timestamp event.
 */
static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
{
	struct tracer_event parsed;
	u8 id = MLX5_GET(tracer_event, ts_event, event_id);

	if (id != TRACER_EVENT_TYPE_TIMESTAMP)
		return 0;

	poll_trace(tracer, &parsed, ts_event);

	return parsed.timestamp_event.timestamp;
}
398+
399+
static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
400+
{
401+
struct mlx5_fw_tracer *tracer =
402+
container_of(work, struct mlx5_fw_tracer, handle_traces_work);
403+
u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
404+
u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
405+
u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
406+
struct tracer_event tracer_event;
407+
struct mlx5_core_dev *dev;
408+
int i;
409+
410+
if (!tracer->owner)
330411
return;
412+
413+
dev = tracer->dev;
414+
block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
415+
start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
416+
417+
/* Copy the block to local buffer to avoid HW override while being processed*/
418+
memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
419+
TRACER_BLOCK_SIZE_BYTE);
420+
421+
block_timestamp =
422+
get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
423+
424+
while (block_timestamp > tracer->last_timestamp) {
425+
/* Check block override if its not the first block */
426+
if (!tracer->last_timestamp) {
427+
u64 *ts_event;
428+
/* To avoid block override be the HW in case of buffer
429+
* wraparound, the time stamp of the previous block
430+
* should be compared to the last timestamp handled
431+
* by the driver.
432+
*/
433+
prev_consumer_index =
434+
(tracer->buff.consumer_index - 1) & (block_count - 1);
435+
prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
436+
437+
ts_event = tracer->buff.log_buf + prev_start_offset +
438+
(TRACES_PER_BLOCK - 1) * trace_event_size;
439+
last_block_timestamp = get_block_timestamp(tracer, ts_event);
440+
/* If previous timestamp different from last stored
441+
* timestamp then there is a good chance that the
442+
* current buffer is overwritten and therefore should
443+
* not be parsed.
444+
*/
445+
if (tracer->last_timestamp != last_block_timestamp) {
446+
mlx5_core_warn(dev, "FWTracer: Events were lost\n");
447+
tracer->last_timestamp = block_timestamp;
448+
tracer->buff.consumer_index =
449+
(tracer->buff.consumer_index + 1) & (block_count - 1);
450+
break;
451+
}
452+
}
453+
454+
/* Parse events */
455+
for (i = 0; i < TRACES_PER_BLOCK ; i++)
456+
poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
457+
458+
tracer->buff.consumer_index =
459+
(tracer->buff.consumer_index + 1) & (block_count - 1);
460+
461+
tracer->last_timestamp = block_timestamp;
462+
start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
463+
memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
464+
TRACER_BLOCK_SIZE_BYTE);
465+
block_timestamp = get_block_timestamp(tracer,
466+
&tmp_trace_block[TRACES_PER_BLOCK - 1]);
331467
}
332468

469+
mlx5_fw_tracer_arm(dev);
470+
}
471+
472+
static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
473+
{
474+
struct mlx5_core_dev *dev = tracer->dev;
475+
u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
476+
u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
477+
int err;
478+
479+
MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
480+
MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
481+
ilog2(TRACER_BUFFER_PAGE_NUM));
482+
MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
483+
484+
err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
485+
MLX5_REG_MTRC_CONF, 0, 1);
486+
if (err)
487+
mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
488+
489+
return err;
490+
}
491+
492+
/* Write trace_status and arm_event to the MTRC_CTRL register, selecting
 * TRACE_STATUS in modify_field_select.
 *
 * @status: value for the trace_status field.
 * @arm:    value for the arm_event field.
 *
 * On success with a non-zero @status, tracer->last_timestamp is reset to 0.
 * Returns the result of the register access.
 */
static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
{
	u32 ctrl_out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
	u32 ctrl_in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
	struct mlx5_core_dev *dev = tracer->dev;
	int ret;

	MLX5_SET(mtrc_ctrl, ctrl_in, modify_field_select, TRACE_STATUS);
	MLX5_SET(mtrc_ctrl, ctrl_in, trace_status, status);
	MLX5_SET(mtrc_ctrl, ctrl_in, arm_event, arm);

	ret = mlx5_core_access_reg(dev, ctrl_in, sizeof(ctrl_in),
				   ctrl_out, sizeof(ctrl_out),
				   MLX5_REG_MTRC_CTRL, 0, 1);
	if (ret)
		return ret;

	if (status)
		tracer->last_timestamp = 0;

	return 0;
}
511+
512+
static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
513+
{
514+
struct mlx5_core_dev *dev = tracer->dev;
515+
int err;
516+
333517
err = mlx5_fw_tracer_ownership_acquire(tracer);
334518
if (err) {
335519
mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
520+
/* Don't fail since ownership can be acquired on a later FW event */
521+
return 0;
522+
}
523+
524+
err = mlx5_fw_tracer_set_mtrc_conf(tracer);
525+
if (err) {
526+
mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
527+
goto release_ownership;
528+
}
529+
530+
/* enable tracer & trace events */
531+
err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
532+
if (err) {
533+
mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
534+
goto release_ownership;
535+
}
536+
537+
return 0;
538+
539+
release_ownership:
540+
mlx5_fw_tracer_ownership_release(tracer);
541+
return err;
542+
}
543+
544+
static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
545+
{
546+
struct mlx5_fw_tracer *tracer =
547+
container_of(work, struct mlx5_fw_tracer, ownership_change_work);
548+
549+
if (tracer->owner) {
550+
tracer->owner = false;
551+
tracer->buff.consumer_index = 0;
336552
return;
337553
}
554+
555+
mlx5_fw_tracer_start(tracer);
338556
}
339557

558+
/* Create software resources (Buffers, etc ..) */
340559
struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
341560
{
342561
struct mlx5_fw_tracer *tracer = NULL;
@@ -361,6 +580,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
361580

362581
INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
363582
INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
583+
INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
584+
364585

365586
err = mlx5_query_mtrc_caps(tracer);
366587
if (err) {
@@ -392,6 +613,9 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
392613
return ERR_PTR(err);
393614
}
394615

616+
/* Create HW resources + start tracer
617+
* must be called before Async EQ is created
618+
*/
395619
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
396620
{
397621
struct mlx5_core_dev *dev;
@@ -417,22 +641,25 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
417641
goto err_dealloc_pd;
418642
}
419643

420-
err = mlx5_fw_tracer_ownership_acquire(tracer);
421-
if (err) /* Don't fail since ownership can be acquired on a later FW event */
422-
mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
644+
mlx5_fw_tracer_start(tracer);
423645

424646
return 0;
647+
425648
err_dealloc_pd:
426649
mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
427650
return err;
428651
}
429652

653+
/* Stop tracer + Cleanup HW resources
654+
* must be called after Async EQ is destroyed
655+
*/
430656
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
431657
{
432658
if (IS_ERR_OR_NULL(tracer))
433659
return;
434660

435661
cancel_work_sync(&tracer->ownership_change_work);
662+
cancel_work_sync(&tracer->handle_traces_work);
436663

437664
if (tracer->owner)
438665
mlx5_fw_tracer_ownership_release(tracer);
@@ -441,6 +668,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
441668
mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
442669
}
443670

671+
/* Free software resources (Buffers, etc ..) */
444672
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
445673
{
446674
if (IS_ERR_OR_NULL(tracer))
@@ -454,4 +682,26 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
454682
kfree(tracer);
455683
}
456684

685+
void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
686+
{
687+
struct mlx5_fw_tracer *tracer = dev->tracer;
688+
689+
if (!tracer)
690+
return;
691+
692+
switch (eqe->sub_type) {
693+
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
694+
if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
695+
queue_work(tracer->work_queue, &tracer->ownership_change_work);
696+
break;
697+
case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
698+
if (likely(tracer->str_db.loaded))
699+
queue_work(tracer->work_queue, &tracer->handle_traces_work);
700+
break;
701+
default:
702+
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
703+
eqe->sub_type);
704+
}
705+
}
706+
457707
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);

0 commit comments

Comments
 (0)