Skip to content

Commit 34f46ae

Browse files
mosheshemesh2 authored and Saeed Mahameed committed
net/mlx5: Add command failures data to debugfs
Add new counters to the command interface debugfs to count command failures. The following counters are added:
- total_failed - number of times the command failed (any kind of failure). Note: the debugfs file created for this counter in debugfs.c is named "failed".
- failed_mbox_status - number of times the command failed on a bad status returned by FW.

In addition, add data about the last command failure to the command interface debugfs:
- last_failed_errno - errno returned by the last failed command.
- last_failed_mbox_status - last bad status returned by FW.

Signed-off-by: Moshe Shemesh <[email protected]>
Reviewed-by: Leon Romanovsky <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 371c2b3 commit 34f46ae

File tree

3 files changed

+51
-9
lines changed

3 files changed

+51
-9
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,16 +1877,38 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
18771877
return err;
18781878
}
18791879

1880+
/*
 * cmd_status_log - account a failed command in the per-opcode statistics
 * @dev:    device whose dev->cmd.stats[] entry is updated
 * @opcode: command opcode, used as the index into dev->cmd.stats[]
 * @status: status byte taken from the command outbox
 * @err:    final result of the command; 0 means success and nothing is recorded
 *
 * For any non-zero @err the "failed" counter is incremented and, when @err is
 * negative, -@err is stored as last_failed_errno.  When @err is -EREMOTEIO
 * the failed_mbox_status counter is incremented as well and @status is stored
 * as last_failed_mbox_status.
 */
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
{
	struct mlx5_cmd_stats *stats;
	unsigned long flags;

	if (!err)
		return;

	/*
	 * NOTE(review): opcode is taken straight from the command inbox by the
	 * callers; confirm it can never exceed the size of dev->cmd.stats[].
	 */
	stats = &dev->cmd.stats[opcode];

	/*
	 * This helper is reached from the synchronous exec paths and from the
	 * async completion callback handler, so the interrupt state on entry
	 * is not known here.  Save and restore it instead of unconditionally
	 * re-enabling interrupts on unlock.
	 */
	spin_lock_irqsave(&stats->lock, flags);
	stats->failed++;
	if (err < 0)
		stats->last_failed_errno = -err;
	if (err == -EREMOTEIO) {
		stats->failed_mbox_status++;
		stats->last_failed_mbox_status = status;
	}
	spin_unlock_irqrestore(&stats->lock, flags);
}
1898+
18801899
/*
 * Fold the command execution result and the outbox status into one errno.
 *
 * This span is a diff hunk: a line following an "N-" marker belongs to the
 * version being removed, a line following an "N+" marker to the version being
 * added.  Both versions return the same values:
 *  - err == -EREMOTEIO on entry is rewritten to -EIO, so that a returned
 *    -EREMOTEIO always means "outbox status != MLX5_CMD_STAT_OK";
 *  - err == 0 with outbox status != MLX5_CMD_STAT_OK returns -EREMOTEIO;
 *  - any other err is returned as is.
 * The added version additionally passes the final err, together with the
 * opcode and the outbox status, to cmd_status_log() before returning.
 */
1881-
static int cmd_status_err(int err, void *out)
1900+
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
18821901
{
1883-
if (err) /* -EREMOTEIO is preserved */
1884-
return err == -EREMOTEIO ? -EIO : err;
1902+
u8 status = MLX5_GET(mbox_out, out, status);
18851903

1886-
if (MLX5_GET(mbox_out, out, status) != MLX5_CMD_STAT_OK)
1887-
return -EREMOTEIO;
1904+
if (err == -EREMOTEIO) /* -EREMOTEIO is reserved for a bad outbox status */
1905+
err = -EIO;
18881906

1889-
return 0;
1907+
if (!err && status != MLX5_CMD_STAT_OK)
1908+
err = -EREMOTEIO;
1909+
1910+
cmd_status_log(dev, opcode, status, err); /* returns immediately when err == 0 */
1911+
return err;
18901912
}
18911913

18921914
/**
@@ -1910,8 +1932,10 @@ static int cmd_status_err(int err, void *out)
19101932
int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
19111933
{
19121934
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
1935+
u16 opcode = MLX5_GET(mbox_in, in, opcode); /* opcode field of the inbox; forwarded for per-opcode failure accounting */
19131936

1914-
return cmd_status_err(err, out);
1937+
err = cmd_status_err(dev, err, opcode, out); /* merge outbox status into err and log failures */
1938+
return err;
19151939
}
19161940
EXPORT_SYMBOL(mlx5_cmd_do);
19171941

@@ -1954,8 +1978,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
19541978
void *out, int out_size)
19551979
{
19561980
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
1981+
u16 opcode = MLX5_GET(mbox_in, in, opcode);
19571982

1958-
err = cmd_status_err(err, out);
1983+
err = cmd_status_err(dev, err, opcode, out);
19591984
return mlx5_cmd_check(dev, err, in, out);
19601985
}
19611986
EXPORT_SYMBOL(mlx5_cmd_exec_polling);
@@ -1991,7 +2016,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
19912016
struct mlx5_async_ctx *ctx;
19922017

19932018
ctx = work->ctx;
1994-
status = cmd_status_err(status, work->out);
2019+
status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
19952020
work->user_callback(status, work);
19962021
if (atomic_dec_and_test(&ctx->num_inflight))
19972022
wake_up(&ctx->wait);
@@ -2005,6 +2030,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
20052030

20062031
work->ctx = ctx;
20072032
work->user_callback = callback;
2033+
work->opcode = MLX5_GET(mbox_in, in, opcode);
20082034
work->out = out;
20092035
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
20102036
return -EIO;

drivers/net/ethernet/mellanox/mlx5/core/debugfs.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
180180
debugfs_create_file("average", 0400, stats->root, stats,
181181
&stats_fops);
182182
debugfs_create_u64("n", 0400, stats->root, &stats->n);
183+
debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
184+
debugfs_create_u64("failed_mbox_status", 0400, stats->root,
185+
&stats->failed_mbox_status);
186+
debugfs_create_u32("last_failed_errno", 0400, stats->root,
187+
&stats->last_failed_errno);
188+
debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
189+
&stats->last_failed_mbox_status);
183190
}
184191
}
185192
}

include/linux/mlx5/driver.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,14 @@ enum {
264264
struct mlx5_cmd_stats {
265265
u64 sum;
266266
u64 n;
267+
/* number of times command failed */
268+
u64 failed;
269+
/* number of times command failed on bad status returned by FW */
270+
u64 failed_mbox_status;
271+
/* last command failed returned errno */
272+
u32 last_failed_errno;
273+
/* last bad status returned by FW */
274+
u8 last_failed_mbox_status;
267275
struct dentry *root;
268276
/* protect command average calculations */
269277
spinlock_t lock;
@@ -955,6 +963,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
955963
/*
 * Per-request state of an asynchronous command.  mlx5_cmd_exec_cb() fills in
 * every field before issuing the command; mlx5_cmd_exec_cb_handler() reads
 * them back when the command completes.
 */
struct mlx5_async_work {
956964
struct mlx5_async_ctx *ctx; /* async context; the handler uses ctx->dev and ctx->num_inflight */
957965
mlx5_async_cbk_t user_callback; /* called by the handler as user_callback(status, work) */
966+
u16 opcode; /* cmd opcode */
958967
void *out; /* pointer to the cmd output buffer */
959968
};
960969

0 commit comments

Comments
 (0)