From: Moshe Shemesh <moshe@nvidia.com>
Date: Thu, 27 Jan 2022 15:22:21 +0200
Subject: net/mlx5: Add command failures data to debugfs
Patch-mainline: v5.18-rc1
Git-commit: 34f46ae0d4b38e83cfb26fb6f06b5b5efea47fdc
References: jsc#PED-1549

Add new counters to command interface debugfs to count command failures.
The following counters are added:
failed - number of times the command failed (any kind of failure).
failed_mbox_status - number of times command failed on bad status
returned by FW.

In addition, add data about last command failure to command interface
debugfs:
last_failed_errno - errno returned by the last failed command (stored as a positive value).
last_failed_mbox_status - last bad status returned by FW.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c     |   44 +++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |    7 +++
 include/linux/mlx5/driver.h                       |    9 ++++
 3 files changed, 51 insertions(+), 9 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1888,16 +1888,38 @@ out_in:
 	return err;
 }
 
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
+{
+	struct mlx5_cmd_stats *stats;
+
+	if (!err)
+		return;
+
+	stats = &dev->cmd.stats[opcode];
+	spin_lock_irq(&stats->lock);
+	stats->failed++;
+	if (err < 0)
+		stats->last_failed_errno = -err;
+	if (err == -EREMOTEIO) {
+		stats->failed_mbox_status++;
+		stats->last_failed_mbox_status = status;
+	}
+	spin_unlock_irq(&stats->lock);
+}
+
 /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
-static int cmd_status_err(int err, void *out)
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
 {
-	if (err) /* -EREMOTEIO is preserved */
-		return err == -EREMOTEIO ? -EIO : err;
+	u8 status = MLX5_GET(mbox_out, out, status);
+
+	if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
+		err = -EIO;
 
-	if (MLX5_GET(mbox_out, out, status) != MLX5_CMD_STAT_OK)
-		return -EREMOTEIO;
+	if (!err && status != MLX5_CMD_STAT_OK)
+		err = -EREMOTEIO;
 
-	return 0;
+	cmd_status_log(dev, opcode, status, err);
+	return err;
 }
 
 /**
@@ -1921,8 +1943,10 @@ static int cmd_status_err(int err, void
 int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
 {
 	int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+	u16 opcode = MLX5_GET(mbox_in, in, opcode);
 
-	return cmd_status_err(err, out);
+	err = cmd_status_err(dev, err, opcode, out);
+	return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_do);
 
@@ -1965,8 +1989,9 @@ int mlx5_cmd_exec_polling(struct mlx5_co
 			  void *out, int out_size)
 {
 	int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+	u16 opcode = MLX5_GET(mbox_in, in, opcode);
 
-	err = cmd_status_err(err, out);
+	err = cmd_status_err(dev, err, opcode, out);
 	return mlx5_cmd_check(dev, err, in, out);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_polling);
@@ -2002,7 +2027,7 @@ static void mlx5_cmd_exec_cb_handler(int
 	struct mlx5_async_ctx *ctx;
 
 	ctx = work->ctx;
-	status = cmd_status_err(status, work->out);
+	status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
 	work->user_callback(status, work);
 	if (atomic_dec_and_test(&ctx->num_inflight))
 		wake_up(&ctx->wait);
@@ -2016,6 +2041,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_c
 
 	work->ctx = ctx;
 	work->user_callback = callback;
+	work->opcode = MLX5_GET(mbox_in, in, opcode);
 	work->out = out;
 	if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
 		return -EIO;
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -180,6 +180,13 @@ void mlx5_cmdif_debugfs_init(struct mlx5
 			debugfs_create_file("average", 0400, stats->root, stats,
 					    &stats_fops);
 			debugfs_create_u64("n", 0400, stats->root, &stats->n);
+			debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
+			debugfs_create_u64("failed_mbox_status", 0400, stats->root,
+					   &stats->failed_mbox_status);
+			debugfs_create_u32("last_failed_errno", 0400, stats->root,
+					   &stats->last_failed_errno);
+			debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
+					  &stats->last_failed_mbox_status);
 		}
 	}
 }
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -264,6 +264,14 @@ enum {
 struct mlx5_cmd_stats {
 	u64		sum;
 	u64		n;
+	/* number of times command failed */
+	u64		failed;
+	/* number of times command failed on bad status returned by FW */
+	u64		failed_mbox_status;
+	/* last command failed returned errno */
+	u32		last_failed_errno;
+	/* last bad status returned by FW */
+	u8		last_failed_mbox_status;
 	struct dentry  *root;
 	/* protect command average calculations */
 	spinlock_t	lock;
@@ -955,6 +963,7 @@ typedef void (*mlx5_async_cbk_t)(int sta
 struct mlx5_async_work {
 	struct mlx5_async_ctx *ctx;
 	mlx5_async_cbk_t user_callback;
+	u16 opcode; /* cmd opcode */
 	void *out; /* pointer to the cmd output buffer */
 };