Blob Blame History Raw
From: Moshe Shemesh <moshe@nvidia.com>
Date: Thu, 27 Jan 2022 07:51:14 +0200
Subject: net/mlx5: Add debugfs counters for page commands failures
Patch-mainline: v5.18-rc1
Git-commit: 32071187e9fb18da62f5be569bd2ea0d7a981ee8
References: jsc#PED-1549

Add the following new debugfs counters for debug and verbosity:
fw_pages_alloc_failed - number of pages FW requested but driver failed
to allocate.
give_pages_dropped - number of pages given to FW, but command give pages
failed by FW.
reclaim_pages_discard - number of pages which were about to reclaim back
and FW failed the command.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c   |    4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c |   14 +++++++++++---
 include/linux/mlx5/driver.h                         |    3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -222,6 +222,10 @@ void mlx5_pages_debugfs_init(struct mlx5
 	debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
 	debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages);
 	debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages);
+	debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
+	debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
+	debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
+			   &dev->priv.reclaim_pages_discard);
 }
 
 void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev)
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -352,8 +352,10 @@ retry:
 		if (err) {
 			if (err == -ENOMEM)
 				err = alloc_system_page(dev, function);
-			if (err)
+			if (err) {
+				dev->priv.fw_pages_alloc_failed += (npages - i);
 				goto out_4k;
+			}
 
 			goto retry;
 		}
@@ -372,14 +374,14 @@ retry:
 		/* if triggered by FW and failed by FW ignore */
 		if (event) {
 			err = 0;
-			goto out_4k;
+			goto out_dropped;
 		}
 	}
 	if (err) {
 		err = mlx5_cmd_check(dev, err, in, out);
 		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
 			       func_id, npages, err);
-		goto out_4k;
+		goto out_dropped;
 	}
 
 	dev->priv.fw_pages += npages;
@@ -394,6 +396,8 @@ retry:
 	kvfree(in);
 	return 0;
 
+out_dropped:
+	dev->priv.give_pages_dropped += npages;
 out_4k:
 	for (i--; i >= 0; i--)
 		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
@@ -516,6 +520,10 @@ static int reclaim_pages(struct mlx5_cor
 	mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n",
 		      func_id, npages, outlen);
 	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
+	if (err) {
+		npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+		dev->priv.reclaim_pages_discard += npages;
+	}
 	/* if triggered by FW event and failed by FW then ignore */
 	if (event && err == -EREMOTEIO)
 		err = 0;
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -575,6 +575,9 @@ struct mlx5_priv {
 	struct list_head	free_list;
 	u32			vfs_pages;
 	u32			host_pf_pages;
+	u32			fw_pages_alloc_failed;
+	u32			give_pages_dropped;
+	u32			reclaim_pages_discard;
 
 	struct mlx5_core_health health;
 	struct list_head	traps;