Blob Blame History Raw
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 21 Mar 2018 17:12:42 +0200
Subject: RDMA/restrack: Remove ambiguity in resource track clean logic
Patch-mainline: v4.17-rc1
Git-commit: 03286030ac0420c759fa25f5b976e40293bccaaf
References: bsc#1103992 FATE#326009

The restrack clean routine had simple, but powerful WARN_ON check
to see if all resources are cleared prior to releasing device.

The WARN_ON check performed very well, but lack of information
which device caused to resource leak, the object type and origin
made debug to be fun and challenging at the same time.

The fact that all dumps were the same because restrack_clean() is
called in dealloc() didn't help either.

So let's fix spelling error and convert WARN_ON to be more debug
friendly. The dmesg cut below gives example of how the output
will look output for the case fixed in patch [1]

[  438.421372] restrack: ------------[ cut here ]------------
[  438.423448] restrack: BUG: RESTRACK detected leak of resources on mlx5_2
[  438.425600] restrack: Kernel PD object allocated by mlx5_ib is not freed
[  438.427753] restrack: Kernel CQ object allocated by mlx5_ib is not freed
[  438.429660] restrack: ------------[ cut here ]------------

[1] https://patchwork.kernel.org/patch/10298695/

Cc: Michal Kalderon <Michal.Kalderon@cavium.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/core/restrack.c |   45 ++++++++++++++++++++++++++++++++++++-
 include/rdma/restrack.h            |    2 -
 2 files changed, 45 insertions(+), 2 deletions(-)

--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -17,9 +17,52 @@ void rdma_restrack_init(struct rdma_rest
 	init_rwsem(&res->rwsem);
 }
 
+static const char *type2str(enum rdma_restrack_type type)
+{
+	static const char * const names[RDMA_RESTRACK_MAX] = {
+		[RDMA_RESTRACK_PD] = "PD",
+		[RDMA_RESTRACK_CQ] = "CQ",
+		[RDMA_RESTRACK_QP] = "QP",
+		[RDMA_RESTRACK_CM_ID] = "CM_ID",
+		[RDMA_RESTRACK_MR] = "MR",
+	};
+
+	return names[type];
+};
+
 void rdma_restrack_clean(struct rdma_restrack_root *res)
 {
-	WARN_ON_ONCE(!hash_empty(res->hash));
+	struct rdma_restrack_entry *e;
+	char buf[TASK_COMM_LEN];
+	struct ib_device *dev;
+	const char *owner;
+	int bkt;
+
+	if (hash_empty(res->hash))
+		return;
+
+	dev = container_of(res, struct ib_device, res);
+	pr_err("restrack: %s", CUT_HERE);
+	pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
+	       dev->name);
+	hash_for_each(res->hash, bkt, e, node) {
+		if (rdma_is_kernel_res(e)) {
+			owner = e->kern_name;
+		} else {
+			/*
+			 * There is no need to call get_task_struct here,
+			 * because we can be here only if there are more
+			 * get_task_struct() call than put_task_struct().
+			 */
+			get_task_comm(buf, e->task);
+			owner = buf;
+		}
+
+		pr_err("restrack: %s %s object allocated by %s is not freed\n",
+		       rdma_is_kernel_res(e) ? "Kernel" : "User",
+		       type2str(e->type), owner);
+	}
+	pr_err("restrack: %s", CUT_HERE);
 }
 
 int rdma_restrack_count(struct rdma_restrack_root *res,
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -155,7 +155,7 @@ static inline bool rdma_is_kernel_res(st
 int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
 
 /**
- * rdma_restrack_put() - relase resource
+ * rdma_restrack_put() - release resource
  * @res:  resource entry
  */
 int rdma_restrack_put(struct rdma_restrack_entry *res);