Blob Blame History Raw
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 18 Aug 2020 15:05:18 +0300
Subject: RDMA/cma: Add missing locking to rdma_accept()
Patch-mainline: v5.10-rc1
Git-commit: d114c6feedfe0600c19b9f9479a4026354d1f7fd
References: bsc#1181147

In almost all cases rdma_accept() is called under the handler_mutex by
ULPs from their handler callbacks. The one exception was ucma which did
not get the handler_mutex.

To improve the understand-ability of the locking scheme obtain the mutex
for ucma as well.

This improves how ucma works by allowing it to directly use handler_mutex
for some of its internal locking against the handler callbacks intead of
the global file->mut lock.

There does not seem to be a serious bug here, other than a DISCONNECT event
can be delivered concurrently with accept succeeding.

Link: https://lore.kernel.org/r/20200818120526.702120-7-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/core/cma.c  |   25 ++++++++++++++++++++++---
 drivers/infiniband/core/ucma.c |   12 ++++++++----
 include/rdma/rdma_cm.h         |    5 +++++
 3 files changed, 35 insertions(+), 7 deletions(-)

--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4157,14 +4157,15 @@ static int cma_send_sidr_rep(struct rdma
 int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 		  const char *caller)
 {
-	struct rdma_id_private *id_priv;
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
 	int ret;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
+	lockdep_assert_held(&id_priv->handler_mutex);
 
 	rdma_restrack_set_task(&id_priv->res, caller);
 
-	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+	if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
 		return -EINVAL;
 
 	if (!id->qp && conn_param) {
@@ -4217,6 +4218,24 @@ int __rdma_accept_ece(struct rdma_cm_id
 }
 EXPORT_SYMBOL(__rdma_accept_ece);
 
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+
+	mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+
+	mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
+
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
 	struct rdma_id_private *id_priv;
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1162,16 +1162,20 @@ static ssize_t ucma_accept(struct ucma_f
 
 	if (cmd.conn_param.valid) {
 		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
-		mutex_lock(&file->mut);
 		mutex_lock(&ctx->mutex);
+		rdma_lock_handler(ctx->cm_id);
 		ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
-		mutex_unlock(&ctx->mutex);
-		if (!ret)
+		if (!ret) {
+			/* The uid must be set atomically with the handler */
 			ctx->uid = cmd.uid;
-		mutex_unlock(&file->mut);
+		}
+		rdma_unlock_handler(ctx->cm_id);
+		mutex_unlock(&ctx->mutex);
 	} else {
 		mutex_lock(&ctx->mutex);
+		rdma_lock_handler(ctx->cm_id);
 		ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
+		rdma_unlock_handler(ctx->cm_id);
 		mutex_unlock(&ctx->mutex);
 	}
 	ucma_put_ctx(ctx);
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -253,6 +253,8 @@ int rdma_listen(struct rdma_cm_id *id, i
 int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 		  const char *caller);
 
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
 int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 		      const char *caller, struct rdma_ucm_ece *ece);
 
@@ -270,6 +272,9 @@ int __rdma_accept_ece(struct rdma_cm_id
  * In the case of error, a reject message is sent to the remote side and the
  * state of the qp associated with the id is modified to error, such that any
  * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
  */
 #define rdma_accept(id, conn_param) \
 	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)