Blob Blame History Raw
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Tue, 16 Jan 2018 16:14:13 -0800
Subject: IB/srpt: Fix login-related race conditions
Patch-mainline: v4.16-rc1
Git-commit: db7683d7deb25d6edc9c59ac45c56c6a48a45514
References: bsc#1103992 FATE#326009

Make sure that sport->mutex is not released between the duplicate
channel check, adding a channel to the channel list and performing
the sport enabled check. Avoid that srpt_disconnect_ch() can be
invoked concurrently with the ib_send_cm_rep() call by
srpt_cm_req_recv().

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c |  195 +++++++++++++++++++---------------
 1 file changed, 111 insertions(+), 84 deletions(-)

--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2040,7 +2040,7 @@ static int srpt_cm_req_recv(struct ib_cm
 	struct srpt_rdma_ch *ch;
 	char i_port_id[36];
 	u32 it_iu_len;
-	int i, ret = 0;
+	int i, ret;
 
 	WARN_ON_ONCE(irqs_disabled());
 
@@ -2063,69 +2063,43 @@ static int srpt_cm_req_recv(struct ib_cm
 		goto out;
 	}
 
+	ret = -ENOMEM;
 	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
 	rej = kzalloc(sizeof(*rej), GFP_KERNEL);
 	rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
-
-	if (!rsp || !rej || !rep_param) {
-		ret = -ENOMEM;
+	if (!rsp || !rej || !rep_param)
 		goto out;
-	}
 
+	ret = -EINVAL;
 	if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
 		rej->reason = cpu_to_be32(
-			      SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
-		ret = -EINVAL;
-		pr_err("rejected SRP_LOGIN_REQ because its"
-		       " length (%d bytes) is out of range (%d .. %d)\n",
+				SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
+		pr_err("rejected SRP_LOGIN_REQ because its length (%d bytes) is out of range (%d .. %d)\n",
 		       it_iu_len, 64, srp_max_req_size);
 		goto reject;
 	}
 
 	if (!sport->enabled) {
-		rej->reason = cpu_to_be32(
-			      SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-		ret = -EINVAL;
-		pr_err("rejected SRP_LOGIN_REQ because the target port"
-		       " has not yet been enabled\n");
+		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+		pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
+			sport->sdev->device->name, param->port);
 		goto reject;
 	}
 
-	if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
-		struct srpt_rdma_ch *ch2;
-
-		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
-
-		mutex_lock(&sport->mutex);
-		list_for_each_entry(ch2, &nexus->ch_list, list) {
-			if (srpt_disconnect_ch(ch2) < 0)
-				continue;
-			pr_info("Relogin - closed existing channel %s\n",
-				ch2->sess_name);
-			rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
-		}
-		mutex_unlock(&sport->mutex);
-	} else {
-		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
-	}
-
 	if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
 	    || *(__be64 *)(req->target_port_id + 8) !=
 	       cpu_to_be64(srpt_service_guid)) {
 		rej->reason = cpu_to_be32(
-			      SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
-		ret = -ENOMEM;
-		pr_err("rejected SRP_LOGIN_REQ because it"
-		       " has an invalid target port identifier.\n");
+				SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
+		pr_err("rejected SRP_LOGIN_REQ because it has an invalid target port identifier.\n");
 		goto reject;
 	}
 
+	ret = -ENOMEM;
 	ch = kzalloc(sizeof(*ch), GFP_KERNEL);
 	if (!ch) {
-		rej->reason = cpu_to_be32(
-			      SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-		pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
-		ret = -ENOMEM;
+		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+		pr_err("rejected SRP_LOGIN_REQ because out of memory.\n");
 		goto reject;
 	}
 
@@ -2153,8 +2127,11 @@ static int srpt_cm_req_recv(struct ib_cm
 		srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
 				      sizeof(*ch->ioctx_ring[0]),
 				      ch->max_rsp_size, DMA_TO_DEVICE);
-	if (!ch->ioctx_ring)
+	if (!ch->ioctx_ring) {
+		pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
+		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
 		goto free_ch;
+	}
 
 	INIT_LIST_HEAD(&ch->free_list);
 	for (i = 0; i < ch->rq_size; i++) {
@@ -2177,19 +2154,9 @@ static int srpt_cm_req_recv(struct ib_cm
 
 	ret = srpt_create_ch_ib(ch);
 	if (ret) {
-		rej->reason = cpu_to_be32(
-			      SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-		pr_err("rejected SRP_LOGIN_REQ because creating"
-		       " a new RDMA channel failed.\n");
-		goto free_recv_ring;
-	}
-
-	ret = srpt_ch_qp_rtr(ch, ch->qp);
-	if (ret) {
 		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-		pr_err("rejected SRP_LOGIN_REQ because enabling"
-		       " RTR failed (error code = %d)\n", ret);
-		goto destroy_ib;
+		pr_err("rejected SRP_LOGIN_REQ because creating a new RDMA channel failed.\n");
+		goto free_recv_ring;
 	}
 
 	srpt_format_guid(ch->sess_name, sizeof(ch->sess_name),
@@ -2214,11 +2181,51 @@ static int srpt_cm_req_recv(struct ib_cm
 						TARGET_PROT_NORMAL,
 						i_port_id + 2, ch, NULL);
 	if (IS_ERR_OR_NULL(ch->sess)) {
-		pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
-			ch->sess_name);
-		rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
+		ret = PTR_ERR(ch->sess);
+		pr_info("Rejected login for initiator %s: ret = %d.\n",
+			ch->sess_name, ret);
+		rej->reason = cpu_to_be32(ret == -ENOMEM ?
 				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
 				SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
+		goto reject;
+	}
+
+	mutex_lock(&sport->mutex);
+
+	if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
+		struct srpt_rdma_ch *ch2;
+
+		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
+
+		list_for_each_entry(ch2, &nexus->ch_list, list) {
+			if (srpt_disconnect_ch(ch2) < 0)
+				continue;
+			pr_info("Relogin - closed existing channel %s\n",
+				ch2->sess_name);
+			rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
+		}
+	} else {
+		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
+	}
+
+	list_add_tail_rcu(&ch->list, &nexus->ch_list);
+
+	if (!sport->enabled) {
+		rej->reason = cpu_to_be32(
+				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+		pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
+			sdev->device->name, param->port);
+		mutex_unlock(&sport->mutex);
+		goto reject;
+	}
+
+	mutex_unlock(&sport->mutex);
+
+	ret = srpt_ch_qp_rtr(ch, ch->qp);
+	if (ret) {
+		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+		pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
+		       ret);
 		goto destroy_ib;
 	}
 
@@ -2231,8 +2238,8 @@ static int srpt_cm_req_recv(struct ib_cm
 	rsp->max_it_iu_len = req->req_it_iu_len;
 	rsp->max_ti_iu_len = req->req_it_iu_len;
 	ch->max_ti_iu_len = it_iu_len;
-	rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
-				   | SRP_BUF_FORMAT_INDIRECT);
+	rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+				   SRP_BUF_FORMAT_INDIRECT);
 	rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
 	atomic_set(&ch->req_lim, ch->rq_size);
 	atomic_set(&ch->req_lim_delta, 0);
@@ -2248,24 +2255,30 @@ static int srpt_cm_req_recv(struct ib_cm
 	rep_param->responder_resources = 4;
 	rep_param->initiator_depth = 4;
 
-	ret = ib_send_cm_rep(cm_id, rep_param);
-	if (ret) {
-		pr_err("sending SRP_LOGIN_REQ response failed"
-		       " (error code = %d)\n", ret);
-		goto release_channel;
-	}
-
+	/*
+	 * Hold the sport mutex while accepting a connection to avoid that
+	 * srpt_disconnect_ch() is invoked concurrently with this code.
+	 */
 	mutex_lock(&sport->mutex);
-	list_add_tail_rcu(&ch->list, &nexus->ch_list);
+	if (sport->enabled && ch->state == CH_CONNECTING)
+		ret = ib_send_cm_rep(cm_id, rep_param);
+	else
+		ret = -EINVAL;
 	mutex_unlock(&sport->mutex);
 
-	goto out;
+	switch (ret) {
+	case 0:
+		break;
+	case -EINVAL:
+		goto reject;
+	default:
+		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+		pr_err("sending SRP_LOGIN_REQ response failed (error code = %d)\n",
+		       ret);
+		goto reject;
+	}
 
-release_channel:
-	srpt_disconnect_ch(ch);
-	transport_deregister_session_configfs(ch->sess);
-	transport_deregister_session(ch->sess);
-	ch->sess = NULL;
+	goto out;
 
 destroy_ib:
 	srpt_destroy_ch_ib(ch);
@@ -2280,13 +2293,18 @@ free_ring:
 			     ch->sport->sdev, ch->rq_size,
 			     ch->max_rsp_size, DMA_TO_DEVICE);
 free_ch:
+	cm_id->context = NULL;
 	kfree(ch);
+	ch = NULL;
+
+	WARN_ON_ONCE(ret == 0);
 
 reject:
+	pr_info("Rejecting login with reason %#x\n", be32_to_cpu(rej->reason));
 	rej->opcode = SRP_LOGIN_REJ;
 	rej->tag = req->tag;
-	rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
-				   | SRP_BUF_FORMAT_INDIRECT);
+	rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+				   SRP_BUF_FORMAT_INDIRECT);
 
 	ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 			     (void *)rej, sizeof(*rej));
@@ -2329,17 +2347,26 @@ static void srpt_cm_rtu_recv(struct srpt
 {
 	int ret;
 
-	if (srpt_set_ch_state(ch, CH_LIVE)) {
-		ret = srpt_ch_qp_rts(ch, ch->qp);
-
-		if (ret == 0) {
-			/* Trigger wait list processing. */
-			ret = srpt_zerolength_write(ch);
-			WARN_ONCE(ret < 0, "%d\n", ret);
-		} else {
-			srpt_close_ch(ch);
-		}
+	ret = srpt_ch_qp_rts(ch, ch->qp);
+	if (ret < 0) {
+		pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name,
+		       ch->qp->qp_num);
+		srpt_close_ch(ch);
+		return;
 	}
+
+	/* Trigger wait list processing. */
+	ret = srpt_zerolength_write(ch);
+	WARN_ONCE(ret < 0, "%d\n", ret);
+
+	/*
+	 * Note: calling srpt_close_ch() if the transition to the LIVE state
+	 * fails is not necessary since that means that that function has
+	 * already been invoked from another thread.
+	 */
+	if (!srpt_set_ch_state(ch, CH_LIVE))
+		pr_err("%s-%d: channel transition to LIVE state failed\n",
+		       ch->sess_name, ch->qp->qp_num);
 }
 
 /**