Blob Blame History Raw
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Wed, 20 Jul 2022 04:56:07 -0400
Subject: RDMA/rxe: Split qp state for requester and completer
Patch-mainline: v6.0-rc1
Git-commit: 62494ec7fbca4d58900eb62e075f2fedc85b5fb9
References: jsc#PED-1111

Currently the requester can continue to process send wqes after an local
qp operation error is detected because the setting of the qp state to the
error state is deferred until later. This patch splits the qp state for
the completer and requester into two separate states and sets
qp->req.state = QP_STATE_ERROR as soon as the error is detected before
another wqe can be executed.

Link: https://lore.kernel.org/r/1658307368-1851-4-git-send-email-lizhijian@fujitsu.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/sw/rxe/rxe_comp.c  |    6 +++---
 drivers/infiniband/sw/rxe/rxe_qp.c    |    5 +++++
 drivers/infiniband/sw/rxe/rxe_req.c   |    1 +
 drivers/infiniband/sw/rxe/rxe_verbs.h |    1 +
 4 files changed, 10 insertions(+), 3 deletions(-)

--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -567,10 +567,10 @@ int rxe_completer(void *arg)
 	if (!rxe_get(qp))
 		return -EAGAIN;
 
-	if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
-	    qp->req.state == QP_STATE_RESET) {
+	if (!qp->valid || qp->comp.state == QP_STATE_ERROR ||
+	    qp->comp.state == QP_STATE_RESET) {
 		rxe_drain_resp_pkts(qp, qp->valid &&
-				    qp->req.state == QP_STATE_ERROR);
+				    qp->comp.state == QP_STATE_ERROR);
 		goto exit;
 	}
 
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -228,6 +228,7 @@ static int rxe_qp_init_req(struct rxe_de
 					       QUEUE_TYPE_FROM_CLIENT);
 
 	qp->req.state		= QP_STATE_RESET;
+	qp->comp.state		= QP_STATE_RESET;
 	qp->req.opcode		= -1;
 	qp->comp.opcode		= -1;
 
@@ -488,6 +489,7 @@ static void rxe_qp_reset(struct rxe_qp *
 
 	/* move qp to the reset state */
 	qp->req.state = QP_STATE_RESET;
+	qp->comp.state = QP_STATE_RESET;
 	qp->resp.state = QP_STATE_RESET;
 
 	/* let state machines reset themselves drain work and packet queues
@@ -551,6 +553,7 @@ void rxe_qp_error(struct rxe_qp *qp)
 {
 	qp->req.state = QP_STATE_ERROR;
 	qp->resp.state = QP_STATE_ERROR;
+	qp->comp.state = QP_STATE_ERROR;
 	qp->attr.qp_state = IB_QPS_ERR;
 
 	/* drain work and packet queues */
@@ -688,6 +691,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp,
 			pr_debug("qp#%d state -> INIT\n", qp_num(qp));
 			qp->req.state = QP_STATE_INIT;
 			qp->resp.state = QP_STATE_INIT;
+			qp->comp.state = QP_STATE_INIT;
 			break;
 
 		case IB_QPS_RTR:
@@ -698,6 +702,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp,
 		case IB_QPS_RTS:
 			pr_debug("qp#%d state -> RTS\n", qp_num(qp));
 			qp->req.state = QP_STATE_READY;
+			qp->comp.state = QP_STATE_READY;
 			break;
 
 		case IB_QPS_SQD:
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -816,6 +816,7 @@ err:
 	/* update wqe_index for each wqe completion */
 	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
 	wqe->state = wqe_state_error;
+	qp->req.state = QP_STATE_ERROR;
 	rxe_run_task(&qp->comp.task, 0);
 exit:
 	ret = -EAGAIN;
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -129,6 +129,7 @@ struct rxe_req_info {
 };
 
 struct rxe_comp_info {
+	enum rxe_qp_state	state;
 	u32			psn;
 	int			opcode;
 	int			timeout;