NeilBrown c0de1e
From: Trond Myklebust <trond.myklebust@hammerspace.com>
NeilBrown c0de1e
Date: Tue, 25 May 2021 18:43:38 -0400
NeilBrown c0de1e
Subject: [PATCH] SUNRPC: More fixes for backlog congestion
NeilBrown c0de1e
Git-commit: e86be3a04bc4aeaf12f93af35f08f8d4385bcd98
NeilBrown c0de1e
Patch-mainline: v5.13-rc4
NeilBrown c0de1e
References: bsc#1185428
NeilBrown c0de1e
NeilBrown c0de1e
Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well
NeilBrown c0de1e
as socket-based transports.
NeilBrown c0de1e
Ensure we always initialise the request after waking up from the backlog
NeilBrown c0de1e
list.
NeilBrown c0de1e
NeilBrown c0de1e
Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task")
NeilBrown c0de1e
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
NeilBrown c0de1e
Acked-by: NeilBrown <neilb@suse.com>
NeilBrown c0de1e
NeilBrown c0de1e
---
NeilBrown c0de1e
 include/linux/sunrpc/xprt.h     |    2 +
NeilBrown c0de1e
 net/sunrpc/xprt.c               |   60 +++++++++++++++++++---------------------
NeilBrown c0de1e
 net/sunrpc/xprtrdma/transport.c |   12 ++++----
NeilBrown c0de1e
 net/sunrpc/xprtrdma/verbs.c     |   18 ++++++++++--
NeilBrown c0de1e
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 
NeilBrown c0de1e
 5 files changed, 53 insertions(+), 40 deletions(-)
NeilBrown c0de1e
NeilBrown c0de1e
--- a/include/linux/sunrpc/xprt.h
NeilBrown c0de1e
+++ b/include/linux/sunrpc/xprt.h
NeilBrown c0de1e
@@ -377,6 +377,8 @@ struct rpc_xprt *	xprt_alloc(struct net
NeilBrown c0de1e
 				unsigned int num_prealloc,
NeilBrown c0de1e
 				unsigned int max_req);
NeilBrown c0de1e
 void			xprt_free(struct rpc_xprt *);
NeilBrown c0de1e
+void			xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
NeilBrown c0de1e
+bool			xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
NeilBrown c0de1e
 
NeilBrown c0de1e
 static inline int
NeilBrown c0de1e
 xprt_enable_swap(struct rpc_xprt *xprt)
NeilBrown c0de1e
--- a/net/sunrpc/xprt.c
NeilBrown c0de1e
+++ b/net/sunrpc/xprt.c
NeilBrown c0de1e
@@ -1542,11 +1542,18 @@ xprt_transmit(struct rpc_task *task)
NeilBrown c0de1e
 	spin_unlock(&xprt->queue_lock);
NeilBrown c0de1e
 }
NeilBrown c0de1e
 
NeilBrown c0de1e
-static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
NeilBrown c0de1e
+static void xprt_complete_request_init(struct rpc_task *task)
NeilBrown c0de1e
+{
NeilBrown c0de1e
+	if (task->tk_rqstp)
NeilBrown c0de1e
+		xprt_request_init(task);
NeilBrown c0de1e
+}
NeilBrown c0de1e
+
NeilBrown c0de1e
+void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
NeilBrown c0de1e
 {
NeilBrown c0de1e
 	set_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown c0de1e
-	rpc_sleep_on(&xprt->backlog, task, NULL);
NeilBrown c0de1e
+	rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
NeilBrown c0de1e
 }
NeilBrown c0de1e
+EXPORT_SYMBOL_GPL(xprt_add_backlog);
NeilBrown c0de1e
 
NeilBrown c0de1e
 static bool __xprt_set_rq(struct rpc_task *task, void *data)
NeilBrown c0de1e
 {
NeilBrown c0de1e
@@ -1554,14 +1561,13 @@ static bool __xprt_set_rq(struct rpc_tas
NeilBrown c0de1e
 
NeilBrown c0de1e
 	if (task->tk_rqstp == NULL) {
NeilBrown c0de1e
 		memset(req, 0, sizeof(*req));	/* mark unused */
NeilBrown c0de1e
-		task->tk_status = -EAGAIN;
NeilBrown c0de1e
 		task->tk_rqstp = req;
NeilBrown c0de1e
 		return true;
NeilBrown c0de1e
 	}
NeilBrown c0de1e
 	return false;
NeilBrown c0de1e
 }
NeilBrown c0de1e
 
NeilBrown c0de1e
-static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown c0de1e
+bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown c0de1e
 {
NeilBrown c0de1e
 	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
NeilBrown c0de1e
 		clear_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown c0de1e
@@ -1569,6 +1575,7 @@ static bool xprt_wake_up_backlog(struct
NeilBrown c0de1e
 	}
NeilBrown c0de1e
 	return true;
NeilBrown c0de1e
 }
NeilBrown c0de1e
+EXPORT_SYMBOL_GPL(xprt_wake_up_backlog);
NeilBrown c0de1e
 
NeilBrown c0de1e
 static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
NeilBrown c0de1e
 {
NeilBrown c0de1e
@@ -1578,7 +1585,7 @@ static bool xprt_throttle_congested(stru
NeilBrown c0de1e
 		goto out;
NeilBrown c0de1e
 	spin_lock(&xprt->reserve_lock);
NeilBrown c0de1e
 	if (test_bit(XPRT_CONGESTED, &xprt->state)) {
NeilBrown c0de1e
-		rpc_sleep_on(&xprt->backlog, task, NULL);
NeilBrown c0de1e
+		xprt_add_backlog(xprt, task);
NeilBrown c0de1e
 		ret = true;
NeilBrown c0de1e
 	}
NeilBrown c0de1e
 	spin_unlock(&xprt->reserve_lock);
NeilBrown c0de1e
@@ -1747,10 +1754,6 @@ xprt_request_init(struct rpc_task *task)
NeilBrown c0de1e
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown c0de1e
 	struct rpc_rqst	*req = task->tk_rqstp;
NeilBrown c0de1e
 
NeilBrown c0de1e
-	if (req->rq_task)
NeilBrown c0de1e
-		/* Already initialized */
NeilBrown c0de1e
-		return;
NeilBrown c0de1e
-
NeilBrown c0de1e
 	req->rq_task	= task;
NeilBrown c0de1e
 	req->rq_xprt    = xprt;
NeilBrown c0de1e
 	req->rq_buffer  = NULL;
NeilBrown c0de1e
@@ -1811,10 +1814,8 @@ void xprt_retry_reserve(struct rpc_task
NeilBrown c0de1e
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown c0de1e
 
NeilBrown c0de1e
 	task->tk_status = 0;
NeilBrown c0de1e
-	if (task->tk_rqstp != NULL) {
NeilBrown c0de1e
-		xprt_request_init(task);
NeilBrown c0de1e
+	if (task->tk_rqstp != NULL)
NeilBrown c0de1e
 		return;
NeilBrown c0de1e
-	}
NeilBrown c0de1e
 
NeilBrown c0de1e
 	task->tk_status = -EAGAIN;
NeilBrown c0de1e
 	xprt_do_reserve(xprt, task);
NeilBrown c0de1e
@@ -1839,25 +1840,22 @@ void xprt_release(struct rpc_task *task)
NeilBrown c0de1e
 	}
NeilBrown c0de1e
 
NeilBrown c0de1e
 	xprt = req->rq_xprt;
NeilBrown c0de1e
-	if (xprt) {
NeilBrown c0de1e
-		xprt_request_dequeue_xprt(task);
NeilBrown c0de1e
-		spin_lock(&xprt->transport_lock);
NeilBrown c0de1e
-		xprt->ops->release_xprt(xprt, task);
NeilBrown c0de1e
-		if (xprt->ops->release_request)
NeilBrown c0de1e
-			xprt->ops->release_request(task);
NeilBrown c0de1e
-		xprt_schedule_autodisconnect(xprt);
NeilBrown c0de1e
-		spin_unlock(&xprt->transport_lock);
NeilBrown c0de1e
-		if (req->rq_buffer)
NeilBrown c0de1e
-			xprt->ops->buf_free(task);
NeilBrown c0de1e
-		xprt_inject_disconnect(xprt);
NeilBrown c0de1e
-		xdr_free_bvec(&req->rq_rcv_buf);
NeilBrown c0de1e
-		xdr_free_bvec(&req->rq_snd_buf);
NeilBrown c0de1e
-		if (req->rq_cred != NULL)
NeilBrown c0de1e
-			put_rpccred(req->rq_cred);
NeilBrown c0de1e
-		if (req->rq_release_snd_buf)
NeilBrown c0de1e
-			req->rq_release_snd_buf(req);
NeilBrown c0de1e
-	} else
NeilBrown c0de1e
-		xprt = task->tk_xprt;
NeilBrown c0de1e
+	xprt_request_dequeue_xprt(task);
NeilBrown c0de1e
+	spin_lock(&xprt->transport_lock);
NeilBrown c0de1e
+	xprt->ops->release_xprt(xprt, task);
NeilBrown c0de1e
+	if (xprt->ops->release_request)
NeilBrown c0de1e
+		xprt->ops->release_request(task);
NeilBrown c0de1e
+	xprt_schedule_autodisconnect(xprt);
NeilBrown c0de1e
+	spin_unlock(&xprt->transport_lock);
NeilBrown c0de1e
+	if (req->rq_buffer)
NeilBrown c0de1e
+		xprt->ops->buf_free(task);
NeilBrown c0de1e
+	xprt_inject_disconnect(xprt);
NeilBrown c0de1e
+	xdr_free_bvec(&req->rq_rcv_buf);
NeilBrown c0de1e
+	xdr_free_bvec(&req->rq_snd_buf);
NeilBrown c0de1e
+	if (req->rq_cred != NULL)
NeilBrown c0de1e
+		put_rpccred(req->rq_cred);
NeilBrown c0de1e
+	if (req->rq_release_snd_buf)
NeilBrown c0de1e
+		req->rq_release_snd_buf(req);
NeilBrown c0de1e
 
NeilBrown c0de1e
 	task->tk_rqstp = NULL;
NeilBrown c0de1e
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
NeilBrown c0de1e
--- a/net/sunrpc/xprtrdma/transport.c
NeilBrown c0de1e
+++ b/net/sunrpc/xprtrdma/transport.c
NeilBrown c0de1e
@@ -557,9 +557,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xp
NeilBrown c0de1e
 	return;
NeilBrown c0de1e
 
NeilBrown c0de1e
 out_sleep:
NeilBrown c0de1e
-	set_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown c0de1e
-	rpc_sleep_on(&xprt->backlog, task, NULL);
NeilBrown c0de1e
 	task->tk_status = -EAGAIN;
NeilBrown c0de1e
+	xprt_add_backlog(xprt, task);
NeilBrown c0de1e
 }
NeilBrown c0de1e
 
NeilBrown c0de1e
 /**
NeilBrown c0de1e
@@ -574,10 +573,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xpr
NeilBrown c0de1e
 	struct rpcrdma_xprt *r_xprt =
NeilBrown c0de1e
 		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
NeilBrown c0de1e
 
NeilBrown c0de1e
-	memset(rqst, 0, sizeof(*rqst));
NeilBrown c0de1e
-	rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
NeilBrown c0de1e
-	if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
NeilBrown c0de1e
-		clear_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown c0de1e
+	rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
NeilBrown c0de1e
+	if (!xprt_wake_up_backlog(xprt, rqst)) {
NeilBrown c0de1e
+		memset(rqst, 0, sizeof(*rqst));
NeilBrown c0de1e
+		rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
NeilBrown c0de1e
+	}
NeilBrown c0de1e
 }
NeilBrown c0de1e
 
NeilBrown c0de1e
 static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
NeilBrown c0de1e
--- a/net/sunrpc/xprtrdma/verbs.c
NeilBrown c0de1e
+++ b/net/sunrpc/xprtrdma/verbs.c
NeilBrown c0de1e
@@ -1308,6 +1308,20 @@ void rpcrdma_mr_put(struct rpcrdma_mr *m
NeilBrown c0de1e
 }
NeilBrown c0de1e
 
NeilBrown c0de1e
 /**
NeilBrown c0de1e
+ * rpcrdma_reply_put - Put reply buffers back into pool
NeilBrown c0de1e
+ * @buffers: buffer pool
NeilBrown c0de1e
+ * @req: object to return
NeilBrown c0de1e
+ *
NeilBrown c0de1e
+ */
NeilBrown c0de1e
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
NeilBrown c0de1e
+{
NeilBrown c0de1e
+	if (req->rl_reply) {
NeilBrown c0de1e
+		rpcrdma_rep_put(buffers, req->rl_reply);
NeilBrown c0de1e
+		req->rl_reply = NULL;
NeilBrown c0de1e
+	}
NeilBrown c0de1e
+}
NeilBrown c0de1e
+
NeilBrown c0de1e
+/**
NeilBrown c0de1e
  * rpcrdma_buffer_get - Get a request buffer
NeilBrown c0de1e
  * @buffers: Buffer pool from which to obtain a buffer
NeilBrown c0de1e
  *
NeilBrown c0de1e
@@ -1335,9 +1349,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer
NeilBrown c0de1e
  */
NeilBrown c0de1e
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
NeilBrown c0de1e
 {
NeilBrown c0de1e
-	if (req->rl_reply)
NeilBrown c0de1e
-		rpcrdma_rep_put(buffers, req->rl_reply);
NeilBrown c0de1e
-	req->rl_reply = NULL;
NeilBrown c0de1e
+	rpcrdma_reply_put(buffers, req);
NeilBrown c0de1e
 
NeilBrown c0de1e
 	spin_lock(&buffers->rb_lock);
NeilBrown c0de1e
 	list_add(&req->rl_list, &buffers->rb_send_bufs);
NeilBrown c0de1e
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
NeilBrown c0de1e
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
NeilBrown c0de1e
@@ -492,6 +492,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(s
NeilBrown c0de1e
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
NeilBrown c0de1e
 			struct rpcrdma_req *req);
NeilBrown c0de1e
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
NeilBrown c0de1e
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
NeilBrown c0de1e
 
NeilBrown c0de1e
 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
NeilBrown c0de1e
 			    gfp_t flags);