NeilBrown 862f15
From: NeilBrown <neilb@suse.de>
NeilBrown 862f15
Date: Mon, 17 May 2021 09:34:38 +1000
NeilBrown 862f15
Subject: [PATCH] SUNRPC in case of backlog, hand free slots directly to
NeilBrown 862f15
 waiting task
NeilBrown 862f15
Git-commit: e877a88d1f069edced4160792f42c2a8e2dba942
NeilBrown c0de1e
Patch-mainline: v5.13-rc4
NeilBrown 862f15
References: bsc#1185428
NeilBrown 862f15
NeilBrown 862f15
If sunrpc.tcp_max_slot_table_entries is small and there are tasks
NeilBrown 862f15
on the backlog queue, then when a request completes it is freed and the
NeilBrown 862f15
first task on the queue is woken.  The expectation is that it will wake
NeilBrown 862f15
and claim that request.  However if it was a sync task and the waiting
NeilBrown 862f15
process was killed at just that moment, it will wake and NOT claim the
NeilBrown 862f15
request.
NeilBrown 862f15
NeilBrown 862f15
As long as TASK_CONGESTED remains set, requests can only be claimed by
NeilBrown 862f15
tasks woken from the backlog, and they are woken only as requests are
NeilBrown 862f15
freed, so when a task doesn't claim a request, no other task can ever
NeilBrown 862f15
get that request until TASK_CONGESTED is cleared.  Each time this
NeilBrown 862f15
happens the number of available requests is decreased by one.
NeilBrown 862f15
NeilBrown 862f15
With a sufficiently high workload and sufficiently low setting of
NeilBrown 862f15
max_slot (16 in the case where this was seen), TASK_CONGESTED can remain
NeilBrown 862f15
set for an extended period, and the above scenario (of a process being
NeilBrown 862f15
killed just as its task was woken) can repeat until no requests can be
NeilBrown 862f15
allocated.  Then traffic stops.
NeilBrown 862f15
NeilBrown 862f15
This patch addresses the problem by introducing a positive handover of a
NeilBrown 862f15
request from a completing task to a backlog task - the request is never
NeilBrown 862f15
freed when there is a backlog.
NeilBrown 862f15
NeilBrown 862f15
When a task is woken it might already have a request attached in which
NeilBrown 862f15
case it is *not* freed (as with current code) but is initialised (if
NeilBrown 862f15
needed) and used.  If it isn't used it will eventually be freed by
NeilBrown 862f15
rpc_exit_task().  xprt_release() is enhanced to be able to correctly
NeilBrown 862f15
release an uninitialised request.
NeilBrown 862f15
NeilBrown 862f15
Signed-off-by: NeilBrown <neilb@suse.de>
NeilBrown 862f15
Acked-by: NeilBrown <neilb@suse.com>
NeilBrown 862f15
NeilBrown 862f15
---
NeilBrown 862f15
 net/sunrpc/clnt.c |    7 -----
NeilBrown 862f15
 net/sunrpc/xprt.c |   70 +++++++++++++++++++++++++++++++++++++-----------------
NeilBrown 862f15
 2 files changed, 48 insertions(+), 29 deletions(-)
NeilBrown 862f15
NeilBrown 862f15
--- a/net/sunrpc/clnt.c
NeilBrown 862f15
+++ b/net/sunrpc/clnt.c
NeilBrown 862f15
@@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task
NeilBrown 862f15
 		return;
NeilBrown 862f15
 	}
NeilBrown 862f15
 
NeilBrown 862f15
-	/*
NeilBrown 862f15
-	 * Even though there was an error, we may have acquired
NeilBrown 862f15
-	 * a request slot somehow.  Make sure not to leak it.
NeilBrown 862f15
-	 */
NeilBrown 862f15
-	if (task->tk_rqstp)
NeilBrown 862f15
-		xprt_release(task);
NeilBrown 862f15
-
NeilBrown 862f15
 	switch (status) {
NeilBrown 862f15
 	case -ENOMEM:
NeilBrown 862f15
 		rpc_delay(task, HZ >> 2);
NeilBrown 862f15
--- a/net/sunrpc/xprt.c
NeilBrown 862f15
+++ b/net/sunrpc/xprt.c
NeilBrown 862f15
@@ -70,6 +70,7 @@
NeilBrown 862f15
 static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
NeilBrown 862f15
 static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
NeilBrown 862f15
 static void	 xprt_destroy(struct rpc_xprt *xprt);
NeilBrown 862f15
+static void	 xprt_request_init(struct rpc_task *task);
NeilBrown 862f15
 
NeilBrown 862f15
 static DEFINE_SPINLOCK(xprt_list_lock);
NeilBrown 862f15
 static LIST_HEAD(xprt_list);
NeilBrown 862f15
@@ -1547,10 +1548,26 @@ static void xprt_add_backlog(struct rpc_
NeilBrown 862f15
 	rpc_sleep_on(&xprt->backlog, task, NULL);
NeilBrown 862f15
 }
NeilBrown 862f15
 
NeilBrown 862f15
-static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
NeilBrown 862f15
+static bool __xprt_set_rq(struct rpc_task *task, void *data)
NeilBrown 862f15
 {
NeilBrown 862f15
-	if (rpc_wake_up_next(&xprt->backlog) == NULL)
NeilBrown 862f15
+	struct rpc_rqst *req = data;
NeilBrown 862f15
+
NeilBrown 862f15
+	if (task->tk_rqstp == NULL) {
NeilBrown 862f15
+		memset(req, 0, sizeof(*req));	/* mark unused */
NeilBrown 862f15
+		task->tk_status = -EAGAIN;
NeilBrown 862f15
+		task->tk_rqstp = req;
NeilBrown 862f15
+		return true;
NeilBrown 862f15
+	}
NeilBrown 862f15
+	return false;
NeilBrown 862f15
+}
NeilBrown 862f15
+
NeilBrown 862f15
+static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown 862f15
+{
NeilBrown 862f15
+	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
NeilBrown 862f15
 		clear_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown 862f15
+		return false;
NeilBrown 862f15
+	}
NeilBrown 862f15
+	return true;
NeilBrown 862f15
 }
NeilBrown 862f15
 
NeilBrown 862f15
 static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
NeilBrown 862f15
@@ -1638,11 +1655,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot);
NeilBrown 862f15
 void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown 862f15
 {
NeilBrown 862f15
 	spin_lock(&xprt->reserve_lock);
NeilBrown 862f15
-	if (!xprt_dynamic_free_slot(xprt, req)) {
NeilBrown 862f15
+	if (!xprt_wake_up_backlog(xprt, req) &&
NeilBrown 862f15
+	    !xprt_dynamic_free_slot(xprt, req)) {
NeilBrown 862f15
 		memset(req, 0, sizeof(*req));	/* mark unused */
NeilBrown 862f15
 		list_add(&req->rq_list, &xprt->free);
NeilBrown 862f15
 	}
NeilBrown 862f15
-	xprt_wake_up_backlog(xprt);
NeilBrown 862f15
 	spin_unlock(&xprt->reserve_lock);
NeilBrown 862f15
 }
NeilBrown 862f15
 EXPORT_SYMBOL_GPL(xprt_free_slot);
NeilBrown 862f15
@@ -1730,6 +1747,10 @@ xprt_request_init(struct rpc_task *task)
NeilBrown 862f15
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown 862f15
 	struct rpc_rqst	*req = task->tk_rqstp;
NeilBrown 862f15
 
NeilBrown 862f15
+	if (req->rq_task)
NeilBrown 862f15
+		/* Already initialized */
NeilBrown 862f15
+		return;
NeilBrown 862f15
+
NeilBrown 862f15
 	req->rq_task	= task;
NeilBrown 862f15
 	req->rq_xprt    = xprt;
NeilBrown 862f15
 	req->rq_buffer  = NULL;
NeilBrown 862f15
@@ -1790,8 +1811,10 @@ void xprt_retry_reserve(struct rpc_task
NeilBrown 862f15
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown 862f15
 
NeilBrown 862f15
 	task->tk_status = 0;
NeilBrown 862f15
-	if (task->tk_rqstp != NULL)
NeilBrown 862f15
+	if (task->tk_rqstp != NULL) {
NeilBrown 862f15
+		xprt_request_init(task);
NeilBrown 862f15
 		return;
NeilBrown 862f15
+	}
NeilBrown 862f15
 
NeilBrown 862f15
 	task->tk_status = -EAGAIN;
NeilBrown 862f15
 	xprt_do_reserve(xprt, task);
NeilBrown 862f15
@@ -1816,24 +1839,27 @@ void xprt_release(struct rpc_task *task)
NeilBrown 862f15
 	}
NeilBrown 862f15
 
NeilBrown 862f15
 	xprt = req->rq_xprt;
NeilBrown 862f15
-	xprt_request_dequeue_xprt(task);
NeilBrown 862f15
-	spin_lock(&xprt->transport_lock);
NeilBrown 862f15
-	xprt->ops->release_xprt(xprt, task);
NeilBrown 862f15
-	if (xprt->ops->release_request)
NeilBrown 862f15
-		xprt->ops->release_request(task);
NeilBrown 862f15
-	xprt_schedule_autodisconnect(xprt);
NeilBrown 862f15
-	spin_unlock(&xprt->transport_lock);
NeilBrown 862f15
-	if (req->rq_buffer)
NeilBrown 862f15
-		xprt->ops->buf_free(task);
NeilBrown 862f15
-	xprt_inject_disconnect(xprt);
NeilBrown 862f15
-	xdr_free_bvec(&req->rq_rcv_buf);
NeilBrown 862f15
-	xdr_free_bvec(&req->rq_snd_buf);
NeilBrown 862f15
-	if (req->rq_cred != NULL)
NeilBrown 862f15
-		put_rpccred(req->rq_cred);
NeilBrown 862f15
-	task->tk_rqstp = NULL;
NeilBrown 862f15
-	if (req->rq_release_snd_buf)
NeilBrown 862f15
-		req->rq_release_snd_buf(req);
NeilBrown 862f15
+	if (xprt) {
NeilBrown 862f15
+		xprt_request_dequeue_xprt(task);
NeilBrown 862f15
+		spin_lock(&xprt->transport_lock);
NeilBrown 862f15
+		xprt->ops->release_xprt(xprt, task);
NeilBrown 862f15
+		if (xprt->ops->release_request)
NeilBrown 862f15
+			xprt->ops->release_request(task);
NeilBrown 862f15
+		xprt_schedule_autodisconnect(xprt);
NeilBrown 862f15
+		spin_unlock(&xprt->transport_lock);
NeilBrown 862f15
+		if (req->rq_buffer)
NeilBrown 862f15
+			xprt->ops->buf_free(task);
NeilBrown 862f15
+		xprt_inject_disconnect(xprt);
NeilBrown 862f15
+		xdr_free_bvec(&req->rq_rcv_buf);
NeilBrown 862f15
+		xdr_free_bvec(&req->rq_snd_buf);
NeilBrown 862f15
+		if (req->rq_cred != NULL)
NeilBrown 862f15
+			put_rpccred(req->rq_cred);
NeilBrown 862f15
+		if (req->rq_release_snd_buf)
NeilBrown 862f15
+			req->rq_release_snd_buf(req);
NeilBrown 862f15
+	} else
NeilBrown 862f15
+		xprt = task->tk_xprt;
NeilBrown 862f15
 
NeilBrown 862f15
+	task->tk_rqstp = NULL;
NeilBrown 862f15
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
NeilBrown 862f15
 	if (likely(!bc_prealloc(req)))
NeilBrown 862f15
 		xprt->ops->free_slot(xprt, req);