NeilBrown 60c5b3
From: NeilBrown <neilb@suse.de>
NeilBrown 60c5b3
Date: Mon, 17 May 2021 09:34:38 +1000
NeilBrown 60c5b3
Subject: [PATCH] SUNRPC in case of backlog, hand free slots directly to
NeilBrown 60c5b3
 waiting task
NeilBrown 60c5b3
Git-commit: e877a88d1f069edced4160792f42c2a8e2dba942
NeilBrown 60c5b3
Git-repo: git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git
NeilBrown 60c5b3
Patch-mainline: Queued in subsystem maintainer repository
NeilBrown 60c5b3
References: bsc#1185428
NeilBrown 60c5b3
NeilBrown 60c5b3
If sunrpc.tcp_max_slot_table_entries is small and there are tasks
NeilBrown 60c5b3
on the backlog queue, then when a request completes it is freed and the
NeilBrown 60c5b3
first task on the queue is woken.  The expectation is that it will wake
NeilBrown 60c5b3
and claim that request.  However if it was a sync task and the waiting
NeilBrown 60c5b3
process was killed at just that moment, it will wake and NOT claim the
NeilBrown 60c5b3
request.
NeilBrown 60c5b3
NeilBrown 60c5b3
As long as TASK_CONGESTED remains set, requests can only be claimed by
NeilBrown 60c5b3
tasks woken from the backlog, and they are woken only as requests are
NeilBrown 60c5b3
freed, so when a task doesn't claim a request, no other task can ever
NeilBrown 60c5b3
get that request until TASK_CONGESTED is cleared.  Each time this
NeilBrown 60c5b3
happens the number of available requests is decreased by one.
NeilBrown 60c5b3
NeilBrown 60c5b3
With a sufficiently high workload and sufficiently low setting of
NeilBrown 60c5b3
max_slot (16 in the case where this was seen), TASK_CONGESTED can remain
NeilBrown 60c5b3
set for an extended period, and the above scenario (of a process being
NeilBrown 60c5b3
killed just as its task was woken) can repeat until no requests can be
NeilBrown 60c5b3
allocated.  Then traffic stops.
NeilBrown 60c5b3
NeilBrown 60c5b3
This patch addresses the problem by introducing a positive handover of a
NeilBrown 60c5b3
request from a completing task to a backlog task - the request is never
NeilBrown 60c5b3
freed when there is a backlog.
NeilBrown 60c5b3
NeilBrown 60c5b3
When a task is woken it might already have a request attached in which
NeilBrown 60c5b3
case it is *not* freed (as with current code) but is initialised (if
NeilBrown 60c5b3
needed) and used.  If it isn't used it will eventually be freed by
NeilBrown 60c5b3
rpc_exit_task().  xprt_release() is enhanced to be able to correctly
NeilBrown 60c5b3
release an uninitialised request.
NeilBrown 60c5b3
NeilBrown 60c5b3
Signed-off-by: NeilBrown <neilb@suse.de>
NeilBrown 60c5b3
Acked-by: NeilBrown <neilb@suse.com>
NeilBrown 60c5b3
NeilBrown 60c5b3
---
NeilBrown 60c5b3
 net/sunrpc/clnt.c |   10 ------
NeilBrown 60c5b3
 net/sunrpc/xprt.c |   86 +++++++++++++++++++++++++++++++++++-------------------
NeilBrown 60c5b3
 2 files changed, 56 insertions(+), 40 deletions(-)
NeilBrown 60c5b3
NeilBrown 60c5b3
--- a/net/sunrpc/clnt.c
NeilBrown 60c5b3
+++ b/net/sunrpc/clnt.c
NeilBrown 60c5b3
@@ -1630,16 +1630,6 @@ call_reserveresult(struct rpc_task *task
NeilBrown 60c5b3
 		return;
NeilBrown 60c5b3
 	}
NeilBrown 60c5b3
 
NeilBrown 60c5b3
-	/*
NeilBrown 60c5b3
-	 * Even though there was an error, we may have acquired
NeilBrown 60c5b3
-	 * a request slot somehow.  Make sure not to leak it.
NeilBrown 60c5b3
-	 */
NeilBrown 60c5b3
-	if (task->tk_rqstp) {
NeilBrown 60c5b3
-		printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
NeilBrown 60c5b3
-				__func__, status);
NeilBrown 60c5b3
-		xprt_release(task);
NeilBrown 60c5b3
-	}
NeilBrown 60c5b3
-
NeilBrown 60c5b3
 	switch (status) {
NeilBrown 60c5b3
 	case -ENOMEM:
NeilBrown 60c5b3
 		rpc_delay(task, HZ >> 2);
NeilBrown 60c5b3
--- a/net/sunrpc/xprt.c
NeilBrown 60c5b3
+++ b/net/sunrpc/xprt.c
NeilBrown 60c5b3
@@ -71,6 +71,7 @@ static void	xprt_connect_status(struct r
NeilBrown 60c5b3
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
NeilBrown 60c5b3
 static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
NeilBrown 60c5b3
 static void	 xprt_destroy(struct rpc_xprt *xprt);
NeilBrown 60c5b3
+static void	 xprt_request_init(struct rpc_task *task);
NeilBrown 60c5b3
 
NeilBrown 60c5b3
 static DEFINE_SPINLOCK(xprt_list_lock);
NeilBrown 60c5b3
 static LIST_HEAD(xprt_list);
NeilBrown 60c5b3
@@ -1088,10 +1089,26 @@ static void xprt_add_backlog(struct rpc_
NeilBrown 60c5b3
 	rpc_sleep_on(&xprt->backlog, task, NULL);
NeilBrown 60c5b3
 }
NeilBrown 60c5b3
 
NeilBrown 60c5b3
-static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
NeilBrown 60c5b3
+static bool __xprt_set_rq(struct rpc_task *task, void *data)
NeilBrown 60c5b3
 {
NeilBrown 60c5b3
-	if (rpc_wake_up_next(&xprt->backlog) == NULL)
NeilBrown 60c5b3
+	struct rpc_rqst *req = data;
NeilBrown 60c5b3
+
NeilBrown 60c5b3
+	if (task->tk_rqstp == NULL) {
NeilBrown 60c5b3
+		memset(req, 0, sizeof(*req));	/* mark unused */
NeilBrown 60c5b3
+		task->tk_status = -EAGAIN;
NeilBrown 60c5b3
+		task->tk_rqstp = req;
NeilBrown 60c5b3
+		return true;
NeilBrown 60c5b3
+	}
NeilBrown 60c5b3
+	return false;
NeilBrown 60c5b3
+}
NeilBrown 60c5b3
+
NeilBrown 60c5b3
+static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown 60c5b3
+{
NeilBrown 60c5b3
+	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
NeilBrown 60c5b3
 		clear_bit(XPRT_CONGESTED, &xprt->state);
NeilBrown 60c5b3
+		return false;
NeilBrown 60c5b3
+	}
NeilBrown 60c5b3
+	return true;
NeilBrown 60c5b3
 }
NeilBrown 60c5b3
 
NeilBrown 60c5b3
 static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
NeilBrown 60c5b3
@@ -1192,11 +1209,11 @@ EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_sl
NeilBrown 60c5b3
 void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
NeilBrown 60c5b3
 {
NeilBrown 60c5b3
 	spin_lock(&xprt->reserve_lock);
NeilBrown 60c5b3
-	if (!xprt_dynamic_free_slot(xprt, req)) {
NeilBrown 60c5b3
+	if (!xprt_wake_up_backlog(xprt, req) &&
NeilBrown 60c5b3
+	    !xprt_dynamic_free_slot(xprt, req)) {
NeilBrown 60c5b3
 		memset(req, 0, sizeof(*req));	/* mark unused */
NeilBrown 60c5b3
 		list_add(&req->rq_list, &xprt->free);
NeilBrown 60c5b3
 	}
NeilBrown 60c5b3
-	xprt_wake_up_backlog(xprt);
NeilBrown 60c5b3
 	spin_unlock(&xprt->reserve_lock);
NeilBrown 60c5b3
 }
NeilBrown 60c5b3
 EXPORT_SYMBOL_GPL(xprt_free_slot);
NeilBrown 60c5b3
@@ -1272,6 +1289,10 @@ xprt_request_init(struct rpc_task *task)
NeilBrown 60c5b3
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown 60c5b3
 	struct rpc_rqst	*req = task->tk_rqstp;
NeilBrown 60c5b3
 
NeilBrown 60c5b3
+	if (req->rq_task)
NeilBrown 60c5b3
+		/* Already initialized */
NeilBrown 60c5b3
+		return;
NeilBrown 60c5b3
+
NeilBrown 60c5b3
 	INIT_LIST_HEAD(&req->rq_list);
NeilBrown 60c5b3
 	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
NeilBrown 60c5b3
 	req->rq_task	= task;
NeilBrown 60c5b3
@@ -1334,8 +1355,10 @@ void xprt_retry_reserve(struct rpc_task
NeilBrown 60c5b3
 	struct rpc_xprt *xprt = task->tk_xprt;
NeilBrown 60c5b3
 
NeilBrown 60c5b3
 	task->tk_status = 0;
NeilBrown 60c5b3
-	if (task->tk_rqstp != NULL)
NeilBrown 60c5b3
+	if (task->tk_rqstp != NULL) {
NeilBrown 60c5b3
+		xprt_request_init(task);
NeilBrown 60c5b3
 		return;
NeilBrown 60c5b3
+	}
NeilBrown 60c5b3
 
NeilBrown 60c5b3
 	task->tk_timeout = 0;
NeilBrown 60c5b3
 	task->tk_status = -EAGAIN;
NeilBrown 60c5b3
@@ -1362,32 +1385,35 @@ void xprt_release(struct rpc_task *task)
NeilBrown 60c5b3
 	}
NeilBrown 60c5b3
 
NeilBrown 60c5b3
 	xprt = req->rq_xprt;
NeilBrown 60c5b3
-	if (task->tk_ops->rpc_count_stats != NULL)
NeilBrown 60c5b3
-		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
NeilBrown 60c5b3
-	else if (task->tk_client)
NeilBrown 60c5b3
-		rpc_count_iostats(task, task->tk_client->cl_metrics);
NeilBrown 60c5b3
-	spin_lock(&xprt->recv_lock);
NeilBrown 60c5b3
-	if (!list_empty(&req->rq_list)) {
NeilBrown 60c5b3
-		list_del_init(&req->rq_list);
NeilBrown 60c5b3
-		xprt_wait_on_pinned_rqst(req);
NeilBrown 60c5b3
-	}
NeilBrown 60c5b3
-	spin_unlock(&xprt->recv_lock);
NeilBrown 60c5b3
-	spin_lock_bh(&xprt->transport_lock);
NeilBrown 60c5b3
-	xprt->ops->release_xprt(xprt, task);
NeilBrown 60c5b3
-	if (xprt->ops->release_request)
NeilBrown 60c5b3
-		xprt->ops->release_request(task);
NeilBrown 60c5b3
-	xprt->last_used = jiffies;
NeilBrown 60c5b3
-	xprt_schedule_autodisconnect(xprt);
NeilBrown 60c5b3
-	spin_unlock_bh(&xprt->transport_lock);
NeilBrown 60c5b3
-	if (req->rq_buffer)
NeilBrown 60c5b3
-		xprt->ops->buf_free(task);
NeilBrown 60c5b3
-	xprt_inject_disconnect(xprt);
NeilBrown 60c5b3
-	if (req->rq_cred != NULL)
NeilBrown 60c5b3
-		put_rpccred(req->rq_cred);
NeilBrown 60c5b3
-	task->tk_rqstp = NULL;
NeilBrown 60c5b3
-	if (req->rq_release_snd_buf)
NeilBrown 60c5b3
-		req->rq_release_snd_buf(req);
NeilBrown 60c5b3
+	if (xprt) {
NeilBrown 60c5b3
+		if (task->tk_ops->rpc_count_stats != NULL)
NeilBrown 60c5b3
+			task->tk_ops->rpc_count_stats(task, task->tk_calldata);
NeilBrown 60c5b3
+		else if (task->tk_client)
NeilBrown 60c5b3
+			rpc_count_iostats(task, task->tk_client->cl_metrics);
NeilBrown 60c5b3
+		spin_lock(&xprt->recv_lock);
NeilBrown 60c5b3
+		if (!list_empty(&req->rq_list)) {
NeilBrown 60c5b3
+			list_del_init(&req->rq_list);
NeilBrown 60c5b3
+			xprt_wait_on_pinned_rqst(req);
NeilBrown 60c5b3
+		}
NeilBrown 60c5b3
+		spin_unlock(&xprt->recv_lock);
NeilBrown 60c5b3
+		spin_lock_bh(&xprt->transport_lock);
NeilBrown 60c5b3
+		xprt->ops->release_xprt(xprt, task);
NeilBrown 60c5b3
+		if (xprt->ops->release_request)
NeilBrown 60c5b3
+			xprt->ops->release_request(task);
NeilBrown 60c5b3
+		xprt->last_used = jiffies;
NeilBrown 60c5b3
+		xprt_schedule_autodisconnect(xprt);
NeilBrown 60c5b3
+		spin_unlock_bh(&xprt->transport_lock);
NeilBrown 60c5b3
+		if (req->rq_buffer)
NeilBrown 60c5b3
+			xprt->ops->buf_free(task);
NeilBrown 60c5b3
+		xprt_inject_disconnect(xprt);
NeilBrown 60c5b3
+		if (req->rq_cred != NULL)
NeilBrown 60c5b3
+			put_rpccred(req->rq_cred);
NeilBrown 60c5b3
+		if (req->rq_release_snd_buf)
NeilBrown 60c5b3
+			req->rq_release_snd_buf(req);
NeilBrown 60c5b3
+	} else
NeilBrown 60c5b3
+		xprt = task->tk_xprt;
NeilBrown 60c5b3
 
NeilBrown 60c5b3
+	task->tk_rqstp = NULL;
NeilBrown 60c5b3
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
NeilBrown 60c5b3
 	if (likely(!bc_prealloc(req)))
NeilBrown 60c5b3
 		xprt->ops->free_slot(xprt, req);