|
NeilBrown |
c0de1e |
From: Trond Myklebust <trond.myklebust@hammerspace.com>
|
|
NeilBrown |
c0de1e |
Date: Tue, 25 May 2021 18:43:38 -0400
|
|
NeilBrown |
c0de1e |
Subject: [PATCH] SUNRPC: More fixes for backlog congestion
|
|
NeilBrown |
c0de1e |
Git-commit: e86be3a04bc4aeaf12f93af35f08f8d4385bcd98
|
|
NeilBrown |
c0de1e |
Patch-mainline: v5.13-rc4
|
|
NeilBrown |
c0de1e |
References: bsc#1185428
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well
|
|
NeilBrown |
c0de1e |
as socket-based transports.
|
|
NeilBrown |
c0de1e |
Ensure we always initialise the request after waking up from the backlog
|
|
NeilBrown |
c0de1e |
list.
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task")
|
|
NeilBrown |
c0de1e |
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
|
|
NeilBrown |
c0de1e |
Acked-by: NeilBrown <neilb@suse.com>
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
---
|
|
NeilBrown |
c0de1e |
include/linux/sunrpc/xprt.h | 2 +
|
|
NeilBrown |
c0de1e |
net/sunrpc/xprt.c | 60 +++++++++++++++++++---------------------
|
|
NeilBrown |
c0de1e |
net/sunrpc/xprtrdma/transport.c | 12 ++++----
|
|
NeilBrown |
c0de1e |
net/sunrpc/xprtrdma/verbs.c | 18 ++++++++++--
|
|
NeilBrown |
c0de1e |
net/sunrpc/xprtrdma/xprt_rdma.h | 1
|
|
NeilBrown |
c0de1e |
5 files changed, 53 insertions(+), 40 deletions(-)
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
--- a/include/linux/sunrpc/xprt.h
|
|
NeilBrown |
c0de1e |
+++ b/include/linux/sunrpc/xprt.h
|
|
NeilBrown |
c0de1e |
@@ -377,6 +377,8 @@ struct rpc_xprt * xprt_alloc(struct net
|
|
NeilBrown |
c0de1e |
unsigned int num_prealloc,
|
|
NeilBrown |
c0de1e |
unsigned int max_req);
|
|
NeilBrown |
c0de1e |
void xprt_free(struct rpc_xprt *);
|
|
NeilBrown |
c0de1e |
+void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
|
|
NeilBrown |
c0de1e |
+bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
static inline int
|
|
NeilBrown |
c0de1e |
xprt_enable_swap(struct rpc_xprt *xprt)
|
|
NeilBrown |
c0de1e |
--- a/net/sunrpc/xprt.c
|
|
NeilBrown |
c0de1e |
+++ b/net/sunrpc/xprt.c
|
|
NeilBrown |
c0de1e |
@@ -1542,11 +1542,18 @@ xprt_transmit(struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
spin_unlock(&xprt->queue_lock);
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
-static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
+static void xprt_complete_request_init(struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
+{
|
|
NeilBrown |
c0de1e |
+ if (task->tk_rqstp)
|
|
NeilBrown |
c0de1e |
+ xprt_request_init(task);
|
|
NeilBrown |
c0de1e |
+}
|
|
NeilBrown |
c0de1e |
+
|
|
NeilBrown |
c0de1e |
+void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
{
|
|
NeilBrown |
c0de1e |
set_bit(XPRT_CONGESTED, &xprt->state);
|
|
NeilBrown |
c0de1e |
- rpc_sleep_on(&xprt->backlog, task, NULL);
|
|
NeilBrown |
c0de1e |
+ rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
+EXPORT_SYMBOL_GPL(xprt_add_backlog);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
static bool __xprt_set_rq(struct rpc_task *task, void *data)
|
|
NeilBrown |
c0de1e |
{
|
|
NeilBrown |
c0de1e |
@@ -1554,14 +1561,13 @@ static bool __xprt_set_rq(struct rpc_tas
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
if (task->tk_rqstp == NULL) {
|
|
NeilBrown |
c0de1e |
memset(req, 0, sizeof(*req)); /* mark unused */
|
|
NeilBrown |
c0de1e |
- task->tk_status = -EAGAIN;
|
|
NeilBrown |
c0de1e |
task->tk_rqstp = req;
|
|
NeilBrown |
c0de1e |
return true;
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
return false;
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
-static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
|
|
NeilBrown |
c0de1e |
+bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
|
|
NeilBrown |
c0de1e |
{
|
|
NeilBrown |
c0de1e |
if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
|
|
NeilBrown |
c0de1e |
clear_bit(XPRT_CONGESTED, &xprt->state);
|
|
NeilBrown |
c0de1e |
@@ -1569,6 +1575,7 @@ static bool xprt_wake_up_backlog(struct
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
return true;
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
+EXPORT_SYMBOL_GPL(xprt_wake_up_backlog);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
{
|
|
NeilBrown |
c0de1e |
@@ -1578,7 +1585,7 @@ static bool xprt_throttle_congested(stru
|
|
NeilBrown |
c0de1e |
goto out;
|
|
NeilBrown |
c0de1e |
spin_lock(&xprt->reserve_lock);
|
|
NeilBrown |
c0de1e |
if (test_bit(XPRT_CONGESTED, &xprt->state)) {
|
|
NeilBrown |
c0de1e |
- rpc_sleep_on(&xprt->backlog, task, NULL);
|
|
NeilBrown |
c0de1e |
+ xprt_add_backlog(xprt, task);
|
|
NeilBrown |
c0de1e |
ret = true;
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
spin_unlock(&xprt->reserve_lock);
|
|
NeilBrown |
c0de1e |
@@ -1747,10 +1754,6 @@ xprt_request_init(struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
struct rpc_xprt *xprt = task->tk_xprt;
|
|
NeilBrown |
c0de1e |
struct rpc_rqst *req = task->tk_rqstp;
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
- if (req->rq_task)
|
|
NeilBrown |
c0de1e |
- /* Already initialized */
|
|
NeilBrown |
c0de1e |
- return;
|
|
NeilBrown |
c0de1e |
-
|
|
NeilBrown |
c0de1e |
req->rq_task = task;
|
|
NeilBrown |
c0de1e |
req->rq_xprt = xprt;
|
|
NeilBrown |
c0de1e |
req->rq_buffer = NULL;
|
|
NeilBrown |
c0de1e |
@@ -1811,10 +1814,8 @@ void xprt_retry_reserve(struct rpc_task
|
|
NeilBrown |
c0de1e |
struct rpc_xprt *xprt = task->tk_xprt;
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
task->tk_status = 0;
|
|
NeilBrown |
c0de1e |
- if (task->tk_rqstp != NULL) {
|
|
NeilBrown |
c0de1e |
- xprt_request_init(task);
|
|
NeilBrown |
c0de1e |
+ if (task->tk_rqstp != NULL)
|
|
NeilBrown |
c0de1e |
return;
|
|
NeilBrown |
c0de1e |
- }
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
task->tk_status = -EAGAIN;
|
|
NeilBrown |
c0de1e |
xprt_do_reserve(xprt, task);
|
|
NeilBrown |
c0de1e |
@@ -1839,25 +1840,22 @@ void xprt_release(struct rpc_task *task)
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
xprt = req->rq_xprt;
|
|
NeilBrown |
c0de1e |
- if (xprt) {
|
|
NeilBrown |
c0de1e |
- xprt_request_dequeue_xprt(task);
|
|
NeilBrown |
c0de1e |
- spin_lock(&xprt->transport_lock);
|
|
NeilBrown |
c0de1e |
- xprt->ops->release_xprt(xprt, task);
|
|
NeilBrown |
c0de1e |
- if (xprt->ops->release_request)
|
|
NeilBrown |
c0de1e |
- xprt->ops->release_request(task);
|
|
NeilBrown |
c0de1e |
- xprt_schedule_autodisconnect(xprt);
|
|
NeilBrown |
c0de1e |
- spin_unlock(&xprt->transport_lock);
|
|
NeilBrown |
c0de1e |
- if (req->rq_buffer)
|
|
NeilBrown |
c0de1e |
- xprt->ops->buf_free(task);
|
|
NeilBrown |
c0de1e |
- xprt_inject_disconnect(xprt);
|
|
NeilBrown |
c0de1e |
- xdr_free_bvec(&req->rq_rcv_buf);
|
|
NeilBrown |
c0de1e |
- xdr_free_bvec(&req->rq_snd_buf);
|
|
NeilBrown |
c0de1e |
- if (req->rq_cred != NULL)
|
|
NeilBrown |
c0de1e |
- put_rpccred(req->rq_cred);
|
|
NeilBrown |
c0de1e |
- if (req->rq_release_snd_buf)
|
|
NeilBrown |
c0de1e |
- req->rq_release_snd_buf(req);
|
|
NeilBrown |
c0de1e |
- } else
|
|
NeilBrown |
c0de1e |
- xprt = task->tk_xprt;
|
|
NeilBrown |
c0de1e |
+ xprt_request_dequeue_xprt(task);
|
|
NeilBrown |
c0de1e |
+ spin_lock(&xprt->transport_lock);
|
|
NeilBrown |
c0de1e |
+ xprt->ops->release_xprt(xprt, task);
|
|
NeilBrown |
c0de1e |
+ if (xprt->ops->release_request)
|
|
NeilBrown |
c0de1e |
+ xprt->ops->release_request(task);
|
|
NeilBrown |
c0de1e |
+ xprt_schedule_autodisconnect(xprt);
|
|
NeilBrown |
c0de1e |
+ spin_unlock(&xprt->transport_lock);
|
|
NeilBrown |
c0de1e |
+ if (req->rq_buffer)
|
|
NeilBrown |
c0de1e |
+ xprt->ops->buf_free(task);
|
|
NeilBrown |
c0de1e |
+ xprt_inject_disconnect(xprt);
|
|
NeilBrown |
c0de1e |
+ xdr_free_bvec(&req->rq_rcv_buf);
|
|
NeilBrown |
c0de1e |
+ xdr_free_bvec(&req->rq_snd_buf);
|
|
NeilBrown |
c0de1e |
+ if (req->rq_cred != NULL)
|
|
NeilBrown |
c0de1e |
+ put_rpccred(req->rq_cred);
|
|
NeilBrown |
c0de1e |
+ if (req->rq_release_snd_buf)
|
|
NeilBrown |
c0de1e |
+ req->rq_release_snd_buf(req);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
task->tk_rqstp = NULL;
|
|
NeilBrown |
c0de1e |
dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
|
|
NeilBrown |
c0de1e |
--- a/net/sunrpc/xprtrdma/transport.c
|
|
NeilBrown |
c0de1e |
+++ b/net/sunrpc/xprtrdma/transport.c
|
|
NeilBrown |
c0de1e |
@@ -557,9 +557,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xp
|
|
NeilBrown |
c0de1e |
return;
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
out_sleep:
|
|
NeilBrown |
c0de1e |
- set_bit(XPRT_CONGESTED, &xprt->state);
|
|
NeilBrown |
c0de1e |
- rpc_sleep_on(&xprt->backlog, task, NULL);
|
|
NeilBrown |
c0de1e |
task->tk_status = -EAGAIN;
|
|
NeilBrown |
c0de1e |
+ xprt_add_backlog(xprt, task);
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
/**
|
|
NeilBrown |
c0de1e |
@@ -574,10 +573,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xpr
|
|
NeilBrown |
c0de1e |
struct rpcrdma_xprt *r_xprt =
|
|
NeilBrown |
c0de1e |
container_of(xprt, struct rpcrdma_xprt, rx_xprt);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
- memset(rqst, 0, sizeof(*rqst));
|
|
NeilBrown |
c0de1e |
- rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
|
|
NeilBrown |
c0de1e |
- if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
|
|
NeilBrown |
c0de1e |
- clear_bit(XPRT_CONGESTED, &xprt->state);
|
|
NeilBrown |
c0de1e |
+ rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
|
|
NeilBrown |
c0de1e |
+ if (!xprt_wake_up_backlog(xprt, rqst)) {
|
|
NeilBrown |
c0de1e |
+ memset(rqst, 0, sizeof(*rqst));
|
|
NeilBrown |
c0de1e |
+ rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
|
|
NeilBrown |
c0de1e |
+ }
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
|
|
NeilBrown |
c0de1e |
--- a/net/sunrpc/xprtrdma/verbs.c
|
|
NeilBrown |
c0de1e |
+++ b/net/sunrpc/xprtrdma/verbs.c
|
|
NeilBrown |
c0de1e |
@@ -1308,6 +1308,20 @@ void rpcrdma_mr_put(struct rpcrdma_mr *m
|
|
NeilBrown |
c0de1e |
}
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
/**
|
|
NeilBrown |
c0de1e |
+ * rpcrdma_reply_put - Put reply buffers back into pool
|
|
NeilBrown |
c0de1e |
+ * @buffers: buffer pool
|
|
NeilBrown |
c0de1e |
+ * @req: object to return
|
|
NeilBrown |
c0de1e |
+ *
|
|
NeilBrown |
c0de1e |
+ */
|
|
NeilBrown |
c0de1e |
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
|
|
NeilBrown |
c0de1e |
+{
|
|
NeilBrown |
c0de1e |
+ if (req->rl_reply) {
|
|
NeilBrown |
c0de1e |
+ rpcrdma_rep_put(buffers, req->rl_reply);
|
|
NeilBrown |
c0de1e |
+ req->rl_reply = NULL;
|
|
NeilBrown |
c0de1e |
+ }
|
|
NeilBrown |
c0de1e |
+}
|
|
NeilBrown |
c0de1e |
+
|
|
NeilBrown |
c0de1e |
+/**
|
|
NeilBrown |
c0de1e |
* rpcrdma_buffer_get - Get a request buffer
|
|
NeilBrown |
c0de1e |
* @buffers: Buffer pool from which to obtain a buffer
|
|
NeilBrown |
c0de1e |
*
|
|
NeilBrown |
c0de1e |
@@ -1335,9 +1349,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer
|
|
NeilBrown |
c0de1e |
*/
|
|
NeilBrown |
c0de1e |
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
|
|
NeilBrown |
c0de1e |
{
|
|
NeilBrown |
c0de1e |
- if (req->rl_reply)
|
|
NeilBrown |
c0de1e |
- rpcrdma_rep_put(buffers, req->rl_reply);
|
|
NeilBrown |
c0de1e |
- req->rl_reply = NULL;
|
|
NeilBrown |
c0de1e |
+ rpcrdma_reply_put(buffers, req);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
spin_lock(&buffers->rb_lock);
|
|
NeilBrown |
c0de1e |
list_add(&req->rl_list, &buffers->rb_send_bufs);
|
|
NeilBrown |
c0de1e |
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
|
|
NeilBrown |
c0de1e |
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
|
|
NeilBrown |
c0de1e |
@@ -492,6 +492,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(s
|
|
NeilBrown |
c0de1e |
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
|
|
NeilBrown |
c0de1e |
struct rpcrdma_req *req);
|
|
NeilBrown |
c0de1e |
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
|
|
NeilBrown |
c0de1e |
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
|
|
NeilBrown |
c0de1e |
|
|
NeilBrown |
c0de1e |
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
|
|
NeilBrown |
c0de1e |
gfp_t flags);
|