|
Lee, Chun-Yi |
e7d21c |
From: Dust Li <dust.li@linux.alibaba.com>
|
|
Lee, Chun-Yi |
e7d21c |
Date: Tue, 1 Mar 2022 17:44:02 +0800
|
|
Lee, Chun-Yi |
e7d21c |
Subject: net/smc: don't send in the BH context if sock_owned_by_user
|
|
Lee, Chun-Yi |
e7d21c |
Patch-mainline: v5.18-rc1
|
|
Lee, Chun-Yi |
e7d21c |
Git-commit: 6b88af839d204c9283ae09357555e5c4f56c6da5
|
|
Lee, Chun-Yi |
e7d21c |
References: jsc#PED-612
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
Send data all the way down to the RDMA device is a time
|
|
Lee, Chun-Yi |
e7d21c |
consuming operation(get a new slot, maybe do RDMA Write
|
|
Lee, Chun-Yi |
e7d21c |
and send a CDC, etc). Moving those operations from BH
|
|
Lee, Chun-Yi |
e7d21c |
to user context is good for performance.
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
If the sock_lock is hold by user, we don't try to send
|
|
Lee, Chun-Yi |
e7d21c |
data out in the BH context, but just mark we should
|
|
Lee, Chun-Yi |
e7d21c |
send. Since the user will release the sock_lock soon, we
|
|
Lee, Chun-Yi |
e7d21c |
can do the sending there.
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
Add smc_release_cb() which will be called in release_sock()
|
|
Lee, Chun-Yi |
e7d21c |
and try send in the callback if needed.
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
This patch moves the sending part out from BH if sock lock
|
|
Lee, Chun-Yi |
e7d21c |
is hold by user. In my testing environment, this saves about
|
|
Lee, Chun-Yi |
e7d21c |
20% softirq in the qperf 4K tcp_bw test in the sender side
|
|
Lee, Chun-Yi |
e7d21c |
with no noticeable throughput drop.
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
|
|
Lee, Chun-Yi |
e7d21c |
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Lee, Chun-Yi |
e7d21c |
Acked-by: Lee, Chun-Yi <jlee@suse.com>
|
|
Lee, Chun-Yi |
e7d21c |
---
|
|
Lee, Chun-Yi |
e7d21c |
net/smc/af_smc.c | 16 ++++++++++++++++
|
|
Lee, Chun-Yi |
e7d21c |
net/smc/smc.h | 4 ++++
|
|
Lee, Chun-Yi |
e7d21c |
net/smc/smc_cdc.c | 19 ++++++++++++++-----
|
|
Lee, Chun-Yi |
e7d21c |
3 files changed, 34 insertions(+), 5 deletions(-)
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
--- a/net/smc/af_smc.c
|
|
Lee, Chun-Yi |
e7d21c |
+++ b/net/smc/af_smc.c
|
|
Lee, Chun-Yi |
e7d21c |
@@ -193,12 +193,27 @@ void smc_unhash_sk(struct sock *sk)
|
|
Lee, Chun-Yi |
e7d21c |
}
|
|
Lee, Chun-Yi |
e7d21c |
EXPORT_SYMBOL_GPL(smc_unhash_sk);
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
+/* This will be called before user really release sock_lock. So do the
|
|
Lee, Chun-Yi |
e7d21c |
+ * work which we didn't do because of user hold the sock_lock in the
|
|
Lee, Chun-Yi |
e7d21c |
+ * BH context
|
|
Lee, Chun-Yi |
e7d21c |
+ */
|
|
Lee, Chun-Yi |
e7d21c |
+static void smc_release_cb(struct sock *sk)
|
|
Lee, Chun-Yi |
e7d21c |
+{
|
|
Lee, Chun-Yi |
e7d21c |
+ struct smc_sock *smc = smc_sk(sk);
|
|
Lee, Chun-Yi |
e7d21c |
+
|
|
Lee, Chun-Yi |
e7d21c |
+ if (smc->conn.tx_in_release_sock) {
|
|
Lee, Chun-Yi |
e7d21c |
+ smc_tx_pending(&smc->conn);
|
|
Lee, Chun-Yi |
e7d21c |
+ smc->conn.tx_in_release_sock = false;
|
|
Lee, Chun-Yi |
e7d21c |
+ }
|
|
Lee, Chun-Yi |
e7d21c |
+}
|
|
Lee, Chun-Yi |
e7d21c |
+
|
|
Lee, Chun-Yi |
e7d21c |
struct proto smc_proto = {
|
|
Lee, Chun-Yi |
e7d21c |
.name = "SMC",
|
|
Lee, Chun-Yi |
e7d21c |
.owner = THIS_MODULE,
|
|
Lee, Chun-Yi |
e7d21c |
.keepalive = smc_set_keepalive,
|
|
Lee, Chun-Yi |
e7d21c |
.hash = smc_hash_sk,
|
|
Lee, Chun-Yi |
e7d21c |
.unhash = smc_unhash_sk,
|
|
Lee, Chun-Yi |
e7d21c |
+ .release_cb = smc_release_cb,
|
|
Lee, Chun-Yi |
e7d21c |
.obj_size = sizeof(struct smc_sock),
|
|
Lee, Chun-Yi |
e7d21c |
.h.smc_hash = &smc_v4_hashinfo,
|
|
Lee, Chun-Yi |
e7d21c |
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
|
Lee, Chun-Yi |
e7d21c |
@@ -211,6 +226,7 @@ struct proto smc_proto6 = {
|
|
Lee, Chun-Yi |
e7d21c |
.keepalive = smc_set_keepalive,
|
|
Lee, Chun-Yi |
e7d21c |
.hash = smc_hash_sk,
|
|
Lee, Chun-Yi |
e7d21c |
.unhash = smc_unhash_sk,
|
|
Lee, Chun-Yi |
e7d21c |
+ .release_cb = smc_release_cb,
|
|
Lee, Chun-Yi |
e7d21c |
.obj_size = sizeof(struct smc_sock),
|
|
Lee, Chun-Yi |
e7d21c |
.h.smc_hash = &smc_v6_hashinfo,
|
|
Lee, Chun-Yi |
e7d21c |
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
|
Lee, Chun-Yi |
e7d21c |
--- a/net/smc/smc.h
|
|
Lee, Chun-Yi |
e7d21c |
+++ b/net/smc/smc.h
|
|
Lee, Chun-Yi |
e7d21c |
@@ -213,6 +213,10 @@ struct smc_connection {
|
|
Lee, Chun-Yi |
e7d21c |
* data still pending
|
|
Lee, Chun-Yi |
e7d21c |
*/
|
|
Lee, Chun-Yi |
e7d21c |
char urg_rx_byte; /* urgent byte */
|
|
Lee, Chun-Yi |
e7d21c |
+ bool tx_in_release_sock;
|
|
Lee, Chun-Yi |
e7d21c |
+ /* flush pending tx data in
|
|
Lee, Chun-Yi |
e7d21c |
+ * sock release_cb()
|
|
Lee, Chun-Yi |
e7d21c |
+ */
|
|
Lee, Chun-Yi |
e7d21c |
atomic_t bytes_to_rcv; /* arrived data,
|
|
Lee, Chun-Yi |
e7d21c |
* not yet received
|
|
Lee, Chun-Yi |
e7d21c |
*/
|
|
Lee, Chun-Yi |
e7d21c |
--- a/net/smc/smc_cdc.c
|
|
Lee, Chun-Yi |
e7d21c |
+++ b/net/smc/smc_cdc.c
|
|
Lee, Chun-Yi |
e7d21c |
@@ -49,10 +49,15 @@ static void smc_cdc_tx_handler(struct sm
|
|
Lee, Chun-Yi |
e7d21c |
}
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) {
|
|
Lee, Chun-Yi |
e7d21c |
- /* If this is the last pending WR complete, we must push to
|
|
Lee, Chun-Yi |
e7d21c |
- * prevent hang when autocork enabled.
|
|
Lee, Chun-Yi |
e7d21c |
+ /* If user owns the sock_lock, mark the connection need sending.
|
|
Lee, Chun-Yi |
e7d21c |
+ * User context will later try to send when it release sock_lock
|
|
Lee, Chun-Yi |
e7d21c |
+ * in smc_release_cb()
|
|
Lee, Chun-Yi |
e7d21c |
*/
|
|
Lee, Chun-Yi |
e7d21c |
- smc_tx_sndbuf_nonempty(conn);
|
|
Lee, Chun-Yi |
e7d21c |
+ if (sock_owned_by_user(&smc->sk))
|
|
Lee, Chun-Yi |
e7d21c |
+ conn->tx_in_release_sock = true;
|
|
Lee, Chun-Yi |
e7d21c |
+ else
|
|
Lee, Chun-Yi |
e7d21c |
+ smc_tx_pending(conn);
|
|
Lee, Chun-Yi |
e7d21c |
+
|
|
Lee, Chun-Yi |
e7d21c |
if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
|
|
Lee, Chun-Yi |
e7d21c |
wake_up(&conn->cdc_pend_tx_wq);
|
|
Lee, Chun-Yi |
e7d21c |
}
|
|
Lee, Chun-Yi |
e7d21c |
@@ -355,8 +360,12 @@ static void smc_cdc_msg_recv_action(stru
|
|
Lee, Chun-Yi |
e7d21c |
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
|
|
Lee, Chun-Yi |
e7d21c |
if ((diff_cons && smc_tx_prepared_sends(conn)) ||
|
|
Lee, Chun-Yi |
e7d21c |
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
|
|
Lee, Chun-Yi |
e7d21c |
- conn->local_rx_ctrl.prod_flags.urg_data_pending)
|
|
Lee, Chun-Yi |
e7d21c |
- smc_tx_sndbuf_nonempty(conn);
|
|
Lee, Chun-Yi |
e7d21c |
+ conn->local_rx_ctrl.prod_flags.urg_data_pending) {
|
|
Lee, Chun-Yi |
e7d21c |
+ if (!sock_owned_by_user(&smc->sk))
|
|
Lee, Chun-Yi |
e7d21c |
+ smc_tx_pending(conn);
|
|
Lee, Chun-Yi |
e7d21c |
+ else
|
|
Lee, Chun-Yi |
e7d21c |
+ conn->tx_in_release_sock = true;
|
|
Lee, Chun-Yi |
e7d21c |
+ }
|
|
Lee, Chun-Yi |
e7d21c |
|
|
Lee, Chun-Yi |
e7d21c |
if (diff_cons && conn->urg_tx_pend &&
|
|
Lee, Chun-Yi |
e7d21c |
atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
|