Lee, Chun-Yi e7d21c
From: Dust Li <dust.li@linux.alibaba.com>
Lee, Chun-Yi e7d21c
Date: Tue, 1 Mar 2022 17:44:02 +0800
Lee, Chun-Yi e7d21c
Subject: net/smc: don't send in the BH context if sock_owned_by_user
Lee, Chun-Yi e7d21c
Patch-mainline: v5.18-rc1
Lee, Chun-Yi e7d21c
Git-commit: 6b88af839d204c9283ae09357555e5c4f56c6da5
Lee, Chun-Yi e7d21c
References: jsc#PED-612
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
Send data all the way down to the RDMA device is a time
Lee, Chun-Yi e7d21c
consuming operation(get a new slot, maybe do RDMA Write
Lee, Chun-Yi e7d21c
and send a CDC, etc). Moving those operations from BH
Lee, Chun-Yi e7d21c
to user context is good for performance.
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
If the sock_lock is hold by user, we don't try to send
Lee, Chun-Yi e7d21c
data out in the BH context, but just mark we should
Lee, Chun-Yi e7d21c
send. Since the user will release the sock_lock soon, we
Lee, Chun-Yi e7d21c
can do the sending there.
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
Add smc_release_cb() which will be called in release_sock()
Lee, Chun-Yi e7d21c
and try send in the callback if needed.
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
This patch moves the sending part out from BH if sock lock
Lee, Chun-Yi e7d21c
is hold by user. In my testing environment, this saves about
Lee, Chun-Yi e7d21c
20% softirq in the qperf 4K tcp_bw test in the sender side
Lee, Chun-Yi e7d21c
with no noticeable throughput drop.
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
Lee, Chun-Yi e7d21c
Signed-off-by: David S. Miller <davem@davemloft.net>
Lee, Chun-Yi e7d21c
Acked-by: Lee, Chun-Yi <jlee@suse.com>
Lee, Chun-Yi e7d21c
---
Lee, Chun-Yi e7d21c
 net/smc/af_smc.c  |   16 ++++++++++++++++
Lee, Chun-Yi e7d21c
 net/smc/smc.h     |    4 ++++
Lee, Chun-Yi e7d21c
 net/smc/smc_cdc.c |   19 ++++++++++++++-----
Lee, Chun-Yi e7d21c
 3 files changed, 34 insertions(+), 5 deletions(-)
Lee, Chun-Yi e7d21c
Lee, Chun-Yi e7d21c
--- a/net/smc/af_smc.c
Lee, Chun-Yi e7d21c
+++ b/net/smc/af_smc.c
Lee, Chun-Yi e7d21c
@@ -193,12 +193,27 @@ void smc_unhash_sk(struct sock *sk)
Lee, Chun-Yi e7d21c
 }
Lee, Chun-Yi e7d21c
 EXPORT_SYMBOL_GPL(smc_unhash_sk);
Lee, Chun-Yi e7d21c
 
Lee, Chun-Yi e7d21c
+/* This will be called before user really release sock_lock. So do the
Lee, Chun-Yi e7d21c
+ * work which we didn't do because of user hold the sock_lock in the
Lee, Chun-Yi e7d21c
+ * BH context
Lee, Chun-Yi e7d21c
+ */
Lee, Chun-Yi e7d21c
+static void smc_release_cb(struct sock *sk)
Lee, Chun-Yi e7d21c
+{
Lee, Chun-Yi e7d21c
+	struct smc_sock *smc = smc_sk(sk);
Lee, Chun-Yi e7d21c
+
Lee, Chun-Yi e7d21c
+	if (smc->conn.tx_in_release_sock) {
Lee, Chun-Yi e7d21c
+		smc_tx_pending(&smc->conn);
Lee, Chun-Yi e7d21c
+		smc->conn.tx_in_release_sock = false;
Lee, Chun-Yi e7d21c
+	}
Lee, Chun-Yi e7d21c
+}
Lee, Chun-Yi e7d21c
+
Lee, Chun-Yi e7d21c
 struct proto smc_proto = {
Lee, Chun-Yi e7d21c
 	.name		= "SMC",
Lee, Chun-Yi e7d21c
 	.owner		= THIS_MODULE,
Lee, Chun-Yi e7d21c
 	.keepalive	= smc_set_keepalive,
Lee, Chun-Yi e7d21c
 	.hash		= smc_hash_sk,
Lee, Chun-Yi e7d21c
 	.unhash		= smc_unhash_sk,
Lee, Chun-Yi e7d21c
+	.release_cb	= smc_release_cb,
Lee, Chun-Yi e7d21c
 	.obj_size	= sizeof(struct smc_sock),
Lee, Chun-Yi e7d21c
 	.h.smc_hash	= &smc_v4_hashinfo,
Lee, Chun-Yi e7d21c
 	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
Lee, Chun-Yi e7d21c
@@ -211,6 +226,7 @@ struct proto smc_proto6 = {
Lee, Chun-Yi e7d21c
 	.keepalive	= smc_set_keepalive,
Lee, Chun-Yi e7d21c
 	.hash		= smc_hash_sk,
Lee, Chun-Yi e7d21c
 	.unhash		= smc_unhash_sk,
Lee, Chun-Yi e7d21c
+	.release_cb	= smc_release_cb,
Lee, Chun-Yi e7d21c
 	.obj_size	= sizeof(struct smc_sock),
Lee, Chun-Yi e7d21c
 	.h.smc_hash	= &smc_v6_hashinfo,
Lee, Chun-Yi e7d21c
 	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
Lee, Chun-Yi e7d21c
--- a/net/smc/smc.h
Lee, Chun-Yi e7d21c
+++ b/net/smc/smc.h
Lee, Chun-Yi e7d21c
@@ -213,6 +213,10 @@ struct smc_connection {
Lee, Chun-Yi e7d21c
 						 * data still pending
Lee, Chun-Yi e7d21c
 						 */
Lee, Chun-Yi e7d21c
 	char			urg_rx_byte;	/* urgent byte */
Lee, Chun-Yi e7d21c
+	bool			tx_in_release_sock;
Lee, Chun-Yi e7d21c
+						/* flush pending tx data in
Lee, Chun-Yi e7d21c
+						 * sock release_cb()
Lee, Chun-Yi e7d21c
+						 */
Lee, Chun-Yi e7d21c
 	atomic_t		bytes_to_rcv;	/* arrived data,
Lee, Chun-Yi e7d21c
 						 * not yet received
Lee, Chun-Yi e7d21c
 						 */
Lee, Chun-Yi e7d21c
--- a/net/smc/smc_cdc.c
Lee, Chun-Yi e7d21c
+++ b/net/smc/smc_cdc.c
Lee, Chun-Yi e7d21c
@@ -49,10 +49,15 @@ static void smc_cdc_tx_handler(struct sm
Lee, Chun-Yi e7d21c
 	}
Lee, Chun-Yi e7d21c
 
Lee, Chun-Yi e7d21c
 	if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) {
Lee, Chun-Yi e7d21c
-		/* If this is the last pending WR complete, we must push to
Lee, Chun-Yi e7d21c
-		 * prevent hang when autocork enabled.
Lee, Chun-Yi e7d21c
+		/* If user owns the sock_lock, mark the connection need sending.
Lee, Chun-Yi e7d21c
+		 * User context will later try to send when it release sock_lock
Lee, Chun-Yi e7d21c
+		 * in smc_release_cb()
Lee, Chun-Yi e7d21c
 		 */
Lee, Chun-Yi e7d21c
-		smc_tx_sndbuf_nonempty(conn);
Lee, Chun-Yi e7d21c
+		if (sock_owned_by_user(&smc->sk))
Lee, Chun-Yi e7d21c
+			conn->tx_in_release_sock = true;
Lee, Chun-Yi e7d21c
+		else
Lee, Chun-Yi e7d21c
+			smc_tx_pending(conn);
Lee, Chun-Yi e7d21c
+
Lee, Chun-Yi e7d21c
 		if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
Lee, Chun-Yi e7d21c
 			wake_up(&conn->cdc_pend_tx_wq);
Lee, Chun-Yi e7d21c
 	}
Lee, Chun-Yi e7d21c
@@ -355,8 +360,12 @@ static void smc_cdc_msg_recv_action(stru
Lee, Chun-Yi e7d21c
 	/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
Lee, Chun-Yi e7d21c
 	if ((diff_cons && smc_tx_prepared_sends(conn)) ||
Lee, Chun-Yi e7d21c
 	    conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
Lee, Chun-Yi e7d21c
-	    conn->local_rx_ctrl.prod_flags.urg_data_pending)
Lee, Chun-Yi e7d21c
-		smc_tx_sndbuf_nonempty(conn);
Lee, Chun-Yi e7d21c
+	    conn->local_rx_ctrl.prod_flags.urg_data_pending) {
Lee, Chun-Yi e7d21c
+		if (!sock_owned_by_user(&smc->sk))
Lee, Chun-Yi e7d21c
+			smc_tx_pending(conn);
Lee, Chun-Yi e7d21c
+		else
Lee, Chun-Yi e7d21c
+			conn->tx_in_release_sock = true;
Lee, Chun-Yi e7d21c
+	}
Lee, Chun-Yi e7d21c
 
Lee, Chun-Yi e7d21c
 	if (diff_cons && conn->urg_tx_pend &&
Lee, Chun-Yi e7d21c
 	    atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {