From: Stefan Raspl <raspl@linux.ibm.com>
Subject: net/smc: sockopts TCP_NODELAY and TCP_CORK
Patch-mainline: v4.18-rc1
Git-commit: 01d2f7e2cdd31becffafa0cb82809a5e36558ec0
References: FATE#325694, LTC#167874, bsc#1113480

Summary:     net/smc: SMC-R MVP
Description: Add latest upstream patches to push SMC-R to the MVP level

Upstream-Description:

             net/smc: sockopts TCP_NODELAY and TCP_CORK

             Setting sockopt TCP_NODELAY or resetting sockopt TCP_CORK
             triggers data transfer.

             For a corked SMC socket RDMA writes are deferred, if there is
             still sufficient send buffer space available.

             Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
             Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 net/smc/af_smc.c |   20 +++++++++++++++++++-
 net/smc/smc_tx.c |   24 +++++++++++++++++++++---
 2 files changed, 40 insertions(+), 4 deletions(-)

--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1290,7 +1290,7 @@ static int smc_setsockopt(struct socket
 {
 	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
-	int rc;
+	int val, rc;
 
 	smc = smc_sk(sk);
 
@@ -1306,6 +1306,10 @@ static int smc_setsockopt(struct socket
 	if (rc)
 		return rc;
 
+	if (optlen < sizeof(int))
+		return rc;
+	get_user(val, (int __user *)optval);
+
 	lock_sock(sk);
 	switch (optname) {
 	case TCP_ULP:
@@ -1319,6 +1323,20 @@ static int smc_setsockopt(struct socket
 				rc = -EINVAL;
 		}
 		break;
+	case TCP_NODELAY:
+		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+			if (val)
+				mod_delayed_work(system_wq, &smc->conn.tx_work,
+						 0);
+		}
+		break;
+	case TCP_CORK:
+		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+			if (!val)
+				mod_delayed_work(system_wq, &smc->conn.tx_work,
+						 0);
+		}
+		break;
 	default:
 		break;
 	}
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -18,6 +18,7 @@
 #include <linux/sched/signal.h>
 
 #include <net/sock.h>
+#include <net/tcp.h>
 
 #include "smc.h"
 #include "smc_wr.h"
@@ -25,6 +26,7 @@
 #include "smc_tx.h"
 
 #define SMC_TX_WORK_DELAY	HZ
+#define SMC_TX_CORK_DELAY	(HZ >> 2)	/* 250 ms */
 
 /***************************** sndbuf producer *******************************/
 
@@ -114,6 +116,13 @@ static int smc_tx_wait_memory(struct smc
 	return rc;
 }
 
+static bool smc_tx_is_corked(struct smc_sock *smc)
+{
+	struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
+
+	return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
+}
+
 /* sndbuf producer: main API called by socket layer.
  * called under sock lock.
  */
@@ -208,7 +217,16 @@ int smc_tx_sendmsg(struct smc_sock *smc,
 		/* since we just produced more new data into sndbuf,
 		 * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
 		 */
-		smc_tx_sndbuf_nonempty(conn);
+		if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
+		    (atomic_read(&conn->sndbuf_space) >
+						(conn->sndbuf_size >> 1)))
+			/* for a corked socket defer the RDMA writes if there
+			 * is still sufficient sndbuf_space available
+			 */
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_CORK_DELAY);
+		else
+			smc_tx_sndbuf_nonempty(conn);
 	} /* while (msg_data_left(msg)) */
 
 	return send_done;
@@ -408,8 +426,8 @@ int smc_tx_sndbuf_nonempty(struct smc_co
 			}
 			rc = 0;
 			if (conn->alert_token_local) /* connection healthy */
-				schedule_delayed_work(&conn->tx_work,
-						      SMC_TX_WORK_DELAY);
+				mod_delayed_work(system_wq, &conn->tx_work,
+						 SMC_TX_WORK_DELAY);
 		}
 		goto out_unlock;
 	}