From: Stefan Raspl <raspl@linux.ibm.com>
Subject: net/smc: replace sock_put worker by socket refcounting
Patch-mainline: v4.16-rc1
Git-commit: 51f1de79ad8ed3555fd01ae8fd432691d397684b
References: FATE#325694, LTC#167874, bsc#1113480
Summary: net/smc: SMC-R MVP
Description: Add latest upstream patches to push SMC-R to the MVP level
Upstream-Description:
net/smc: replace sock_put worker by socket refcounting
Proper socket refcounting makes the sock_put worker obsolete.
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
net/smc/af_smc.c | 65 ++++++++++++++++++++++++++++------------------------
net/smc/smc.h | 1
net/smc/smc_cdc.c | 20 ++++++++--------
net/smc/smc_close.c | 63 ++++++++++++++++++++++++++++++--------------------
net/smc/smc_close.h | 1
net/smc/smc_core.c | 6 ++--
6 files changed, 88 insertions(+), 68 deletions(-)
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *so
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *so
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *so
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struc
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -399,6 +395,8 @@ static int smc_connect_rdma(struct smc_s
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -542,6 +540,8 @@ out_err_unlock:
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -620,7 +620,7 @@ static int smc_clcsock_accept(struct smc
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -637,7 +637,7 @@ static void smc_accept_enqueue(struct so
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -653,7 +653,7 @@ static void smc_accept_unlink(struct soc
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -671,7 +671,7 @@ struct sock *smc_accept_dequeue(struct s
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -686,14 +686,11 @@ void smc_close_non_accepted(struct sock
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -706,14 +703,15 @@ void smc_close_non_accepted(struct sock
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -937,6 +935,8 @@ out_err_unlock:
smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -963,12 +963,15 @@ static void smc_tcp_listen_work(struct w
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
release_sock(lsk);
lsk->sk_data_ready(lsk); /* no more listening, wake accept */
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -1002,7 +1005,9 @@ static int smc_listen(struct socket *soc
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1019,6 +1024,7 @@ static int smc_accept(struct socket *soc
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1053,6 +1059,7 @@ static int smc_accept(struct socket *soc
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -177,7 +177,6 @@ struct smc_sock { /* smc sock contain
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -211,6 +211,14 @@ static void smc_cdc_msg_recv_action(stru
smc->sk.sk_data_ready(&smc->sk);
}
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
+ }
+
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -220,15 +228,9 @@ static void smc_cdc_msg_recv_action(stru
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
- }
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
}
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -109,6 +109,7 @@ static void smc_close_active_abort(struc
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -124,11 +125,13 @@ static void smc_close_active_abort(struc
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
+ /* just SHUTDOWN_SEND done */
sk->sk_state = SMC_PEERABORTWAIT;
sock_release(smc->clcsock);
} else {
sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
@@ -137,6 +140,8 @@ static void smc_close_active_abort(struc
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
@@ -228,12 +233,14 @@ again:
rc = smc_close_final(conn);
if (rc)
break;
- if (smc_cdc_rxed_any_close(conn))
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -271,27 +278,33 @@ static void smc_close_passive_abort_rece
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
@@ -335,13 +348,18 @@ static void smc_close_passive_work(struc
case SMC_INIT:
if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -358,13 +376,20 @@ static void smc_close_passive_work(struc
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PEERFINCLOSEWAIT:
- if (smc_cdc_rxed_any_close(conn))
+ if (smc_cdc_rxed_any_close(conn)) {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -380,23 +405,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
smc_conn_free(conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
}
release_sock(sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -20,7 +20,6 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -327,13 +327,13 @@ void smc_lgr_terminate(struct smc_link_g
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
write_unlock_bh(&lgr->conns_lock);
- schedule_work(&conn->close_work);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
write_lock_bh(&lgr->conns_lock);
- sock_put(&smc->sk);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);