Blob Blame History Raw
From: Karsten Graul <kgraul@linux.ibm.com>
Subject: net/smc: allow fallback after clc timeouts
Patch-mainline: v5.0-rc1
Git-commit: 9ed28556a388fdb894bdf9bd64c05cf6e7783ba3
References: FATE#325698, LTC#167867, bsc#1113481

Description:  net/smc: bugfix and compatibility patches
Symptom:      Random hangs in smc processing:
                user space application hangs in socket send() or recv() call or
                does never get a notification from a select() call.
              Missing compatibility to other platforms:
                confirm rkey and delete rkey processing is required by the
                design, but delete rkey processing is missing. This leads to
                protocol failures when communicating with other platforms like
                zOS. The SMC-D shutdown signal support is missing, so there is
                no detection if the remote peer closed the link group.
              Broken administration of available WR send payload buffers due to
              a use-after-free condition.
Problem:      Misbehaviour regarding the user space api can lead to hang
              situations. SMC is not fully compatible to some other platforms
              due to missing rkey processing and SMC-D shutdown signal support.
Solution:     Fixed protocoll deficiencies by implementing the required rkey
              processing. For SMC-D, the cursors are now handled atomically to
              handle parallel modifications. The SMC-D shutdown signal is now
              processed when received and sent to the remote peer if needed.
              Prereq patches are included.
Reproduction: Run SMC on a loaded system against zOS as peer system.

Upstream-Description:

              net/smc: allow fallback after clc timeouts

              If connection initialization fails for the LLC CONFIRM LINK or the
              LLC ADD LINK step, fallback to TCP should be enabled. Thus
              the negative return code -EAGAIN should switch to a positive timeout
              reason code in these cases, and the internal CLC socket should
              not have a set sk_err.

              Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
              Signed-off-by: David S. Miller <davem@davemloft.net>


Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 net/smc/af_smc.c  |    8 ++++----
 net/smc/smc_clc.c |    9 +++++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -338,7 +338,7 @@ static int smc_clnt_conf_first_link(stru
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE);
-		return rc;
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
 
 	if (link->llc_confirm_rc)
@@ -366,7 +366,7 @@ static int smc_clnt_conf_first_link(stru
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE);
-		return rc;
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 	}
 
 	/* send add link reject message, only one link supported for now */
@@ -969,7 +969,7 @@ static int smc_serv_conf_first_link(stru
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE);
-		return rc;
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
 
 	if (link->llc_confirm_resp_rc)
@@ -990,7 +990,7 @@ static int smc_serv_conf_first_link(stru
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE);
-		return rc;
+		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 	}
 
 	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -296,7 +296,11 @@ int smc_clc_wait_msg(struct smc_sock *sm
 	}
 	if (clc_sk->sk_err) {
 		reason_code = -clc_sk->sk_err;
-		smc->sk.sk_err = clc_sk->sk_err;
+		if (clc_sk->sk_err == EAGAIN &&
+		    expected_type == SMC_CLC_DECLINE)
+			clc_sk->sk_err = 0; /* reset for fallback usage */
+		else
+			smc->sk.sk_err = clc_sk->sk_err;
 		goto out;
 	}
 	if (!len) { /* peer has performed orderly shutdown */
@@ -305,7 +309,8 @@ int smc_clc_wait_msg(struct smc_sock *sm
 		goto out;
 	}
 	if (len < 0) {
-		smc->sk.sk_err = -len;
+		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
+			smc->sk.sk_err = -len;
 		reason_code = len;
 		goto out;
 	}