Blob Blame History Raw
From: Stefan Raspl <raspl@linux.ibm.com>
Subject: net/smc: handle device, port, and QP error events
Patch-mainline: v4.16-rc1
Git-commit: da05bf2981f1035fc37d4253ccf0692faf19b8c0
References: FATE#325694, LTC#167874, bsc#1113480

Summary:     net/smc: SMC-R MVP
Description: Add latest upstream patches to push SMC-R to the MVP level

Upstream-Description:

             net/smc: handle device, port, and QP error events

             RoCE device changes cause an IB event, processed in the global event
             handler for the ROCE device. Problems for a certain Queue Pair cause a QP
             event, processed in the QP event handler for this QP.
             Among those events are port errors and other fatal device errors. All
             link groups using such a port or device must be terminated in those cases.

             Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
             Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 net/smc/smc_ib.c |   38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -140,6 +140,17 @@ out:
 	return rc;
 }
 
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct smc_link_group *lgr, *l;
+
+	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+		if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+			smc_lgr_terminate(lgr);
+	}
+}
+
 /* process context wrapper for might_sleep smc_ib_remember_port_attr */
 static void smc_ib_port_event_work(struct work_struct *work)
 {
@@ -150,6 +161,8 @@ static void smc_ib_port_event_work(struc
 	for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
 		smc_ib_remember_port_attr(smcibdev, port_idx + 1);
 		clear_bit(port_idx, &smcibdev->port_event_mask);
+		if (!smc_ib_port_active(smcibdev, port_idx + 1))
+			smc_ib_port_terminate(smcibdev, port_idx + 1);
 	}
 }
 
@@ -164,15 +177,7 @@ static void smc_ib_global_event_handler(
 
 	switch (ibevent->event) {
 	case IB_EVENT_PORT_ERR:
-		port_idx = ibevent->element.port_num - 1;
-		set_bit(port_idx, &smcibdev->port_event_mask);
-		schedule_work(&smcibdev->port_event_work);
-		/* fall through */
 	case IB_EVENT_DEVICE_FATAL:
-		/* tbd in follow-on patch:
-		 * abnormal close of corresponding connections
-		 */
-		break;
 	case IB_EVENT_PORT_ACTIVE:
 		port_idx = ibevent->element.port_num - 1;
 		set_bit(port_idx, &smcibdev->port_event_mask);
@@ -185,7 +190,8 @@ static void smc_ib_global_event_handler(
 
 void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
 {
-	ib_dealloc_pd(lnk->roce_pd);
+	if (lnk->roce_pd)
+		ib_dealloc_pd(lnk->roce_pd);
 	lnk->roce_pd = NULL;
 }
 
@@ -202,14 +208,18 @@ int smc_ib_create_protection_domain(stru
 
 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 {
+	struct smc_ib_device *smcibdev =
+		(struct smc_ib_device *)ibevent->device;
+	u8 port_idx;
+
 	switch (ibevent->event) {
 	case IB_EVENT_DEVICE_FATAL:
 	case IB_EVENT_GID_CHANGE:
 	case IB_EVENT_PORT_ERR:
 	case IB_EVENT_QP_ACCESS_ERR:
-		/* tbd in follow-on patch:
-		 * abnormal close of corresponding connections
-		 */
+		port_idx = ibevent->element.port_num - 1;
+		set_bit(port_idx, &smcibdev->port_event_mask);
+		schedule_work(&smcibdev->port_event_work);
 		break;
 	default:
 		break;
@@ -218,7 +228,8 @@ static void smc_ib_qp_event_handler(stru
 
 void smc_ib_destroy_queue_pair(struct smc_link *lnk)
 {
-	ib_destroy_qp(lnk->roce_qp);
+	if (lnk->roce_qp)
+		ib_destroy_qp(lnk->roce_qp);
 	lnk->roce_qp = NULL;
 }
 
@@ -461,6 +472,7 @@ static void smc_ib_cleanup_per_ibdev(str
 {
 	if (!smcibdev->initialized)
 		return;
+	smcibdev->initialized = 0;
 	smc_wr_remove_dev(smcibdev);
 	ib_unregister_event_handler(&smcibdev->event_handler);
 	ib_destroy_cq(smcibdev->roce_cq_recv);