Blob Blame History Raw
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Wed, 8 Jul 2020 17:05:13 +0200
Subject: net/smc: fix sleep bug in smc_pnet_find_roce_resource()
Git-commit: 92f3cb0e11dda530d1daa42d7a11af5a92ed89e4
Patch-mainline: v5.8-rc5
References: git-fixes

Tests showed this BUG:
[572555.252867] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
[572555.252876] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 131031, name: smcapp
[572555.252879] INFO: lockdep is turned off.
[572555.252883] CPU: 1 PID: 131031 Comm: smcapp Tainted: G           O      5.7.0-rc3uschi+ #356
[572555.252885] Hardware name: IBM 3906 M03 703 (LPAR)
[572555.252887] Call Trace:
[572555.252896]  [<00000000ac364554>] show_stack+0x94/0xe8
[572555.252901]  [<00000000aca1f400>] dump_stack+0xa0/0xe0
[572555.252906]  [<00000000ac3c8c10>] ___might_sleep+0x260/0x280
[572555.252910]  [<00000000acdc0c98>] __mutex_lock+0x48/0x940
[572555.252912]  [<00000000acdc15c2>] mutex_lock_nested+0x32/0x40
[572555.252975]  [<000003ff801762d0>] mlx5_lag_get_roce_netdev+0x30/0xc0 [mlx5_core]
[572555.252996]  [<000003ff801fb3aa>] mlx5_ib_get_netdev+0x3a/0xe0 [mlx5_ib]
[572555.253007]  [<000003ff80063848>] smc_pnet_find_roce_resource+0x1d8/0x310 [smc]
[572555.253011]  [<000003ff800602f0>] __smc_connect+0x1f0/0x3e0 [smc]
[572555.253015]  [<000003ff80060634>] smc_connect+0x154/0x190 [smc]
[572555.253022]  [<00000000acbed8d4>] __sys_connect+0x94/0xd0
[572555.253025]  [<00000000acbef620>] __s390x_sys_socketcall+0x170/0x360
[572555.253028]  [<00000000acdc6800>] system_call+0x298/0x2b8
[572555.253030] INFO: lockdep is turned off.

Function smc_pnet_find_rdma_dev() may be called from
smc_pnet_find_roce_resource(). It holds the smc_ib_devices list
spinlock while calling the InfiniBand op get_netdev(). At least for
mlx5, the get_netdev operation takes a mutex and may therefore sleep,
which is not allowed while the smc_ib_devices spinlock is held.
This patch converts the smc_ib_devices spinlock into a mutex to
allow sleeping when calling get_netdev().

Fixes: a4cf0443c414 ("smc: introduce SMC as an IB-client")
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[ ptesarik: smc_core_going_away() hunks removed because of missing
  commit c3d9494e68c4a5d23227ede822fda9bd68bef8e3.
  Additional changes in _smc_pnet_dump are needed because of missing
  commit fdff704dc60418e9a1bac78ae09c857d05c65aa3. ]
Signed-off-by: Petr Tesarik <ptesarik@suse.com>
---
 net/smc/smc_ib.c   |   11 ++++++-----
 net/smc/smc_ib.h   |    3 ++-
 net/smc/smc_pnet.c |   25 +++++++++++++------------
 3 files changed, 21 insertions(+), 18 deletions(-)

--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -15,6 +15,7 @@
 #include <linux/workqueue.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
+#include <linux/mutex.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
@@ -32,7 +33,7 @@
 #define SMC_QP_RNR_RETRY			7 /* 7: infinite */
 
 struct smc_ib_devices smc_ib_devices = {	/* smc-registered ib devices */
-	.lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
+	.mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex),
 	.list = LIST_HEAD_INIT(smc_ib_devices.list),
 };
 
@@ -533,9 +534,9 @@ static void smc_ib_add_dev(struct ib_dev
 	smcibdev->ibdev = ibdev;
 	INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
 
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
 	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
 			      smc_ib_global_event_handler);
@@ -563,9 +564,9 @@ static void smc_ib_remove_dev(struct ib_
 	if (!smcibdev || smcibdev->ibdev != ibdev)
 		return;
 	ib_set_client_data(ibdev, &smc_ib_client, NULL);
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 	smc_ib_cleanup_per_ibdev(smcibdev);
 	ib_unregister_event_handler(&smcibdev->event_handler);
 	cancel_work_sync(&smcibdev->port_event_work);
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -13,6 +13,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/if_ether.h>
+#include <linux/mutex.h>
 #include <rdma/ib_verbs.h>
 #include <net/smc.h>
 
@@ -23,7 +24,7 @@
 
 struct smc_ib_devices {			/* list of smc ib devices definition */
 	struct list_head	list;
-	spinlock_t		lock;	/* protects list of smc ib devices */
+	struct mutex		mutex;	/* protects list of smc ib devices */
 };
 
 extern struct smc_ib_devices	smc_ib_devices; /* list of smc ib devices */
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/ctype.h>
+#include <linux/mutex.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
@@ -124,7 +125,7 @@ static int smc_pnet_remove_by_pnetid(str
 		return rc;
 
 	/* remove ib devices */
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 			if (ibdev->pnetid_by_user[ibport] &&
@@ -138,7 +139,7 @@ static int smc_pnet_remove_by_pnetid(str
 			}
 		}
 	}
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 	/* remove smcd devices */
 	spin_lock(&smcd_dev_list.lock);
 	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
@@ -200,14 +201,14 @@ static int smc_pnet_enter(struct smc_pne
 		struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
 		int ib_port = new_pnetelem->ib_port;
 
-		spin_lock(&smc_ib_devices.lock);
+		mutex_lock(&smc_ib_devices.mutex);
 		if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
 			memcpy(ib_dev->pnetid[ib_port - 1],
 			       new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 			ib_dev->pnetid_by_user[ib_port - 1] = true;
 			new_ibdev = true;
 		}
-		spin_unlock(&smc_ib_devices.lock);
+		mutex_unlock(&smc_ib_devices.mutex);
 	}
 	if (new_pnetelem->smcd_dev) {
 		struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
@@ -290,7 +291,7 @@ static struct smc_ib_device *smc_pnet_fi
 {
 	struct smc_ib_device *ibdev;
 
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		if (!strncmp(ibdev->ibdev->name, ib_name,
 			     sizeof(ibdev->ibdev->name)) ||
@@ -301,7 +302,7 @@ static struct smc_ib_device *smc_pnet_fi
 	}
 	ibdev = NULL;
 out:
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 	return ibdev;
 }
 
@@ -505,7 +506,7 @@ static int _smc_pnet_dump(struct net *ne
 		return idx;
 
 	/* dump ib devices */
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 			if (ibdev->pnetid_by_user[ibport]) {
@@ -530,7 +531,7 @@ static int _smc_pnet_dump(struct net *ne
 			}
 		}
 	}
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 
 	/* dump smcd devices */
 	spin_lock(&smcd_dev_list.lock);
@@ -761,7 +762,7 @@ static void smc_pnet_find_rdma_dev(struc
 {
 	struct smc_ib_device *ibdev;
 
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		struct net_device *ndev;
 		int i;
@@ -785,7 +786,7 @@ static void smc_pnet_find_rdma_dev(struc
 			}
 		}
 	}
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 }
 
 /* Determine the corresponding IB device port based on the hardware PNETID.
@@ -809,7 +810,7 @@ static void smc_pnet_find_roce_by_pnetid
 		return; /* pnetid could not be determined */
 	}
 
-	spin_lock(&smc_ib_devices.lock);
+	mutex_lock(&smc_ib_devices.mutex);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		for (i = 1; i <= SMC_MAX_PORTS; i++) {
 			if (!rdma_is_port_valid(ibdev->ibdev, i))
@@ -825,7 +826,7 @@ static void smc_pnet_find_roce_by_pnetid
 		}
 	}
 out:
-	spin_unlock(&smc_ib_devices.lock);
+	mutex_unlock(&smc_ib_devices.mutex);
 }
 
 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,