Blob Blame History Raw
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 7 Jan 2021 18:24:40 +0100
Subject: s390/qeth: fix deadlock during recovery
Git-commit: 0b9902c1fcc59ba75268386c0420a554f8844168
Patch-mainline: v5.11-rc3
References: git-fixes

When qeth_dev_layer2_store() - holding the discipline_mutex - waits
inside qeth_l*_remove_device() for a qeth_do_reset() thread to complete,
we can hit a deadlock if qeth_do_reset() concurrently calls
qeth_set_online() and thus tries to aquire the discipline_mutex.

Move the discipline_mutex locking outside of qeth_set_online() and
qeth_set_offline(), and turn the discipline into a parameter so that
callers understand the dependency.

To fix the deadlock, we can now relax the locking:
As already established, qeth_l*_remove_device() waits for
qeth_do_reset() to complete. So qeth_do_reset() itself is under no risk
of having card->discipline ripped out while it's running, and thus
doesn't need to take the discipline_mutex.

Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
[ ptesarik: Since SLE15-SP1 does not contain upstream commit
  91003f354e6bbe8d225b4d54127d80c694d201d9, there is no qeth_do_reset().
  Recovery is done by qeth_l2_recover() or qeth_l3_recover() instead,
  and the deadlock needs fixing in both implementations. ]
Signed-off-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/s390/net/qeth_core_main.c |   12 +++++++++++-
 drivers/s390/net/qeth_l2_main.c   |   25 +++++++++++++++++--------
 drivers/s390/net/qeth_l3_main.c   |   25 +++++++++++++++++--------
 3 files changed, 45 insertions(+), 17 deletions(-)

--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5937,7 +5937,11 @@ static int qeth_core_set_online(struct c
 			goto err;
 		}
 	}
+
+	mutex_lock(&card->discipline_mutex);
 	rc = card->discipline->set_online(gdev);
+	mutex_unlock(&card->discipline_mutex);
+
 err:
 	return rc;
 }
@@ -5945,7 +5949,13 @@ err:
 static int qeth_core_set_offline(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-	return card->discipline->set_offline(gdev);
+	int rc;
+
+	mutex_lock(&card->discipline_mutex);
+	rc = card->discipline->set_offline(gdev);
+	mutex_unlock(&card->discipline_mutex);
+
+	return rc;
 }
 
 static void qeth_core_shutdown(struct ccwgroup_device *gdev)
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -825,8 +825,11 @@ static void qeth_l2_remove_device(struct
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (cgdev->state == CCWGROUP_ONLINE)
+	if (cgdev->state == CCWGROUP_ONLINE) {
+		mutex_lock(&card->discipline_mutex);
 		qeth_l2_set_offline(cgdev);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (qeth_netdev_is_registered(card->dev))
@@ -959,7 +962,6 @@ static int __qeth_l2_set_online(struct c
 	enum qeth_card_states recover_flag;
 	bool carrier_ok;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_DBF_TEXT(SETUP, 2, "setonlin");
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -1045,7 +1047,6 @@ static int __qeth_l2_set_online(struct c
 	/* let user_space know that device is online */
 	kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 
 out_remove:
@@ -1059,7 +1060,6 @@ out_remove:
 	else
 		card->state = CARD_STATE_DOWN;
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return rc;
 }
 
@@ -1075,7 +1075,6 @@ static int __qeth_l2_set_offline(struct
 	int rc = 0, rc2 = 0, rc3 = 0;
 	enum qeth_card_states recover_flag;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_DBF_TEXT(SETUP, 3, "setoffl");
 	QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
@@ -1100,7 +1099,6 @@ static int __qeth_l2_set_offline(struct
 	/* let user_space know that device is offline */
 	kobject_uevent(&cgdev->dev.kobj, KOBJ_CHANGE);
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 }
 
@@ -1122,6 +1120,7 @@ static int qeth_l2_recover(void *ptr)
 	dev_warn(&card->gdev->dev,
 		"A recovery process has been started for the device\n");
 	qeth_set_recovery_task(card);
+	/* Lock-free, other users will block until we are done. */
 	__qeth_l2_set_offline(card->gdev, 1);
 	rc = __qeth_l2_set_online(card->gdev, 1);
 	if (!rc)
@@ -1161,9 +1160,14 @@ static int qeth_l2_pm_suspend(struct ccw
 	if (card->state == CARD_STATE_UP) {
 		if (card->info.hwtrap)
 			qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
+		mutex_lock(&card->discipline_mutex);
 		__qeth_l2_set_offline(card->gdev, 1);
-	} else
+		mutex_unlock(&card->discipline_mutex);
+	} else {
+		mutex_lock(&card->discipline_mutex);
 		__qeth_l2_set_offline(card->gdev, 0);
+		mutex_unlock(&card->discipline_mutex);
+	}
 	return 0;
 }
 
@@ -1173,14 +1177,19 @@ static int qeth_l2_pm_resume(struct ccwg
 	int rc = 0;
 
 	if (card->state == CARD_STATE_RECOVER) {
+		mutex_lock(&card->discipline_mutex);
 		rc = __qeth_l2_set_online(card->gdev, 1);
+		mutex_unlock(&card->discipline_mutex);
 		if (rc) {
 			rtnl_lock();
 			dev_close(card->dev);
 			rtnl_unlock();
 		}
-	} else
+	} else {
+		mutex_lock(&card->discipline_mutex);
 		rc = __qeth_l2_set_online(card->gdev, 0);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	qeth_set_allowed_threads(card, 0xffffffff, 0);
 	netif_device_attach(card->dev);
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2475,8 +2475,11 @@ static void qeth_l3_remove_device(struct
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (cgdev->state == CCWGROUP_ONLINE)
+	if (cgdev->state == CCWGROUP_ONLINE) {
+		mutex_lock(&card->discipline_mutex);
 		qeth_l3_set_offline(cgdev);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (qeth_netdev_is_registered(card->dev))
@@ -2492,7 +2495,6 @@ static int __qeth_l3_set_online(struct c
 	enum qeth_card_states recover_flag;
 	bool carrier_ok;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_DBF_TEXT(SETUP, 2, "setonlin");
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -2566,7 +2568,6 @@ static int __qeth_l3_set_online(struct c
 	/* let user_space know that device is online */
 	kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 out_remove:
 	qeth_l3_stop_card(card, 0);
@@ -2579,7 +2580,6 @@ out_remove:
 	else
 		card->state = CARD_STATE_DOWN;
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return rc;
 }
 
@@ -2595,7 +2595,6 @@ static int __qeth_l3_set_offline(struct
 	int rc = 0, rc2 = 0, rc3 = 0;
 	enum qeth_card_states recover_flag;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_DBF_TEXT(SETUP, 3, "setoffl");
 	QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
@@ -2625,7 +2624,6 @@ static int __qeth_l3_set_offline(struct
 	/* let user_space know that device is offline */
 	kobject_uevent(&cgdev->dev.kobj, KOBJ_CHANGE);
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 }
 
@@ -2648,6 +2646,7 @@ static int qeth_l3_recover(void *ptr)
 	dev_warn(&card->gdev->dev,
 		"A recovery process has been started for the device\n");
 	qeth_set_recovery_task(card);
+	/* Lock-free, other users will block until we are done. */
 	__qeth_l3_set_offline(card->gdev, 1);
 	rc = __qeth_l3_set_online(card->gdev, 1);
 	if (!rc)
@@ -2676,9 +2675,14 @@ static int qeth_l3_pm_suspend(struct ccw
 	if (card->state == CARD_STATE_UP) {
 		if (card->info.hwtrap)
 			qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
+		mutex_lock(&card->discipline_mutex);
 		__qeth_l3_set_offline(card->gdev, 1);
-	} else
+		mutex_unlock(&card->discipline_mutex);
+	} else {
+		mutex_lock(&card->discipline_mutex);
 		__qeth_l3_set_offline(card->gdev, 0);
+		mutex_unlock(&card->discipline_mutex);
+	}
 	return 0;
 }
 
@@ -2688,14 +2692,19 @@ static int qeth_l3_pm_resume(struct ccwg
 	int rc = 0;
 
 	if (card->state == CARD_STATE_RECOVER) {
+		mutex_lock(&card->discipline_mutex);
 		rc = __qeth_l3_set_online(card->gdev, 1);
+		mutex_unlock(&card->discipline_mutex);
 		if (rc) {
 			rtnl_lock();
 			dev_close(card->dev);
 			rtnl_unlock();
 		}
-	} else
+	} else {
+		mutex_lock(&card->discipline_mutex);
 		rc = __qeth_l3_set_online(card->gdev, 0);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	qeth_set_allowed_threads(card, 0xffffffff, 0);
 	netif_device_attach(card->dev);