From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 7 Jan 2021 18:24:40 +0100
Subject: s390/qeth: fix deadlock during recovery
Git-commit: 0b9902c1fcc59ba75268386c0420a554f8844168
Patch-mainline: v5.11-rc3
References: git-fixes
When qeth_dev_layer2_store() - holding the discipline_mutex - waits
inside qeth_l*_remove_device() for a qeth_do_reset() thread to complete,
we can hit a deadlock if qeth_do_reset() concurrently calls
qeth_set_online() and thus tries to aquire the discipline_mutex.
Move the discipline_mutex locking outside of qeth_set_online() and
qeth_set_offline(), and turn the discipline into a parameter so that
callers understand the dependency.
To fix the deadlock, we can now relax the locking:
As already established, qeth_l*_remove_device() waits for
qeth_do_reset() to complete. So qeth_do_reset() itself is under no risk
of having card->discipline ripped out while it's running, and thus
doesn't need to take the discipline_mutex.
Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
[ ptesarik: Since SLE15-SP1 does not contain upstream commit
91003f354e6bbe8d225b4d54127d80c694d201d9, there is no qeth_do_reset().
Recovery is done by qeth_l2_recover() or qeth_l3_recover() instead,
and the deadlock needs fixing in both implementations. ]
Signed-off-by: Petr Tesarik <ptesarik@suse.com>
---
drivers/s390/net/qeth_core_main.c | 12 +++++++++++-
drivers/s390/net/qeth_l2_main.c | 25 +++++++++++++++++--------
drivers/s390/net/qeth_l3_main.c | 25 +++++++++++++++++--------
3 files changed, 45 insertions(+), 17 deletions(-)
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5937,7 +5937,11 @@ static int qeth_core_set_online(struct c
goto err;
}
}
+
+ mutex_lock(&card->discipline_mutex);
rc = card->discipline->set_online(gdev);
+ mutex_unlock(&card->discipline_mutex);
+
err:
return rc;
}
@@ -5945,7 +5949,13 @@ err:
static int qeth_core_set_offline(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- return card->discipline->set_offline(gdev);
+ int rc;
+
+ mutex_lock(&card->discipline_mutex);
+ rc = card->discipline->set_offline(gdev);
+ mutex_unlock(&card->discipline_mutex);
+
+ return rc;
}
static void qeth_core_shutdown(struct ccwgroup_device *gdev)
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -825,8 +825,11 @@ static void qeth_l2_remove_device(struct
qeth_set_allowed_threads(card, 0, 1);
wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
- if (cgdev->state == CCWGROUP_ONLINE)
+ if (cgdev->state == CCWGROUP_ONLINE) {
+ mutex_lock(&card->discipline_mutex);
qeth_l2_set_offline(cgdev);
+ mutex_unlock(&card->discipline_mutex);
+ }
cancel_work_sync(&card->close_dev_work);
if (qeth_netdev_is_registered(card->dev))
@@ -959,7 +962,6 @@ static int __qeth_l2_set_online(struct c
enum qeth_card_states recover_flag;
bool carrier_ok;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_DBF_TEXT(SETUP, 2, "setonlin");
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -1045,7 +1047,6 @@ static int __qeth_l2_set_online(struct c
/* let user_space know that device is online */
kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
out_remove:
@@ -1059,7 +1060,6 @@ out_remove:
else
card->state = CARD_STATE_DOWN;
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return rc;
}
@@ -1075,7 +1075,6 @@ static int __qeth_l2_set_offline(struct
int rc = 0, rc2 = 0, rc3 = 0;
enum qeth_card_states recover_flag;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_DBF_TEXT(SETUP, 3, "setoffl");
QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
@@ -1100,7 +1099,6 @@ static int __qeth_l2_set_offline(struct
/* let user_space know that device is offline */
kobject_uevent(&cgdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
}
@@ -1122,6 +1120,7 @@ static int qeth_l2_recover(void *ptr)
dev_warn(&card->gdev->dev,
"A recovery process has been started for the device\n");
qeth_set_recovery_task(card);
+ /* Lock-free, other users will block until we are done. */
__qeth_l2_set_offline(card->gdev, 1);
rc = __qeth_l2_set_online(card->gdev, 1);
if (!rc)
@@ -1161,9 +1160,14 @@ static int qeth_l2_pm_suspend(struct ccw
if (card->state == CARD_STATE_UP) {
if (card->info.hwtrap)
qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
+ mutex_lock(&card->discipline_mutex);
__qeth_l2_set_offline(card->gdev, 1);
- } else
+ mutex_unlock(&card->discipline_mutex);
+ } else {
+ mutex_lock(&card->discipline_mutex);
__qeth_l2_set_offline(card->gdev, 0);
+ mutex_unlock(&card->discipline_mutex);
+ }
return 0;
}
@@ -1173,14 +1177,19 @@ static int qeth_l2_pm_resume(struct ccwg
int rc = 0;
if (card->state == CARD_STATE_RECOVER) {
+ mutex_lock(&card->discipline_mutex);
rc = __qeth_l2_set_online(card->gdev, 1);
+ mutex_unlock(&card->discipline_mutex);
if (rc) {
rtnl_lock();
dev_close(card->dev);
rtnl_unlock();
}
- } else
+ } else {
+ mutex_lock(&card->discipline_mutex);
rc = __qeth_l2_set_online(card->gdev, 0);
+ mutex_unlock(&card->discipline_mutex);
+ }
qeth_set_allowed_threads(card, 0xffffffff, 0);
netif_device_attach(card->dev);
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2475,8 +2475,11 @@ static void qeth_l3_remove_device(struct
qeth_set_allowed_threads(card, 0, 1);
wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
- if (cgdev->state == CCWGROUP_ONLINE)
+ if (cgdev->state == CCWGROUP_ONLINE) {
+ mutex_lock(&card->discipline_mutex);
qeth_l3_set_offline(cgdev);
+ mutex_unlock(&card->discipline_mutex);
+ }
cancel_work_sync(&card->close_dev_work);
if (qeth_netdev_is_registered(card->dev))
@@ -2492,7 +2495,6 @@ static int __qeth_l3_set_online(struct c
enum qeth_card_states recover_flag;
bool carrier_ok;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_DBF_TEXT(SETUP, 2, "setonlin");
QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
@@ -2566,7 +2568,6 @@ static int __qeth_l3_set_online(struct c
/* let user_space know that device is online */
kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
out_remove:
qeth_l3_stop_card(card, 0);
@@ -2579,7 +2580,6 @@ out_remove:
else
card->state = CARD_STATE_DOWN;
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return rc;
}
@@ -2595,7 +2595,6 @@ static int __qeth_l3_set_offline(struct
int rc = 0, rc2 = 0, rc3 = 0;
enum qeth_card_states recover_flag;
- mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_DBF_TEXT(SETUP, 3, "setoffl");
QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
@@ -2625,7 +2624,6 @@ static int __qeth_l3_set_offline(struct
/* let user_space know that device is offline */
kobject_uevent(&cgdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
- mutex_unlock(&card->discipline_mutex);
return 0;
}
@@ -2648,6 +2646,7 @@ static int qeth_l3_recover(void *ptr)
dev_warn(&card->gdev->dev,
"A recovery process has been started for the device\n");
qeth_set_recovery_task(card);
+ /* Lock-free, other users will block until we are done. */
__qeth_l3_set_offline(card->gdev, 1);
rc = __qeth_l3_set_online(card->gdev, 1);
if (!rc)
@@ -2676,9 +2675,14 @@ static int qeth_l3_pm_suspend(struct ccw
if (card->state == CARD_STATE_UP) {
if (card->info.hwtrap)
qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
+ mutex_lock(&card->discipline_mutex);
__qeth_l3_set_offline(card->gdev, 1);
- } else
+ mutex_unlock(&card->discipline_mutex);
+ } else {
+ mutex_lock(&card->discipline_mutex);
__qeth_l3_set_offline(card->gdev, 0);
+ mutex_unlock(&card->discipline_mutex);
+ }
return 0;
}
@@ -2688,14 +2692,19 @@ static int qeth_l3_pm_resume(struct ccwg
int rc = 0;
if (card->state == CARD_STATE_RECOVER) {
+ mutex_lock(&card->discipline_mutex);
rc = __qeth_l3_set_online(card->gdev, 1);
+ mutex_unlock(&card->discipline_mutex);
if (rc) {
rtnl_lock();
dev_close(card->dev);
rtnl_unlock();
}
- } else
+ } else {
+ mutex_lock(&card->discipline_mutex);
rc = __qeth_l3_set_online(card->gdev, 0);
+ mutex_unlock(&card->discipline_mutex);
+ }
qeth_set_allowed_threads(card, 0xffffffff, 0);
netif_device_attach(card->dev);