Blob Blame History Raw
From: Xiang Chen <chenxiang66@hisilicon.com>
Date: Mon, 20 Dec 2021 19:21:35 +0800
Subject: scsi: libsas: Defer works of new phys during suspend
Git-commit: bf19aea4607cb5f4a652ab70d8d8035a72a6b8da
Patch-mainline: v5.17-rc1
References: bsc#1198802

During the processing of event PORTE_BYTES_DMAED, the driver queues work
DISCE_DISCOVER_DOMAIN and then flushes workqueue ha->disco_q.  If a new
phyup event occurs while the controller is resuming, the work
PORTE_BYTES_DMAED of the new phy occurs before that of the suspended phys.
The work DISCE_DISCOVER_DOMAIN of the new phy requires an active SAS
controller (it needs to resume the SAS controller via
scsi_sysfs_add_sdev() and some other functions such as add_device_link()).
However, the activation of the SAS controller requires completion of the
work PORTE_BYTES_DMAED of the suspended phys, which is blocked by the new
phy's work on ha->event_q. So there is a deadlock, and it is released only
after the resume times out.

To solve the issue, defer the works of new phys during suspend and queue
those deferred works after the SAS controller becomes active.

Link: https://lore.kernel.org/r/1639999298-244569-13-git-send-email-chenxiang66@hisilicon.com
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Lee Duncan <lduncan@suse.com>
---
 drivers/scsi/libsas/sas_event.c | 24 ++++++++++++++++++++++++
 drivers/scsi/libsas/sas_init.c  |  1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index 01e544ca518a..626ef96b9348 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -139,6 +139,24 @@ static void sas_phy_event_worker(struct work_struct *work)
 	sas_free_event(ev);
 }
 
+/* Queue events of non-suspended phys on ha->defer_q while HA is resuming */
+static bool sas_defer_event(struct asd_sas_phy *phy, struct asd_sas_event *ev)
+{
+	struct sas_ha_struct *ha = phy->ha;
+	unsigned long flags;
+	bool deferred = false;
+
+	spin_lock_irqsave(&ha->lock, flags);
+	if (test_bit(SAS_HA_RESUMING, &ha->state) && !phy->suspended) {
+		struct sas_work *sw = &ev->work;
+
+		list_add_tail(&sw->drain_node, &ha->defer_q);
+		deferred = true;
+	}
+	spin_unlock_irqrestore(&ha->lock, flags);
+	return deferred;
+}
+
 int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event,
 			  gfp_t gfp_flags)
 {
@@ -154,6 +172,9 @@ int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event,
 
 	INIT_SAS_EVENT(ev, sas_port_event_worker, phy, event);
 
+	if (sas_defer_event(phy, ev))
+		return 0;
+
 	ret = sas_queue_event(event, &ev->work, ha);
 	if (ret != 1)
 		sas_free_event(ev);
@@ -177,6 +198,9 @@ int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event,
 
 	INIT_SAS_EVENT(ev, sas_phy_event_worker, phy, event);
 
+	if (sas_defer_event(phy, ev))
+		return 0;
+
 	ret = sas_queue_event(event, &ev->work, ha);
 	if (ret != 1)
 		sas_free_event(ev);
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 069e40fc8411..dc35f0f8eae3 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -446,6 +446,7 @@ static void _sas_resume_ha(struct sas_ha_struct *ha, bool drain)
 		sas_drain_work(ha);
 	clear_bit(SAS_HA_RESUMING, &ha->state);
 
+	sas_queue_deferred_work(ha);
 	/* send event PORTE_BROADCAST_RCVD to identify some new inserted
 	 * disks for expander
 	 */