Blob Blame History Raw
From: Manish Rangankar <mrangankar@marvell.com>
Date: Thu, 19 Mar 2020 01:38:10 -0700
Subject: scsi: qedi: Add MFW error recovery process
Git-commit: 4b1068f5d74b6cc92319bd7eba40809b1222e73f
Patch-mainline: v5.7-rc1
References: bsc#1177385 jre#sle-15142

This patch adds the mfw error recovery process in the qedi driver. The
process includes a partial/customized driver unload and load to reset
context by preserving active iSCSI session kernel state.

Link: https://lore.kernel.org/r/20200319083811.19499-2-mrangankar@marvell.com
Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Lee Duncan <lduncan@suse.com>
---
 drivers/scsi/qedi/qedi.h      |  1 +
 drivers/scsi/qedi/qedi_main.c | 87 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 9513fd320ffd..812e368cf100 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -331,6 +331,7 @@ struct qedi_ctx {
 	u16 ll2_mtu;
 
 	struct workqueue_struct *dpc_wq;
+	struct delayed_work recovery_work;
 
 	spinlock_t task_idx_lock;	/* To protect gbl context */
 	s32 last_tidx_alloc;
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index acb930b8c6a6..cfa705aa16a6 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -58,6 +58,7 @@ static struct qedi_cmd *qedi_get_cmd_from_tid(struct qedi_ctx *qedi, u32 tid);
 static void qedi_reset_uio_rings(struct qedi_uio_dev *udev);
 static void qedi_ll2_free_skbs(struct qedi_ctx *qedi);
 static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
+static void qedi_recovery_handler(struct work_struct *work);
 
 static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
 {
@@ -1113,6 +1114,20 @@ static void qedi_get_protocol_tlv_data(void *dev, void *data)
 	return;
 }
 
+static void qedi_schedule_recovery_handler(void *dev)
+{
+	struct qedi_ctx *qedi = dev;
+
+	QEDI_ERR(&qedi->dbg_ctx, "Recovery handler scheduled.\n");
+
+	if (test_and_set_bit(QEDI_IN_RECOVERY, &qedi->flags))
+		return;
+
+	atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
+
+	schedule_delayed_work(&qedi->recovery_work, 0);
+}
+
 static void qedi_link_update(void *dev, struct qed_link_output *link)
 {
 	struct qedi_ctx *qedi = (struct qedi_ctx *)dev;
@@ -1130,6 +1145,7 @@ static void qedi_link_update(void *dev, struct qed_link_output *link)
 static struct qed_iscsi_cb_ops qedi_cb_ops = {
 	{
 		.link_update =		qedi_link_update,
+		.schedule_recovery_handler = qedi_schedule_recovery_handler,
 		.get_protocol_tlv_data = qedi_get_protocol_tlv_data,
 		.get_generic_tlv_data = qedi_get_generic_tlv_data,
 	}
@@ -2328,16 +2344,18 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
 	struct qedi_ctx *qedi = pci_get_drvdata(pdev);
 	int rval;
 
-	if (qedi->tmf_thread) {
-		flush_workqueue(qedi->tmf_thread);
-		destroy_workqueue(qedi->tmf_thread);
-		qedi->tmf_thread = NULL;
-	}
+	if (mode == QEDI_MODE_NORMAL) {
+		if (qedi->tmf_thread) {
+			flush_workqueue(qedi->tmf_thread);
+			destroy_workqueue(qedi->tmf_thread);
+			qedi->tmf_thread = NULL;
+		}
 
-	if (qedi->offload_thread) {
-		flush_workqueue(qedi->offload_thread);
-		destroy_workqueue(qedi->offload_thread);
-		qedi->offload_thread = NULL;
+		if (qedi->offload_thread) {
+			flush_workqueue(qedi->offload_thread);
+			destroy_workqueue(qedi->offload_thread);
+			qedi->offload_thread = NULL;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_FS
@@ -2353,8 +2371,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
 		qedi_ops->ll2->stop(qedi->cdev);
 	}
 
-	if (mode == QEDI_MODE_NORMAL)
-		qedi_free_iscsi_pf_param(qedi);
+	qedi_free_iscsi_pf_param(qedi);
 
 	rval = qedi_ops->common->update_drv_state(qedi->cdev, false);
 	if (rval)
@@ -2373,9 +2390,6 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
 		qedi_free_uio(qedi->udev);
 		qedi_free_itt(qedi);
 
-		iscsi_host_remove(qedi->shost);
-		iscsi_host_free(qedi->shost);
-
 		if (qedi->ll2_recv_thread) {
 			kthread_stop(qedi->ll2_recv_thread);
 			qedi->ll2_recv_thread = NULL;
@@ -2384,6 +2398,9 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
 
 		if (qedi->boot_kset)
 			iscsi_boot_destroy_kset(qedi->boot_kset);
+
+		iscsi_host_remove(qedi->shost);
+		iscsi_host_free(qedi->shost);
 	}
 }
 
@@ -2435,14 +2452,12 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 		  qedi->dev_info.common.num_hwfns,
 		  qedi_ops->common->get_affin_hwfn_idx(qedi->cdev));
 
-	if (mode != QEDI_MODE_RECOVERY) {
-		rc = qedi_set_iscsi_pf_param(qedi);
-		if (rc) {
-			rc = -ENOMEM;
-			QEDI_ERR(&qedi->dbg_ctx,
-				 "Set iSCSI pf param fail\n");
-			goto free_host;
-		}
+	rc = qedi_set_iscsi_pf_param(qedi);
+	if (rc) {
+		rc = -ENOMEM;
+		QEDI_ERR(&qedi->dbg_ctx,
+			 "Set iSCSI pf param fail\n");
+		goto free_host;
 	}
 
 	qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
@@ -2633,6 +2648,8 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 			goto free_cid_que;
 		}
 
+		INIT_DELAYED_WORK(&qedi->recovery_work, qedi_recovery_handler);
+
 		/* F/w needs 1st task context memory entry for performance */
 		set_bit(QEDI_RESERVE_TASK_ID, qedi->task_idx_map);
 		atomic_set(&qedi->num_offloads, 0);
@@ -2673,6 +2690,32 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 	return rc;
 }
 
+static void qedi_mark_conn_recovery(struct iscsi_cls_session *cls_session)
+{
+	struct iscsi_session *session = cls_session->dd_data;
+	struct iscsi_conn *conn = session->leadconn;
+	struct qedi_conn *qedi_conn = conn->dd_data;
+
+	iscsi_conn_failure(qedi_conn->cls_conn->dd_data, ISCSI_ERR_CONN_FAILED);
+}
+
+static void qedi_recovery_handler(struct work_struct *work)
+{
+	struct qedi_ctx *qedi =
+			container_of(work, struct qedi_ctx, recovery_work.work);
+
+	iscsi_host_for_each_session(qedi->shost, qedi_mark_conn_recovery);
+
+	/* Call common_ops->recovery_prolog to allow the MFW to quiesce
+	 * any PCI transactions.
+	 */
+	qedi_ops->common->recovery_prolog(qedi->cdev);
+
+	__qedi_remove(qedi->pdev, QEDI_MODE_RECOVERY);
+	__qedi_probe(qedi->pdev, QEDI_MODE_RECOVERY);
+	clear_bit(QEDI_IN_RECOVERY, &qedi->flags);
+}
+
 static int qedi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	return __qedi_probe(pdev, QEDI_MODE_NORMAL);