|
Daniel Wagner |
3120dd |
From: Quinn Tran <qutran@marvell.com>
|
|
Daniel Wagner |
3120dd |
Date: Wed, 15 Jun 2022 22:35:00 -0700
|
|
Daniel Wagner |
3120dd |
Subject: scsi: qla2xxx: Wind down adapter after PCIe error
|
|
Denis Kirjanov |
718367 |
Patch-mainline: v5.20-rc1
|
|
Daniel Wagner |
3120dd |
Git-commit: d3117c83ba316b3200d9f2fe900f2b9a5525a25c
|
|
Daniel Wagner |
3120dd |
References: bsc#1201958
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
Put the adapter into a wind-down state if the OS does not make any attempt to
|
|
Daniel Wagner |
3120dd |
recover the adapter after a PCIe error.
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
Link: https://lore.kernel.org/r/20220616053508.27186-4-njavali@marvell.com
|
|
Daniel Wagner |
3120dd |
Cc: stable@vger.kernel.org
|
|
Daniel Wagner |
3120dd |
Signed-off-by: Quinn Tran <qutran@marvell.com>
|
|
Daniel Wagner |
3120dd |
Signed-off-by: Nilesh Javali <njavali@marvell.com>
|
|
Daniel Wagner |
3120dd |
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
|
|
Daniel Wagner |
3120dd |
Acked-by: Daniel Wagner <dwagner@suse.de>
|
|
Daniel Wagner |
3120dd |
---
|
|
Daniel Wagner |
3120dd |
drivers/scsi/qla2xxx/qla_bsg.c | 10 +++++++-
|
|
Daniel Wagner |
3120dd |
drivers/scsi/qla2xxx/qla_def.h | 4 +++
|
|
Daniel Wagner |
3120dd |
drivers/scsi/qla2xxx/qla_init.c | 20 ++++++++++++++++
|
|
Daniel Wagner |
3120dd |
drivers/scsi/qla2xxx/qla_os.c | 48 ++++++++++++++++++++++++++++++++++++++++
|
|
Daniel Wagner |
3120dd |
4 files changed, 81 insertions(+), 1 deletion(-)
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
--- a/drivers/scsi/qla2xxx/qla_bsg.c
|
|
Daniel Wagner |
3120dd |
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
|
|
Daniel Wagner |
3120dd |
@@ -3062,6 +3062,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
|
|
Daniel Wagner |
3120dd |
__func__, bsg_job);
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ if (qla2x00_isp_reg_stat(ha)) {
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x9007,
|
|
Daniel Wagner |
3120dd |
+ "PCI/Register disconnect.\n");
|
|
Daniel Wagner |
3120dd |
+ qla_pci_set_eeh_busy(vha);
|
|
Daniel Wagner |
3120dd |
+ }
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
/* find the bsg job from the active list of commands */
|
|
Daniel Wagner |
3120dd |
spin_lock_irqsave(&ha->hardware_lock, flags);
|
|
Daniel Wagner |
3120dd |
for (que = 0; que < ha->max_req_queues; que++) {
|
|
Daniel Wagner |
3120dd |
@@ -3079,7 +3086,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_
|
|
Daniel Wagner |
3120dd |
sp->u.bsg_job == bsg_job) {
|
|
Daniel Wagner |
3120dd |
req->outstanding_cmds[cnt] = NULL;
|
|
Daniel Wagner |
3120dd |
spin_unlock_irqrestore(&ha->hardware_lock, flags);
|
|
Daniel Wagner |
3120dd |
- if (ha->isp_ops->abort_command(sp)) {
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
|
|
Daniel Wagner |
3120dd |
ql_log(ql_log_warn, vha, 0x7089,
|
|
Daniel Wagner |
3120dd |
"mbx abort_command failed.\n");
|
|
Daniel Wagner |
3120dd |
bsg_reply->result = -EIO;
|
|
Daniel Wagner |
3120dd |
--- a/drivers/scsi/qla2xxx/qla_def.h
|
|
Daniel Wagner |
3120dd |
+++ b/drivers/scsi/qla2xxx/qla_def.h
|
|
Daniel Wagner |
3120dd |
@@ -4054,6 +4054,9 @@ struct qla_hw_data {
|
|
Daniel Wagner |
3120dd |
uint32_t n2n_fw_acc_sec:1;
|
|
Daniel Wagner |
3120dd |
uint32_t plogi_template_valid:1;
|
|
Daniel Wagner |
3120dd |
uint32_t port_isolated:1;
|
|
Daniel Wagner |
3120dd |
+ uint32_t eeh_flush:2;
|
|
Daniel Wagner |
3120dd |
+#define EEH_FLUSH_RDY 1
|
|
Daniel Wagner |
3120dd |
+#define EEH_FLUSH_DONE 2
|
|
Daniel Wagner |
3120dd |
} flags;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
uint16_t max_exchg;
|
|
Daniel Wagner |
3120dd |
@@ -4088,6 +4091,7 @@ struct qla_hw_data {
|
|
Daniel Wagner |
3120dd |
uint32_t rsp_que_len;
|
|
Daniel Wagner |
3120dd |
uint32_t req_que_off;
|
|
Daniel Wagner |
3120dd |
uint32_t rsp_que_off;
|
|
Daniel Wagner |
3120dd |
+ unsigned long eeh_jif;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
/* Multi queue data structs */
|
|
Daniel Wagner |
3120dd |
device_reg_t *mqiobase;
|
|
Daniel Wagner |
3120dd |
--- a/drivers/scsi/qla2xxx/qla_init.c
|
|
Daniel Wagner |
3120dd |
+++ b/drivers/scsi/qla2xxx/qla_init.c
|
|
Daniel Wagner |
3120dd |
@@ -48,6 +48,7 @@ qla2x00_sp_timeout(unsigned long __data)
|
|
Daniel Wagner |
3120dd |
{
|
|
Daniel Wagner |
3120dd |
srb_t *sp = (srb_t *)__data;
|
|
Daniel Wagner |
3120dd |
struct srb_iocb *iocb;
|
|
Daniel Wagner |
3120dd |
+ scsi_qla_host_t *vha = sp->vha;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
WARN_ON(irqs_disabled());
|
|
Daniel Wagner |
3120dd |
iocb = &sp->u.iocb_cmd;
|
|
Daniel Wagner |
3120dd |
@@ -55,6 +56,12 @@ qla2x00_sp_timeout(unsigned long __data)
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
/* ref: TMR */
|
|
Daniel Wagner |
3120dd |
kref_put(&sp->cmd_kref, qla2x00_sp_release);
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ if (vha && qla2x00_isp_reg_stat(vha->hw)) {
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x9008,
|
|
Daniel Wagner |
3120dd |
+ "PCI/Register disconnect.\n");
|
|
Daniel Wagner |
3120dd |
+ qla_pci_set_eeh_busy(vha);
|
|
Daniel Wagner |
3120dd |
+ }
|
|
Daniel Wagner |
3120dd |
}
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
void qla2x00_sp_free(srb_t *sp)
|
|
Daniel Wagner |
3120dd |
@@ -9671,6 +9678,12 @@ int qla2xxx_disable_port(struct Scsi_Hos
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
vha->hw->flags.port_isolated = 1;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
+ if (qla2x00_isp_reg_stat(vha->hw)) {
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x9006,
|
|
Daniel Wagner |
3120dd |
+ "PCI/Register disconnect, exiting.\n");
|
|
Daniel Wagner |
3120dd |
+ qla_pci_set_eeh_busy(vha);
|
|
Daniel Wagner |
3120dd |
+ return FAILED;
|
|
Daniel Wagner |
3120dd |
+ }
|
|
Daniel Wagner |
3120dd |
if (qla2x00_chip_is_down(vha))
|
|
Daniel Wagner |
3120dd |
return 0;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
@@ -9686,6 +9699,13 @@ int qla2xxx_enable_port(struct Scsi_Host
|
|
Daniel Wagner |
3120dd |
{
|
|
Daniel Wagner |
3120dd |
scsi_qla_host_t *vha = shost_priv(host);
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
+ if (qla2x00_isp_reg_stat(vha->hw)) {
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x9001,
|
|
Daniel Wagner |
3120dd |
+ "PCI/Register disconnect, exiting.\n");
|
|
Daniel Wagner |
3120dd |
+ qla_pci_set_eeh_busy(vha);
|
|
Daniel Wagner |
3120dd |
+ return FAILED;
|
|
Daniel Wagner |
3120dd |
+ }
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
vha->hw->flags.port_isolated = 0;
|
|
Daniel Wagner |
3120dd |
/* Set the flag to 1, so that isp_abort can proceed */
|
|
Daniel Wagner |
3120dd |
vha->flags.online = 1;
|
|
Daniel Wagner |
3120dd |
--- a/drivers/scsi/qla2xxx/qla_os.c
|
|
Daniel Wagner |
3120dd |
+++ b/drivers/scsi/qla2xxx/qla_os.c
|
|
Daniel Wagner |
3120dd |
@@ -340,6 +340,11 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme,
|
|
Daniel Wagner |
3120dd |
"To wait for ABTS response on I/O timeouts for NVMe. (default: 1)");
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
+u32 ql2xdelay_before_pci_error_handling = 5;
|
|
Daniel Wagner |
3120dd |
+module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
|
|
Daniel Wagner |
3120dd |
+MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
|
|
Daniel Wagner |
3120dd |
+ "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
static void qla2x00_clear_drv_active(struct qla_hw_data *);
|
|
Daniel Wagner |
3120dd |
static void qla2x00_free_device(scsi_qla_host_t *);
|
|
Daniel Wagner |
3120dd |
static int qla2xxx_map_queues(struct Scsi_Host *shost);
|
|
Daniel Wagner |
3120dd |
@@ -7275,6 +7280,44 @@ static void qla_heart_beat(struct scsi_q
|
|
Daniel Wagner |
3120dd |
}
|
|
Daniel Wagner |
3120dd |
}
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
+static void qla_wind_down_chip(scsi_qla_host_t *vha)
|
|
Daniel Wagner |
3120dd |
+{
|
|
Daniel Wagner |
3120dd |
+ struct qla_hw_data *ha = vha->hw;
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ if (!ha->flags.eeh_busy)
|
|
Daniel Wagner |
3120dd |
+ return;
|
|
Daniel Wagner |
3120dd |
+ if (ha->pci_error_state)
|
|
Daniel Wagner |
3120dd |
+ /* system is trying to recover */
|
|
Daniel Wagner |
3120dd |
+ return;
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ /*
|
|
Daniel Wagner |
3120dd |
+ * Current system is not handling PCIE error. At this point, this is
|
|
Daniel Wagner |
3120dd |
+ * best effort to wind down the adapter.
|
|
Daniel Wagner |
3120dd |
+ */
|
|
Daniel Wagner |
3120dd |
+ if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) &&
|
|
Daniel Wagner |
3120dd |
+ !ha->flags.eeh_flush) {
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x9009,
|
|
Daniel Wagner |
3120dd |
+ "PCI Error detected, attempting to reset hardware.\n");
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ ha->isp_ops->reset_chip(vha);
|
|
Daniel Wagner |
3120dd |
+ ha->isp_ops->disable_intrs(ha);
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ ha->flags.eeh_flush = EEH_FLUSH_RDY;
|
|
Daniel Wagner |
3120dd |
+ ha->eeh_jif = jiffies;
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY &&
|
|
Daniel Wagner |
3120dd |
+ time_after_eq(jiffies, ha->eeh_jif + 5 * HZ)) {
|
|
Daniel Wagner |
3120dd |
+ pci_clear_master(ha->pdev);
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ /* flush all command */
|
|
Daniel Wagner |
3120dd |
+ qla2x00_abort_isp_cleanup(vha);
|
|
Daniel Wagner |
3120dd |
+ ha->flags.eeh_flush = EEH_FLUSH_DONE;
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
+ ql_log(ql_log_info, vha, 0x900a,
|
|
Daniel Wagner |
3120dd |
+ "PCI Error handling complete, all IOs aborted.\n");
|
|
Daniel Wagner |
3120dd |
+ }
|
|
Daniel Wagner |
3120dd |
+}
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
/**************************************************************************
|
|
Daniel Wagner |
3120dd |
* qla2x00_timer
|
|
Daniel Wagner |
3120dd |
*
|
|
Daniel Wagner |
3120dd |
@@ -7297,6 +7340,8 @@ qla2x00_timer(scsi_qla_host_t *vha)
|
|
Daniel Wagner |
3120dd |
fc_port_t *fcport = NULL;
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
if (ha->flags.eeh_busy) {
|
|
Daniel Wagner |
3120dd |
+ qla_wind_down_chip(vha);
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
ql_dbg(ql_dbg_timer, vha, 0x6000,
|
|
Daniel Wagner |
3120dd |
"EEH = %d, restarting timer.\n",
|
|
Daniel Wagner |
3120dd |
ha->flags.eeh_busy);
|
|
Daniel Wagner |
3120dd |
@@ -7877,6 +7922,9 @@ void qla_pci_set_eeh_busy(struct scsi_ql
|
|
Daniel Wagner |
3120dd |
|
|
Daniel Wagner |
3120dd |
spin_lock_irqsave(&base_vha->work_lock, flags);
|
|
Daniel Wagner |
3120dd |
if (!ha->flags.eeh_busy) {
|
|
Daniel Wagner |
3120dd |
+ ha->eeh_jif = jiffies;
|
|
Daniel Wagner |
3120dd |
+ ha->flags.eeh_flush = 0;
|
|
Daniel Wagner |
3120dd |
+
|
|
Daniel Wagner |
3120dd |
ha->flags.eeh_busy = 1;
|
|
Daniel Wagner |
3120dd |
do_cleanup = true;
|
|
Daniel Wagner |
3120dd |
}
|