Blob Blame History Raw
From: Quinn Tran <quinn.tran@cavium.com>
Date: Tue, 4 Sep 2018 14:19:12 -0700
Subject: [PATCH] scsi: qla2xxx: Add logic to detect ABTS hang and response
 completion
References: bsc#1108870
Git-commit: 0691094ff3f2cfa357b9de9030b65eddc4dab29d
Patch-mainline: v4.20-rc1

ABTS error completion can trigger an exchange cleanup from the driver and
another ABTS response will be generated.  This retry of ABTS response can
cause loop between driver trying to send ABTS and firmware returning error.

This patch fixes this issue by adding logic to check for unresolved exchanges
and clean up before ABTS is retried.  This patch also addes the fix to use the
same qpair as the ABTS completion for the terminatation of exchange as well as
retry of ABTS response.

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/qla2xxx/qla_def.h    |  3 ++
 drivers/scsi/qla2xxx/qla_os.c     |  1 +
 drivers/scsi/qla2xxx/qla_target.c | 72 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 16dd59bcd60a..8722abff7061 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3485,6 +3485,9 @@ struct qla_qpair {
 	struct list_head qp_list_elem; /* vha->qp_list */
 	struct list_head hints_list;
 	uint16_t cpuid;
+	uint16_t retry_term_cnt;
+	uint32_t retry_term_exchg_addr;
+	uint64_t retry_term_jiff;
 	struct qla_tgt_counters tgt_counters;
 };
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 9628fe4a967f..f3787fc48533 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -391,6 +391,7 @@ static void qla_init_base_qpair(struct scsi_qla_host *vha, struct req_que *req,
 	struct qla_hw_data *ha = vha->hw;
 	rsp->qpair = ha->base_qpair;
 	rsp->req = req;
+	ha->base_qpair->hw = ha;
 	ha->base_qpair->req = req;
 	ha->base_qpair->rsp = rsp;
 	ha->base_qpair->vha = vha;
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 3796525aec71..954d5fa1374c 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1800,15 +1800,15 @@ static void qlt_24xx_send_abts_resp(struct qla_qpair *qpair,
  * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire
  */
 static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha,
-	struct abts_resp_from_24xx_fw *entry)
+    struct qla_qpair *qpair, struct abts_resp_from_24xx_fw *entry)
 {
 	struct ctio7_to_24xx *ctio;
+	u16 tmp;
 
 	ql_dbg(ql_dbg_tgt, vha, 0xe007,
 	    "Sending retry TERM EXCH CTIO7 (ha=%p)\n", vha->hw);
 
-	ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs_ready(
-	    vha->hw->base_qpair, NULL);
+	ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs_ready(qpair, NULL);
 	if (ctio == NULL) {
 		ql_dbg(ql_dbg_tgt, vha, 0xe04b,
 		    "qla_target(%d): %s failed: unable to allocate "
@@ -1831,16 +1831,21 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha,
 	ctio->initiator_id[1] = entry->fcp_hdr_le.d_id[1];
 	ctio->initiator_id[2] = entry->fcp_hdr_le.d_id[2];
 	ctio->exchange_addr = entry->exchange_addr_to_abort;
-	ctio->u.status1.flags = cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 |
-					    CTIO7_FLAGS_TERMINATE);
+	tmp = (CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_TERMINATE);
+	if (qpair->retry_term_cnt & 1)
+		tmp |= (0x4 << 9);
+	ctio->u.status1.flags = cpu_to_le16(tmp);
+
 	ctio->u.status1.ox_id = cpu_to_le16(entry->fcp_hdr_le.ox_id);
 
 	/* Memory Barrier */
 	wmb();
-	qla2x00_start_iocbs(vha, vha->req);
+	if (qpair->reqq_start_iocbs)
+		qpair->reqq_start_iocbs(qpair);
+	else
+		qla2x00_start_iocbs(vha, qpair->req);
 
-	qlt_24xx_send_abts_resp(vha->hw->base_qpair,
-	    (struct abts_recv_from_24xx *)entry,
+	qlt_24xx_send_abts_resp(qpair, (struct abts_recv_from_24xx *)entry,
 	    FCP_TMF_CMPL, true);
 }
 
@@ -5667,6 +5672,52 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
 	tgt->atio_irq_cmd_count--;
 }
 
+/*
+ * qpair lock is assume to be held
+ * rc = 0 : send terminate & abts respond
+ * rc != 0: do not send term & abts respond
+ */
+static int qlt_chk_unresolv_exchg(struct scsi_qla_host *vha,
+    struct qla_qpair *qpair, struct abts_resp_from_24xx_fw *entry)
+{
+	struct qla_hw_data *ha = vha->hw;
+	int rc = 0;
+
+	/*
+	 * Detect unresolved exchange. If the same ABTS is unable
+	 * to terminate an existing command and the same ABTS loops
+	 * between FW & Driver, then force FW dump. Under 1 jiff,
+	 * we should see multiple loops.
+	 */
+	if (qpair->retry_term_exchg_addr == entry->exchange_addr_to_abort &&
+	    qpair->retry_term_jiff == jiffies) {
+		/* found existing exchange */
+		qpair->retry_term_cnt++;
+		if (qpair->retry_term_cnt >= 5) {
+			rc = EIO;
+			qpair->retry_term_cnt = 0;
+			ql_log(ql_log_warn, vha, 0xffff,
+			    "Unable to send ABTS Respond. Dumping firmware.\n");
+			ql_dump_buffer(ql_dbg_tgt_mgt + ql_dbg_buffer,
+			    vha, 0xffff, (uint8_t *)entry, sizeof(*entry));
+
+			if (qpair == ha->base_qpair)
+				ha->isp_ops->fw_dump(vha, 1);
+			else
+				ha->isp_ops->fw_dump(vha, 0);
+
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+			qla2xxx_wake_dpc(vha);
+		}
+	} else if (qpair->retry_term_jiff != jiffies) {
+		qpair->retry_term_exchg_addr = entry->exchange_addr_to_abort;
+		qpair->retry_term_cnt = 0;
+		qpair->retry_term_jiff = jiffies;
+	}
+
+	return rc;
+}
+
 /* ha->hardware_lock supposed to be held on entry */
 /* called via callback from qla2xxx */
 static void qlt_response_pkt(struct scsi_qla_host *vha,
@@ -5824,8 +5875,11 @@ static void qlt_response_pkt(struct scsi_qla_host *vha,
 					 * (re)terminate the exchange and retry
 					 * the abort response.
 					 */
+					if (qlt_chk_unresolv_exchg(vha,
+						rsp->qpair, entry))
+						break;
 					qlt_24xx_retry_term_exchange(vha,
-					    entry);
+					    rsp->qpair, entry);
 				} else
 					ql_dbg(ql_dbg_tgt, vha, 0xe063,
 					    "qla_target(%d): ABTS_RESP_24XX "
-- 
2.16.4