Blob Blame History Raw
From: Giridhar Malavali <gmalavali@marvell.com>
Date: Tue, 2 Apr 2019 14:24:33 -0700
Subject: scsi: qla2xxx: Change abort wait_loop from msleep to
 wait_event_timeout
Patch-mainline: v5.2-rc1
Git-commit: 711a08d79f718abcdd3f86f44ffa8473ef1486ef
References: bsc#1082635 bsc#1123034 bsc#1131304 bsc#1127988 bsc#1141340 bsc#1143706

This patch converts driver wait time from using msleep to
wair_event_timeout to prevent race condition.

Signed-off-by: Giridhar Malavali <gmalavali@marvell.com>
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/scsi/qla2xxx/qla_def.h |    2 
 drivers/scsi/qla2xxx/qla_os.c  |  104 +++++++++++++++++++++++++----------------
 2 files changed, 65 insertions(+), 41 deletions(-)

--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -546,6 +546,7 @@ typedef struct srb {
 	int rc;
 	int retry_count;
 	struct completion comp;
+	wait_queue_head_t *cwaitq;
 	union {
 		struct srb_iocb iocb_cmd;
 		struct bsg_job *bsg_job;
@@ -4792,5 +4793,4 @@ struct sff_8247_a0 {
 #include "qla_gbl.h"
 #include "qla_dbg.h"
 #include "qla_inline.h"
-
 #endif
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -23,6 +23,23 @@
 
 #include "qla_target.h"
 
+#define __wait_event_lock_irq_timeout(wq_head, condition, lock, timeout, state) \
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      state, 0, timeout,					\
+		      spin_unlock_irq(&lock);					\
+		      __ret = schedule_timeout(__ret);				\
+		      spin_lock_irq(&lock));
+
+#define wait_event_lock_irq_timeout(wq_head, condition, lock, timeout)	\
+({										\
+	long __ret = timeout;							\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_lock_irq_timeout(				\
+					wq_head, condition, lock, timeout,	\
+					TASK_UNINTERRUPTIBLE);			\
+	__ret;									\
+})
+
 /*
  * Driver version
  */
@@ -729,7 +746,7 @@ qla2x00_sp_free_dma(void *ptr)
 	}
 
 	if (!ctx)
-		goto end;
+		return;
 
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
@@ -754,12 +771,6 @@ qla2x00_sp_free_dma(void *ptr)
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
 	}
-
-end:
-	if (sp->type != SRB_NVME_CMD && sp->type != SRB_NVME_LS) {
-		CMD_SP(cmd) = NULL;
-		qla2x00_rel_sp(sp);
-	}
 }
 
 void
@@ -767,6 +778,7 @@ qla2x00_sp_compl(void *ptr, int res)
 {
 	srb_t *sp = ptr;
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+	wait_queue_head_t *cwaitq = sp->cwaitq;
 
 	if (atomic_read(&sp->ref_count) == 0) {
 		ql_dbg(ql_dbg_io, sp->vha, 0x3015,
@@ -781,7 +793,11 @@ qla2x00_sp_compl(void *ptr, int res)
 
 	sp->free(sp);
 	cmd->result = res;
+	CMD_SP(cmd) = NULL;
 	cmd->scsi_done(cmd);
+	if (cwaitq)
+		wake_up(cwaitq);
+	qla2x00_rel_sp(sp);
 }
 
 void
@@ -804,7 +820,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 	}
 
 	if (!ctx)
-		goto end;
+		return;
 
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
@@ -864,10 +880,6 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 		}
 		sp->flags &= ~SRB_DIF_BUNDL_DMA_VALID;
 	}
-
-end:
-	CMD_SP(cmd) = NULL;
-	qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
 void
@@ -875,8 +887,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int re
 {
 	srb_t *sp = ptr;
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-
-	cmd->result = res;
+	wait_queue_head_t *cwaitq = sp->cwaitq;
 
 	if (atomic_read(&sp->ref_count) == 0) {
 		ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
@@ -890,7 +901,12 @@ qla2xxx_qpair_sp_compl(void *ptr, int re
 		return;
 
 	sp->free(sp);
+	cmd->result = res;
+	CMD_SP(cmd) = NULL;
 	cmd->scsi_done(cmd);
+	if (cwaitq)
+		wake_up(cwaitq);
+	qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
 /* If we are SP1 here, we need to still take and release the host_lock as SP1
@@ -1384,7 +1400,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 	    vha->host_no, id, lun, sp, cmd, sp->handle);
 
 	/* Get a reference to the sp and drop the lock.*/
-
 	rval = ha->isp_ops->abort_command(sp);
 	if (rval) {
 		if (rval == QLA_FUNCTION_PARAMETER_ERROR)
@@ -1401,37 +1416,46 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 	}
 
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
-	/*
-	 * Clear the slot in the oustanding_cmds array if we can't find the
-	 * command to reclaim the resources.
-	 */
-	if (rval == QLA_FUNCTION_PARAMETER_ERROR)
-		vha->req->outstanding_cmds[sp->handle] = NULL;
 
 	/*
-	 * sp->done will do ref_count--
-	 * sp_get() took an extra count above
+	 * Releasing of the SRB and associated command resources
+	 * is managed through ref_count.
+	 * Whether we need to wait for the abort completion or complete
+	 * the abort handler should be based on the ref_count.
 	 */
-	sp->done(sp, DID_RESET << 16);
-
-	/* Did the command return during mailbox execution? */
-	if (ret == FAILED && !CMD_SP(cmd))
-		ret = SUCCESS;
-
-	if (!CMD_SP(cmd))
-		wait = 0;
-
-	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
-
-	/* Wait for the command to be returned. */
-	if (wait) {
-		if (qla2x00_eh_wait_on_command(cmd) != QLA_SUCCESS) {
-			ql_log(ql_log_warn, vha, 0x8006,
-			    "Abort handler timed out cmd=%p.\n", cmd);
+	if (atomic_read(&sp->ref_count) > 1) {
+		/*
+		 * The command is not yet completed. We need to wait for either
+		 * command completion or abort completion.
+		 */
+		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq);
+		uint32_t ratov = ha->r_a_tov/10;
+
+		/* Go ahead and release the extra ref_count obtained earlier */
+		sp->done(sp, DID_RESET << 16);
+		sp->cwaitq = &eh_waitq;
+
+		if (!wait_event_lock_irq_timeout(eh_waitq,
+		    CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr,
+		    msecs_to_jiffies(4 * ratov * 1000))) {
+			/*
+			 * The abort got dropped, LOGO will be sent and the
+			 * original command will be completed with CS_TIMEOUT
+			 * completion
+			 */
+			ql_dbg(ql_dbg_taskm, vha, 0xffff,
+			    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
+			    __func__, ha->r_a_tov);
+			sp->cwaitq = NULL;
 			ret = FAILED;
+			goto end;
 		}
+	} else {
+		/* Command completed while processing the abort */
+		sp->done(sp, DID_RESET << 16);
 	}
-
+end:
+	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 	ql_log(ql_log_info, vha, 0x801c,
 	    "Abort command issued nexus=%ld:%d:%llu --  %d %x.\n",
 	    vha->host_no, id, lun, wait, ret);