From: Shyam Sundar Date: Tue, 26 Mar 2019 00:38:37 -0700 Subject: scsi: qedf: Modify flush routine to handle all I/Os and TMF Git-commit: 5d5e55659b375a39a42dad988869cd9966d20255 Patch-mainline: v5.2-rc1 References: bsc#1136467 jsc#SLE-4694 The purpose of the flush routine is to clean up I/Os to the firmware and complete them to the SCSI mid-layer. This routine is invoked before the connection is uploaded because of the rport going away. - Don't process any I/Os, aborts, or TMFs that arrive while a flush is in progress. - Add flags to handle cleanup and release of I/Os, because flush can prematurely complete I/Os. - The original command can get completed to the driver when a cleanup for the same command is posted to the firmware; handle this condition. - Modify flush to handle I/Os in all states, such as abort, TMF, RRQ and timeouts. Signed-off-by: Shyam Sundar Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen Acked-by: Lee Duncan --- drivers/scsi/qedf/qedf.h | 6 drivers/scsi/qedf/qedf_els.c | 4 drivers/scsi/qedf/qedf_io.c | 285 ++++++++++++++++++++++++++++++++++++++---- drivers/scsi/qedf/qedf_main.c | 8 - 4 files changed, 277 insertions(+), 26 deletions(-) --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -117,6 +117,8 @@ struct qedf_ioreq { #define QEDF_CMD_IN_ABORT 0x1 #define QEDF_CMD_IN_CLEANUP 0x2 #define QEDF_CMD_SRR_SENT 0x3 +#define QEDF_CMD_DIRTY 0x4 +#define QEDF_CMD_ERR_SCSI_DONE 0x5 u8 io_req_flags; uint8_t tm_flags; struct qedf_rport *fcport; @@ -133,6 +135,7 @@ struct qedf_ioreq { struct fcoe_task_params *task_params; struct scsi_sgl_task_params *sgl_task_params; int idx; + int lun; /* * Need to allocate enough room for both sense data and FCP response data * which has a max length of 8 bytes according to spec. @@ -172,6 +175,8 @@ struct qedf_ioreq { * during some form of error processing. 
*/ bool return_scsi_cmd_on_abts; + + unsigned int alloc; }; extern struct workqueue_struct *qedf_io_wq; @@ -191,6 +196,7 @@ struct qedf_rport { void __iomem *p_doorbell; /* Send queue management */ atomic_t free_sqes; + atomic_t ios_to_queue; atomic_t num_active_ios; struct fcoe_wqe *sq; dma_addr_t sq_dma; --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -143,6 +143,8 @@ retry_els: QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Ringing doorbell for ELS " "req\n"); qedf_ring_doorbell(fcport); + set_bit(QEDF_CMD_OUTSTANDING, &els_req->flags); + spin_unlock_irqrestore(&fcport->rport_lock, flags); els_err: return rc; @@ -159,6 +161,8 @@ void qedf_process_els_compl(struct qedf_ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered with xid = 0x%x" " cmd_type = %d.\n", els_req->xid, els_req->cmd_type); + clear_bit(QEDF_CMD_OUTSTANDING, &els_req->flags); + /* Kill the ELS timer */ cancel_delayed_work(&els_req->timeout_work); --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -329,7 +329,7 @@ struct qedf_ioreq *qedf_alloc_cmd(struct cmd_mgr->idx = 0; /* Check to make sure command was previously freed */ - if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags)) + if (!io_req->alloc) break; } @@ -338,7 +338,9 @@ struct qedf_ioreq *qedf_alloc_cmd(struct goto out_failed; } - set_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + /* Clear any flags now that we've reallocated the xid */ + io_req->flags = 0; + io_req->alloc = 1; spin_unlock_irqrestore(&cmd_mgr->lock, flags); atomic_inc(&fcport->num_active_ios); @@ -349,6 +351,10 @@ struct qedf_ioreq *qedf_alloc_cmd(struct io_req->cmd_mgr = cmd_mgr; io_req->fcport = fcport; + /* Clear any stale sc_cmd back pointer */ + io_req->sc_cmd = NULL; + io_req->lun = -1; + /* Hold the io_req against deletion */ kref_init(&io_req->refcount); @@ -412,6 +418,10 @@ void qedf_release_cmd(struct kref *ref) container_of(ref, struct qedf_ioreq, refcount); struct qedf_cmd_mgr *cmd_mgr = io_req->cmd_mgr; struct qedf_rport 
*fcport = io_req->fcport; + unsigned long flags; + + if (io_req->cmd_type == QEDF_SCSI_CMD) + WARN_ON(io_req->sc_cmd); if (io_req->cmd_type == QEDF_ELS || io_req->cmd_type == QEDF_TASK_MGMT_CMD) @@ -424,8 +434,14 @@ void qedf_release_cmd(struct kref *ref) /* Increment task retry identifier now that the request is released */ io_req->task_retry_identifier++; + io_req->fcport = NULL; - clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + clear_bit(QEDF_CMD_DIRTY, &io_req->flags); + io_req->cpu = 0; + spin_lock_irqsave(&cmd_mgr->lock, flags); + io_req->fcport = NULL; + io_req->alloc = 0; + spin_unlock_irqrestore(&cmd_mgr->lock, flags); } static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len, @@ -895,16 +911,24 @@ int qedf_post_io_req(struct qedf_rport * /* Build buffer descriptor list for firmware from sg list */ if (qedf_build_bd_list_from_sg(io_req)) { QEDF_ERR(&(qedf->dbg_ctx), "BD list creation failed.\n"); + /* Release cmd will release io_req, but sc_cmd is assigned */ + io_req->sc_cmd = NULL; kref_put(&io_req->refcount, qedf_release_cmd); return -EAGAIN; } - if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags) || + test_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags)) { QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n"); + /* Release cmd will release io_req, but sc_cmd is assigned */ + io_req->sc_cmd = NULL; kref_put(&io_req->refcount, qedf_release_cmd); return -EINVAL; } + /* Record LUN number for later use if we neeed them */ + io_req->lun = (int)sc_cmd->device->lun; + /* Obtain free SQE */ sqe_idx = qedf_get_sqe_idx(fcport); sqe = &fcport->sq[sqe_idx]; @@ -915,6 +939,8 @@ int qedf_post_io_req(struct qedf_rport * if (!task_ctx) { QEDF_WARN(&(qedf->dbg_ctx), "task_ctx is NULL, xid=%d.\n", xid); + /* Release cmd will release io_req, but sc_cmd is assigned */ + io_req->sc_cmd = NULL; kref_put(&io_req->refcount, qedf_release_cmd); return -EINVAL; } @@ -924,6 +950,9 @@ int 
qedf_post_io_req(struct qedf_rport * /* Ring doorbell */ qedf_ring_doorbell(fcport); + /* Set that command is with the firmware now */ + set_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + if (qedf_io_tracing && io_req->sc_cmd) qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_REQ); @@ -990,12 +1019,16 @@ qedf_queuecommand(struct Scsi_Host *host rc = SCSI_MLQUEUE_TARGET_BUSY; goto exit_qcmd; } + + atomic_inc(&fcport->ios_to_queue); + if (fcport->retry_delay_timestamp) { if (time_after(jiffies, fcport->retry_delay_timestamp)) { fcport->retry_delay_timestamp = 0; } else { /* If retry_delay timer is active, flow off the ML */ rc = SCSI_MLQUEUE_TARGET_BUSY; + atomic_dec(&fcport->ios_to_queue); goto exit_qcmd; } } @@ -1003,6 +1036,7 @@ qedf_queuecommand(struct Scsi_Host *host io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); if (!io_req) { rc = SCSI_MLQUEUE_HOST_BUSY; + atomic_dec(&fcport->ios_to_queue); goto exit_qcmd; } @@ -1017,6 +1051,7 @@ qedf_queuecommand(struct Scsi_Host *host rc = SCSI_MLQUEUE_HOST_BUSY; } spin_unlock_irqrestore(&fcport->rport_lock, flags); + atomic_dec(&fcport->ios_to_queue); exit_qcmd: return rc; @@ -1107,6 +1142,15 @@ void qedf_scsi_completion(struct qedf_ct if (!cqe) return; + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || + test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { + QEDF_ERR(&qedf->dbg_ctx, + "io_req xid=0x%x already in cleanup or abort processing or already completed.\n", + io_req->xid); + return; + } + xid = io_req->xid; task_ctx = qedf_get_task_mem(&qedf->tasks, xid); sc_cmd = io_req->sc_cmd; @@ -1123,6 +1167,12 @@ void qedf_scsi_completion(struct qedf_ct return; } + if (!sc_cmd->device) { + QEDF_ERR(&qedf->dbg_ctx, + "Device for sc_cmd %p is NULL.\n", sc_cmd); + return; + } + if (!sc_cmd->request) { QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd->request is NULL, " "sc_cmd=%p.\n", sc_cmd); @@ -1250,6 +1300,12 @@ out: if (qedf_io_tracing) qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_RSP); + /* + 
* We wait till the end of the function to clear the + * outstanding bit in case we need to send an abort + */ + clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + io_req->sc_cmd = NULL; sc_cmd->SCp.ptr = NULL; sc_cmd->scsi_done(sc_cmd); @@ -1267,6 +1323,19 @@ void qedf_scsi_done(struct qedf_ctx *qed if (!io_req) return; + if (test_and_set_bit(QEDF_CMD_ERR_SCSI_DONE, &io_req->flags)) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "io_req:%p scsi_done handling already done\n", + io_req); + return; + } + + /* + * We will be done with this command after this call so clear the + * outstanding bit. + */ + clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + xid = io_req->xid; sc_cmd = io_req->sc_cmd; @@ -1275,12 +1344,23 @@ void qedf_scsi_done(struct qedf_ctx *qed return; } + if (!virt_addr_valid(sc_cmd)) { + QEDF_ERR(&qedf->dbg_ctx, "sc_cmd=%p is not valid.", sc_cmd); + return; + } + if (!sc_cmd->SCp.ptr) { QEDF_WARN(&(qedf->dbg_ctx), "SCp.ptr is NULL, returned in " "another context.\n"); return; } + if (!sc_cmd->device) { + QEDF_ERR(&qedf->dbg_ctx, "Device for sc_cmd %p is NULL.\n", + sc_cmd); + return; + } + qedf_unmap_sg_list(qedf, io_req); sc_cmd->result = result << 16; @@ -1445,6 +1525,10 @@ void qedf_flush_active_ios(struct qedf_r struct qedf_ctx *qedf; struct qedf_cmd_mgr *cmd_mgr; int i, rc; + unsigned long flags; + int flush_cnt = 0; + int wait_cnt = 100; + int refcount = 0; if (!fcport) return; @@ -1456,18 +1540,87 @@ void qedf_flush_active_ios(struct qedf_r } qedf = fcport->qedf; + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + return; + } + + /* Only wait for all commands to be queued in the Upload context */ + if (test_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags) && + (lun == -1)) { + while (atomic_read(&fcport->ios_to_queue)) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Waiting for %d I/Os to be queued\n", + atomic_read(&fcport->ios_to_queue)); + if (wait_cnt == 0) { + QEDF_ERR(NULL, + "%d IOs request could not be queued\n", + 
atomic_read(&fcport->ios_to_queue)); + } + msleep(20); + wait_cnt--; + } + } + cmd_mgr = qedf->cmd_mgr; - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Flush active i/o's.\n"); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Flush active i/o's num=0x%x fcport=0x%p port_id=0x%06x scsi_id=%d.\n", + atomic_read(&fcport->num_active_ios), fcport, + fcport->rdata->ids.port_id, fcport->rport->scsi_target_id); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "Locking flush mutex.\n"); + + mutex_lock(&qedf->flush_mutex); + if (lun == -1) { + set_bit(QEDF_RPORT_IN_TARGET_RESET, &fcport->flags); + } else { + set_bit(QEDF_RPORT_IN_LUN_RESET, &fcport->flags); + fcport->lun_reset_lun = lun; + } for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { io_req = &cmd_mgr->cmds[i]; if (!io_req) continue; + if (!io_req->fcport) + continue; + + spin_lock_irqsave(&cmd_mgr->lock, flags); + + if (io_req->alloc) { + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags)) { + if (io_req->cmd_type == QEDF_SCSI_CMD) + QEDF_ERR(&qedf->dbg_ctx, + "Allocated but not queued, xid=0x%x\n", + io_req->xid); + } + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + } else { + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + continue; + } + if (io_req->fcport != fcport) continue; - if (io_req->cmd_type == QEDF_ELS) { + + /* In case of ABTS, CMD_OUTSTANDING is cleared on ABTS response, + * but RRQ is still pending. + * Workaround: Within qedf_send_rrq, we check if the fcport is + * NULL, and we drop the ref on the io_req to clean it up. 
+ */ + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags)) { + refcount = kref_read(&io_req->refcount); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Not outstanding, xid=0x%x, cmd_type=%d refcount=%d.\n", + io_req->xid, io_req->cmd_type, refcount); + continue; + } + + /* Only consider flushing ELS during target reset */ + if (io_req->cmd_type == QEDF_ELS && + lun == -1) { rc = kref_get_unless_zero(&io_req->refcount); if (!rc) { QEDF_ERR(&(qedf->dbg_ctx), @@ -1475,6 +1628,7 @@ void qedf_flush_active_ios(struct qedf_r io_req, io_req->xid); continue; } + flush_cnt++; qedf_flush_els_req(qedf, io_req); /* * Release the kref and go back to the top of the @@ -1491,28 +1645,49 @@ void qedf_flush_active_ios(struct qedf_r io_req, io_req->xid); continue; } + if (lun != -1 && io_req->lun != lun) + goto free_cmd; + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "Flushing abort xid=0x%x.\n", io_req->xid); - clear_bit(QEDF_CMD_IN_ABORT, &io_req->flags); - - if (io_req->sc_cmd) { - if (io_req->return_scsi_cmd_on_abts) - qedf_scsi_done(qedf, io_req, DID_ERROR); + if (cancel_delayed_work_sync(&io_req->rrq_work)) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Putting reference for pending RRQ work xid=0x%x.\n", + io_req->xid); + kref_put(&io_req->refcount, qedf_release_cmd); } + /* Cancel any timeout work */ + cancel_delayed_work_sync(&io_req->timeout_work); + + if (!test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) + goto free_cmd; + + qedf_initiate_cleanup(io_req, true); + flush_cnt++; + /* Notify eh_abort handler that ABTS is complete */ - complete(&io_req->abts_done); kref_put(&io_req->refcount, qedf_release_cmd); + complete(&io_req->abts_done); goto free_cmd; } if (!io_req->sc_cmd) continue; - if (lun > 0) { - if (io_req->sc_cmd->device->lun != - (u64)lun) + if (!io_req->sc_cmd->device) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Device backpointer NULL for sc_cmd=%p.\n", + io_req->sc_cmd); + /* Put reference for non-existent scsi_cmnd */ + io_req->sc_cmd = NULL; + 
qedf_initiate_cleanup(io_req, false); + kref_put(&io_req->refcount, qedf_release_cmd); + continue; + } + if (lun > -1) { + if (io_req->lun != lun) continue; } @@ -1526,8 +1701,10 @@ void qedf_flush_active_ios(struct qedf_r "io_req=0x%p xid=0x%x\n", io_req, io_req->xid); continue; } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Cleanup xid=0x%x.\n", io_req->xid); + flush_cnt++; /* Cleanup task and return I/O mid-layer */ qedf_initiate_cleanup(io_req, true); @@ -1535,6 +1712,52 @@ void qedf_flush_active_ios(struct qedf_r free_cmd: kref_put(&io_req->refcount, qedf_release_cmd); } + + wait_cnt = 60; + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Flushed 0x%x I/Os, active=0x%x.\n", + flush_cnt, atomic_read(&fcport->num_active_ios)); + /* Only wait for all commands to complete in the Upload context */ + if (test_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags) && + (lun == -1)) { + while (atomic_read(&fcport->num_active_ios)) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Flushed 0x%x I/Os, active=0x%x cnt=%d.\n", + flush_cnt, + atomic_read(&fcport->num_active_ios), + wait_cnt); + if (wait_cnt == 0) { + QEDF_ERR(&qedf->dbg_ctx, + "Flushed %d I/Os, active=%d.\n", + flush_cnt, + atomic_read(&fcport->num_active_ios)); + for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { + io_req = &cmd_mgr->cmds[i]; + if (io_req->fcport && + io_req->fcport == fcport) { + refcount = + kref_read(&io_req->refcount); + QEDF_ERR(&qedf->dbg_ctx, + "Outstanding io_req =%p xid=0x%x flags=0x%lx, sc_cmd=%p refcount=%d cmd_type=%d.\n", + io_req, io_req->xid, + io_req->flags, + io_req->sc_cmd, + refcount, + io_req->cmd_type); + } + } + WARN_ON(1); + break; + } + msleep(500); + wait_cnt--; + } + } + + clear_bit(QEDF_RPORT_IN_LUN_RESET, &fcport->flags); + clear_bit(QEDF_RPORT_IN_TARGET_RESET, &fcport->flags); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, "Unlocking flush mutex.\n"); + mutex_unlock(&qedf->flush_mutex); } /* @@ -1810,6 +2033,7 @@ int qedf_initiate_cleanup(struct qedf_io unsigned long flags; struct 
fcoe_wqe *sqe; u16 sqe_idx; + int refcount = 0; fcport = io_req->fcport; if (!fcport) { @@ -1831,7 +2055,7 @@ int qedf_initiate_cleanup(struct qedf_io } if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || - test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags)) { + test_and_set_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags)) { QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " "cleanup processing or already completed.\n", io_req->xid); @@ -1844,20 +2068,26 @@ int qedf_initiate_cleanup(struct qedf_io return FAILED; } + if (io_req->cmd_type == QEDF_CLEANUP) { + QEDF_ERR(&qedf->dbg_ctx, + "io_req=0x%x is already a cleanup command cmd_type=%d.\n", + io_req->xid, io_req->cmd_type); + clear_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + return SUCCESS; + } + + refcount = kref_read(&io_req->refcount); - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Entered xid=0x%x\n", - io_req->xid); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Entered xid=0x%x sc_cmd=%p cmd_type=%d flags=0x%lx refcount=%d\n", + io_req->xid, io_req->sc_cmd, io_req->cmd_type, io_req->flags, + refcount); /* Cleanup cmds re-use the same TID as the original I/O */ xid = io_req->xid; io_req->cmd_type = QEDF_CLEANUP; io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; - /* Set the return CPU to be the same as the request one */ - io_req->cpu = smp_processor_id(); - - set_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); - task = qedf_get_task_mem(&qedf->tasks, xid); init_completion(&io_req->tm_done); @@ -1888,6 +2118,15 @@ int qedf_initiate_cleanup(struct qedf_io qedf_drain_request(qedf); } + /* If it TASK MGMT handle it, reference will be decreased + * in qedf_execute_tmf + */ + if (io_req->cmd_type == QEDF_TASK_MGMT_CMD) { + clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + io_req->sc_cmd = NULL; + complete(&io_req->tm_done); + } + if (io_req->sc_cmd) { if (io_req->return_scsi_cmd_on_abts) qedf_scsi_done(qedf, io_req, DID_ERROR); --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1370,10 +1370,12 
@@ static void qedf_rport_event_handler(str fcport = (struct qedf_rport *)&rp[1]; /* Only free this fcport if it is offloaded already */ - if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { - set_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags); + if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags) && + !test_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags)) { + set_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags); qedf_cleanup_fcport(qedf, fcport); - /* * Remove fcport to list of qedf_ctx list of offloaded * ports