From: Kevin Barnett <kevin.barnett@hpe.com>
Date: Wed, 3 May 2017 18:54:43 -0500
Subject: scsi: smartpqi: correct aio error path
Patch-mainline: v4.13-rc1
Git-commit: 376fb880a4fbf6903918a88081b16c167819af3f
References: bsc#1038125

Set the internal flag that causes I/O to be sent down the
RAID path when the AIO path is disabled.

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>

---
 drivers/scsi/smartpqi/smartpqi.h      |    8 
 drivers/scsi/smartpqi/smartpqi_init.c |  345 +++++++++++++++++++++++++++-------
 2 files changed, 285 insertions(+), 68 deletions(-)

--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -785,11 +785,11 @@ struct pqi_scsi_dev {
 	u8	is_physical_device : 1;
 	u8	is_external_raid_device : 1;
 	u8	target_lun_valid : 1;
-	u8	aio_enabled : 1;	/* only valid for physical disks */
 	u8	device_gone : 1;
 	u8	new_device : 1;
 	u8	keep_device : 1;
 	u8	volume_offline : 1;
+	bool	aio_enabled;		/* only valid for physical disks */
 	bool	in_reset;
 	bool	device_offline;
 	u8	vendor[8];		/* bytes 8-15 of inquiry data */
@@ -911,7 +911,9 @@ struct pqi_io_request {
 	void (*io_complete_callback)(struct pqi_io_request *io_request,
 		void *context);
 	void		*context;
+	u8		raid_bypass : 1;
 	int		status;
+	struct pqi_queue_group *queue_group;
 	struct scsi_cmnd *scmd;
 	void		*error_info;
 	struct pqi_sg_descriptor *sg_chain_buffer;
@@ -1019,6 +1021,10 @@ struct pqi_ctrl_info {
 	atomic_t	num_busy_threads;
 	atomic_t	num_blocked_threads;
 	wait_queue_head_t block_requests_wait;
+
+	struct list_head raid_bypass_retry_list;
+	spinlock_t	raid_bypass_retry_list_lock;
+	struct work_struct raid_bypass_retry_work;
 };
 
 enum pqi_ctrl_mode {
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -63,6 +63,9 @@ static char *microsemi_branded_controlle
 
 static void pqi_perform_lockup_action(void);
 static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info);
+static void pqi_complete_all_queued_raid_bypass_retries(
+	struct pqi_ctrl_info *ctrl_info, int result);
+static void pqi_retry_raid_bypass_requests(struct pqi_ctrl_info *ctrl_info);
 static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info);
 static void pqi_scan_start(struct Scsi_Host *shost);
 static void pqi_start_io(struct pqi_ctrl_info *ctrl_info,
@@ -74,7 +77,7 @@ static int pqi_submit_raid_request_synch
 static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
 	struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb,
 	unsigned int cdb_length, struct pqi_queue_group *queue_group,
-	struct pqi_encryption_info *encryption_info);
+	struct pqi_encryption_info *encryption_info, bool raid_bypass);
 
 /* for flags argument to pqi_submit_raid_request_synchronous() */
 #define PQI_SYNC_FLAGS_INTERRUPTABLE	0x1
@@ -227,6 +230,7 @@ static inline void pqi_ctrl_unblock_requ
 {
 	ctrl_info->block_requests = false;
 	wake_up_all(&ctrl_info->block_requests_wait);
+	pqi_retry_raid_bypass_requests(ctrl_info);
 	scsi_unblock_requests(ctrl_info->scsi_host);
 }
 
@@ -445,6 +449,14 @@ static int pqi_build_raid_path_request(s
 		buffer, buffer_length, pci_dir);
 }
 
+static inline void pqi_reinit_io_request(struct pqi_io_request *io_request)
+{
+	io_request->scmd = NULL;
+	io_request->status = 0;
+	io_request->error_info = NULL;
+	io_request->raid_bypass = false;
+}
+
 static struct pqi_io_request *pqi_alloc_io_request(
 	struct pqi_ctrl_info *ctrl_info)
 {
@@ -462,9 +474,7 @@ static struct pqi_io_request *pqi_alloc_
 	/* benignly racy */
 	ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots;
 
-	io_request->scmd = NULL;
-	io_request->status = 0;
-	io_request->error_info = NULL;
+	pqi_reinit_io_request(io_request);
 
 	return io_request;
 }
@@ -1678,8 +1688,8 @@ static bool pqi_is_supported_device(stru
 		/*
 		 * Only support the HBA controller itself as a RAID
 		 * controller.  If it's a RAID controller other than
-		 * the HBA itself (an external RAID controller, MSA500
-		 * or similar), we don't support it.
+		 * the HBA itself (an external RAID controller, for
+		 * example), we don't support it.
 		 */
 		if (pqi_is_hba_lunid(device->scsi3addr))
 			is_supported = true;
@@ -2308,7 +2318,7 @@ static int pqi_raid_bypass_submit_scsi_c
 	}
 
 	return pqi_aio_submit_io(ctrl_info, scmd, aio_handle,
-		cdb, cdb_length, queue_group, encryption_info_ptr);
+		cdb, cdb_length, queue_group, encryption_info_ptr, true);
 }
 
 #define PQI_STATUS_IDLE		0x0
@@ -2381,6 +2391,7 @@ static inline void pqi_aio_path_disabled
 
 	device = io_request->scmd->device->hostdata;
 	device->offload_enabled = false;
+	device->aio_enabled = false;
 }
 
 static inline void pqi_take_device_offline(struct scsi_device *sdev, char *path)
@@ -2500,9 +2511,11 @@ static void pqi_process_aio_io_error(str
 			break;
 		case PQI_AIO_STATUS_NO_PATH_TO_DEVICE:
 		case PQI_AIO_STATUS_INVALID_DEVICE:
-			device_offline = true;
-			pqi_take_device_offline(scmd->device, "AIO");
-			host_byte = DID_NO_CONNECT;
+			if (!io_request->raid_bypass) {
+				device_offline = true;
+				pqi_take_device_offline(scmd->device, "AIO");
+				host_byte = DID_NO_CONNECT;
+			}
 			scsi_status = SAM_STAT_CHECK_CONDITION;
 			break;
 		case PQI_AIO_STATUS_IO_ERROR:
@@ -2751,48 +2764,6 @@ static void pqi_event_worker(struct work
 	pqi_schedule_rescan_worker(ctrl_info);
 }
 
-static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
-{
-	unsigned int i;
-	unsigned int path;
-	struct pqi_queue_group *queue_group;
-	unsigned long flags;
-	struct pqi_io_request *io_request;
-	struct pqi_io_request *next;
-	struct scsi_cmnd *scmd;
-
-	ctrl_info->controller_online = false;
-	dev_err(&ctrl_info->pci_dev->dev, "controller offline\n");
-	sis_shutdown_ctrl(ctrl_info);
-	pci_disable_device(ctrl_info->pci_dev);
-	pqi_perform_lockup_action();
-
-	for (i = 0; i < ctrl_info->num_queue_groups; i++) {
-		queue_group = &ctrl_info->queue_groups[i];
-
-		for (path = 0; path < 2; path++) {
-			spin_lock_irqsave(
-				&queue_group->submit_lock[path], flags);
-
-			list_for_each_entry_safe(io_request, next,
-				&queue_group->request_list[path],
-				request_list_entry) {
-
-				scmd = io_request->scmd;
-				if (scmd) {
-					set_host_byte(scmd, DID_NO_CONNECT);
-					pqi_scsi_done(scmd);
-				}
-
-				list_del(&io_request->request_list_entry);
-			}
-
-			spin_unlock_irqrestore(
-				&queue_group->submit_lock[path], flags);
-		}
-	}
-}
-
 #define PQI_HEARTBEAT_TIMER_INTERVAL	(10 * HZ)
 
 static void pqi_heartbeat_timer_handler(unsigned long data)
@@ -3461,9 +3432,11 @@ static void pqi_start_io(struct pqi_ctrl
 
 	spin_lock_irqsave(&queue_group->submit_lock[path], flags);
 
-	if (io_request)
+	if (io_request) {
+		io_request->queue_group = queue_group;
 		list_add_tail(&io_request->request_list_entry,
 			&queue_group->request_list[path]);
+	}
 
 	iq_pi = queue_group->iq_pi_copy[path];
 
@@ -3623,6 +3596,11 @@ static int pqi_submit_raid_request_synch
 		goto out;
 	}
 
+	if (pqi_ctrl_offline(ctrl_info)) {
+		rc = -ENXIO;
+		goto out;
+	}
+
 	io_request = pqi_alloc_io_request(ctrl_info);
 
 	put_unaligned_le16(io_request->index,
@@ -4509,21 +4487,18 @@ static void pqi_raid_io_complete(struct
 	pqi_scsi_done(scmd);
 }
 
-static int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info,
+static int pqi_raid_submit_scsi_cmd_with_io_request(
+	struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request,
 	struct pqi_scsi_dev *device, struct scsi_cmnd *scmd,
 	struct pqi_queue_group *queue_group)
 {
 	int rc;
 	size_t cdb_length;
-	struct pqi_io_request *io_request;
 	struct pqi_raid_path_request *request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
 	io_request->io_complete_callback = pqi_raid_io_complete;
 	io_request->scmd = scmd;
 
-	scmd->host_scribble = (unsigned char *)io_request;
-
 	request = io_request->iu;
 	memset(request, 0,
 		offsetof(struct pqi_raid_path_request, sg_descriptors));
@@ -4602,6 +4577,183 @@ static int pqi_raid_submit_scsi_cmd(stru
 	return 0;
 }
 
+static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info,
+	struct pqi_scsi_dev *device, struct scsi_cmnd *scmd,
+	struct pqi_queue_group *queue_group)
+{
+	struct pqi_io_request *io_request;
+
+	io_request = pqi_alloc_io_request(ctrl_info);
+
+	return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request,
+		device, scmd, queue_group);
+}
+
+static inline void pqi_schedule_bypass_retry(struct pqi_ctrl_info *ctrl_info)
+{
+	if (!pqi_ctrl_blocked(ctrl_info))
+		schedule_work(&ctrl_info->raid_bypass_retry_work);
+}
+
+static bool pqi_raid_bypass_retry_needed(struct pqi_io_request *io_request)
+{
+	struct scsi_cmnd *scmd;
+	struct pqi_ctrl_info *ctrl_info;
+
+	if (!io_request->raid_bypass)
+		return false;
+
+	scmd = io_request->scmd;
+	if ((scmd->result & 0xff) == SAM_STAT_GOOD)
+		return false;
+	if (host_byte(scmd->result) == DID_NO_CONNECT)
+		return false;
+
+	ctrl_info = shost_to_hba(scmd->device->host);
+	if (pqi_ctrl_offline(ctrl_info))
+		return false;
+
+	return true;
+}
+
+static inline void pqi_add_to_raid_bypass_retry_list(
+	struct pqi_ctrl_info *ctrl_info,
+	struct pqi_io_request *io_request, bool at_head)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrl_info->raid_bypass_retry_list_lock, flags);
+	if (at_head)
+		list_add(&io_request->request_list_entry,
+			&ctrl_info->raid_bypass_retry_list);
+	else
+		list_add_tail(&io_request->request_list_entry,
+			&ctrl_info->raid_bypass_retry_list);
+	spin_unlock_irqrestore(&ctrl_info->raid_bypass_retry_list_lock, flags);
+}
+
+static void pqi_queued_raid_bypass_complete(struct pqi_io_request *io_request,
+	void *context)
+{
+	struct scsi_cmnd *scmd;
+
+	scmd = io_request->scmd;
+	pqi_free_io_request(io_request);
+	pqi_scsi_done(scmd);
+}
+
+static void pqi_queue_raid_bypass_retry(struct pqi_io_request *io_request)
+{
+	struct scsi_cmnd *scmd;
+	struct pqi_ctrl_info *ctrl_info;
+
+	io_request->io_complete_callback = pqi_queued_raid_bypass_complete;
+	scmd = io_request->scmd;
+	scmd->result = 0;
+	ctrl_info = shost_to_hba(scmd->device->host);
+
+	pqi_add_to_raid_bypass_retry_list(ctrl_info, io_request, false);
+	pqi_schedule_bypass_retry(ctrl_info);
+}
+
+static int pqi_retry_raid_bypass(struct pqi_io_request *io_request)
+{
+	struct scsi_cmnd *scmd;
+	struct pqi_scsi_dev *device;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_queue_group *queue_group;
+
+	scmd = io_request->scmd;
+	device = scmd->device->hostdata;
+	if (pqi_device_in_reset(device)) {
+		pqi_free_io_request(io_request);
+		set_host_byte(scmd, DID_RESET);
+		pqi_scsi_done(scmd);
+		return 0;
+	}
+
+	ctrl_info = shost_to_hba(scmd->device->host);
+	queue_group = io_request->queue_group;
+
+	pqi_reinit_io_request(io_request);
+
+	return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request,
+		device, scmd, queue_group);
+}
+
+static inline struct pqi_io_request *pqi_next_queued_raid_bypass_request(
+	struct pqi_ctrl_info *ctrl_info)
+{
+	unsigned long flags;
+	struct pqi_io_request *io_request;
+
+	spin_lock_irqsave(&ctrl_info->raid_bypass_retry_list_lock, flags);
+	io_request = list_first_entry_or_null(
+		&ctrl_info->raid_bypass_retry_list,
+		struct pqi_io_request, request_list_entry);
+	if (io_request)
+		list_del(&io_request->request_list_entry);
+	spin_unlock_irqrestore(&ctrl_info->raid_bypass_retry_list_lock, flags);
+
+	return io_request;
+}
+
+static void pqi_retry_raid_bypass_requests(struct pqi_ctrl_info *ctrl_info)
+{
+	int rc;
+	struct pqi_io_request *io_request;
+
+	pqi_ctrl_busy(ctrl_info);
+
+	while (1) {
+		if (pqi_ctrl_blocked(ctrl_info))
+			break;
+		io_request = pqi_next_queued_raid_bypass_request(ctrl_info);
+		if (!io_request)
+			break;
+		rc = pqi_retry_raid_bypass(io_request);
+		if (rc) {
+			pqi_add_to_raid_bypass_retry_list(ctrl_info, io_request,
+				true);
+			pqi_schedule_bypass_retry(ctrl_info);
+			break;
+		}
+	}
+
+	pqi_ctrl_unbusy(ctrl_info);
+}
+
+static void pqi_raid_bypass_retry_worker(struct work_struct *work)
+{
+	struct pqi_ctrl_info *ctrl_info;
+
+	ctrl_info = container_of(work, struct pqi_ctrl_info,
+		raid_bypass_retry_work);
+	pqi_retry_raid_bypass_requests(ctrl_info);
+}
+
+static void pqi_complete_all_queued_raid_bypass_retries(
+	struct pqi_ctrl_info *ctrl_info, int result)
+{
+	unsigned long flags;
+	struct pqi_io_request *io_request;
+	struct pqi_io_request *next;
+	struct scsi_cmnd *scmd;
+
+	spin_lock_irqsave(&ctrl_info->raid_bypass_retry_list_lock, flags);
+
+	list_for_each_entry_safe(io_request, next,
+		&ctrl_info->raid_bypass_retry_list, request_list_entry) {
+		list_del(&io_request->request_list_entry);
+		scmd = io_request->scmd;
+		pqi_free_io_request(io_request);
+		scmd->result = result;
+		pqi_scsi_done(scmd);
+	}
+
+	spin_unlock_irqrestore(&ctrl_info->raid_bypass_retry_list_lock, flags);
+}
+
 static void pqi_aio_io_complete(struct pqi_io_request *io_request,
 	void *context)
 {
@@ -4611,6 +4763,10 @@ static void pqi_aio_io_complete(struct p
 	scsi_dma_unmap(scmd);
 	if (io_request->status == -EAGAIN)
 		set_host_byte(scmd, DID_IMM_RETRY);
+	else if (pqi_raid_bypass_retry_needed(io_request)) {
+		pqi_queue_raid_bypass_retry(io_request);
+		return;
+	}
 	pqi_free_io_request(io_request);
 	pqi_scsi_done(scmd);
 }
@@ -4620,13 +4776,13 @@ static inline int pqi_aio_submit_scsi_cm
 	struct pqi_queue_group *queue_group)
 {
 	return pqi_aio_submit_io(ctrl_info, scmd, device->aio_handle,
-		scmd->cmnd, scmd->cmd_len, queue_group, NULL);
+		scmd->cmnd, scmd->cmd_len, queue_group, NULL, false);
 }
 
 static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
 	struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb,
 	unsigned int cdb_length, struct pqi_queue_group *queue_group,
-	struct pqi_encryption_info *encryption_info)
+	struct pqi_encryption_info *encryption_info, bool raid_bypass)
 {
 	int rc;
 	struct pqi_io_request *io_request;
@@ -4635,8 +4791,7 @@ static int pqi_aio_submit_io(struct pqi_
 	io_request = pqi_alloc_io_request(ctrl_info);
 	io_request->io_complete_callback = pqi_aio_io_complete;
 	io_request->scmd = scmd;
-
-	scmd->host_scribble = (unsigned char *)io_request;
+	io_request->raid_bypass = raid_bypass;
 
 	request = io_request->iu;
 	memset(request, 0,
@@ -4761,11 +4916,8 @@ static int pqi_scsi_queue_command(struct
 				!blk_rq_is_passthrough(scmd->request)) {
 			rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device,
 				scmd, queue_group);
-			if (rc == 0 ||
-				rc == SCSI_MLQUEUE_HOST_BUSY ||
-				rc == SAM_STAT_CHECK_CONDITION ||
-				rc == SAM_STAT_RESERVATION_CONFLICT)
-					raid_bypassed = true;
+			if (rc == 0 || rc == SCSI_MLQUEUE_HOST_BUSY)
+				raid_bypassed = true;
 		}
 		if (!raid_bypassed)
 			rc = pqi_raid_submit_scsi_cmd(ctrl_info, device, scmd,
@@ -6159,6 +6311,11 @@ static struct pqi_ctrl_info *pqi_alloc_c
 		PQI_RESERVED_IO_SLOTS_SYNCHRONOUS_REQUESTS);
 	init_waitqueue_head(&ctrl_info->block_requests_wait);
 
+	INIT_LIST_HEAD(&ctrl_info->raid_bypass_retry_list);
+	spin_lock_init(&ctrl_info->raid_bypass_retry_list_lock);
+	INIT_WORK(&ctrl_info->raid_bypass_retry_work,
+		pqi_raid_bypass_retry_worker);
+
 	ctrl_info->ctrl_id = atomic_inc_return(&pqi_controller_count) - 1;
 	ctrl_info->irq_mode = IRQ_MODE_NONE;
 	ctrl_info->max_msix_vectors = PQI_MAX_MSIX_VECTORS;
@@ -6228,6 +6385,60 @@ static void pqi_perform_lockup_action(vo
 	}
 }
 
+static void pqi_complete_all_queued_requests(struct pqi_ctrl_info *ctrl_info,
+	int result)
+{
+	unsigned int i;
+	unsigned int path;
+	struct pqi_queue_group *queue_group;
+	unsigned long flags;
+	struct pqi_io_request *io_request;
+	struct pqi_io_request *next;
+	struct scsi_cmnd *scmd;
+
+	for (i = 0; i < ctrl_info->num_queue_groups; i++) {
+		queue_group = &ctrl_info->queue_groups[i];
+
+		for (path = 0; path < 2; path++) {
+			spin_lock_irqsave(
+				&queue_group->submit_lock[path], flags);
+
+			list_for_each_entry_safe(io_request, next,
+				&queue_group->request_list[path],
+				request_list_entry) {
+
+				scmd = io_request->scmd;
+				if (scmd) {
+					scmd->result = result;
+					pqi_scsi_done(scmd);
+				}
+
+				list_del(&io_request->request_list_entry);
+			}
+
+			spin_unlock_irqrestore(
+				&queue_group->submit_lock[path], flags);
+		}
+	}
+}
+
+static void pqi_fail_all_queued_requests(struct pqi_ctrl_info *ctrl_info)
+{
+	pqi_complete_all_queued_requests(ctrl_info, DID_NO_CONNECT << 16);
+	pqi_complete_all_queued_raid_bypass_retries(ctrl_info,
+		DID_NO_CONNECT << 16);
+}
+
+static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
+{
+	ctrl_info->controller_online = false;
+	sis_shutdown_ctrl(ctrl_info);
+	pci_disable_device(ctrl_info->pci_dev);
+	dev_err(&ctrl_info->pci_dev->dev, "controller offline\n");
+	pqi_perform_lockup_action();
+	pqi_fail_all_queued_requests(ctrl_info);
+}
+
 static void pqi_print_ctrl_info(struct pci_dev *pci_dev,
 	const struct pci_device_id *id)
 {