Blob Blame History Raw
From: Kevin Barnett <kevin.barnett@microsemi.com>
Date: Wed, 3 May 2017 18:53:11 -0500
Subject: scsi: smartpqi: add heartbeat check
Patch-mainline: v4.13-rc1
Git-commit: 98f876674a6fba3591c342dfbcfdbaa7ecf0a84e
References: bsc#1038125

check for controller lockups

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>

---
 drivers/scsi/smartpqi/smartpqi.h      |  60 ++++++++++++++--
 drivers/scsi/smartpqi/smartpqi_init.c | 126 +++++++++++++++++++++++-----------
 drivers/scsi/smartpqi/smartpqi_sis.c  |   4 ++
 3 files changed, 143 insertions(+), 47 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 06e2b71..1ac09e7 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -490,7 +490,6 @@ struct pqi_raid_error_info {
 #define PQI_EVENT_TYPE_LOGICAL_DEVICE		0x5
 #define PQI_EVENT_TYPE_AIO_STATE_CHANGE		0xfd
 #define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE	0xfe
-#define PQI_EVENT_TYPE_HEARTBEAT		0xff
 
 #pragma pack()
 
@@ -635,6 +634,58 @@ struct pqi_encryption_info {
 	u32	encrypt_tweak_upper;
 };
 
+#pragma pack(1)
+
+#define PQI_CONFIG_TABLE_SIGNATURE	"CFGTABLE"
+#define PQI_CONFIG_TABLE_MAX_LENGTH	((u16)~0)
+
+/* configuration table section IDs */
+#define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO		0
+#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES	1
+#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA	2
+#define PQI_CONFIG_TABLE_SECTION_DEBUG			3
+#define PQI_CONFIG_TABLE_SECTION_HEARTBEAT		4
+
+struct pqi_config_table {
+	u8	signature[8];		/* "CFGTABLE" */
+	__le32	first_section_offset;	/* offset in bytes from the base */
+					/* address of this table to the */
+					/* first section */
+};
+
+struct pqi_config_table_section_header {
+	__le16	section_id;		/* as defined by the */
+					/* PQI_CONFIG_TABLE_SECTION_* */
+					/* manifest constants above */
+	__le16	next_section_offset;	/* offset in bytes from base */
+					/* address of the table of the */
+					/* next section or 0 if last entry */
+};
+
+struct pqi_config_table_general_info {
+	struct pqi_config_table_section_header header;
+	__le32	section_length;		/* size of this section in bytes */
+					/* including the section header */
+	__le32	max_outstanding_requests;	/* max. outstanding */
+						/* commands supported by */
+						/* the controller */
+	__le32	max_sg_size;		/* max. transfer size of a single */
+					/* command */
+	__le32	max_sg_per_request;	/* max. number of scatter-gather */
+					/* entries supported in a single */
+					/* command */
+};
+
+struct pqi_config_table_debug {
+	struct pqi_config_table_section_header header;
+	__le32	scratchpad;
+};
+
+struct pqi_config_table_heartbeat {
+	struct pqi_config_table_section_header header;
+	__le32	heartbeat_counter;
+};
+
 #define PQI_MAX_OUTSTANDING_REQUESTS	((u32)~0)
 #define PQI_MAX_TRANSFER_SIZE		(4 * 1024U * 1024U)
 
@@ -645,8 +696,6 @@ struct pqi_encryption_info {
 #define PQI_HBA_BUS			2
 #define PQI_MAX_BUS			PQI_HBA_BUS
 
-#pragma pack(1)
-
 struct report_lun_header {
 	__be32	list_length;
 	u8	extended_response;
@@ -870,7 +919,6 @@ struct pqi_io_request {
 	struct list_head request_list_entry;
 };
 
-#define PQI_EVENT_HEARTBEAT		0
 #define PQI_NUM_SUPPORTED_EVENTS	6
 
 struct pqi_event {
@@ -943,7 +991,6 @@ struct pqi_ctrl_info {
 	u8		inbound_spanning_supported : 1;
 	u8		outbound_spanning_supported : 1;
 	u8		pqi_mode_enabled : 1;
-	u8		heartbeat_timer_started : 1;
 	u8		update_time_worker_scheduled : 1;
 
 	struct list_head scsi_device_list;
@@ -963,7 +1010,8 @@ struct pqi_ctrl_info {
 
 	atomic_t	num_interrupts;
 	int		previous_num_interrupts;
-	unsigned int	num_heartbeats_requested;
+	u32		previous_heartbeat_count;
+	__le32 __iomem	*heartbeat_counter;
 	struct timer_list heartbeat_timer;
 
 	struct semaphore sync_request_sem;
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 7af4add..de33942 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -267,6 +267,14 @@ static inline void pqi_cancel_rescan_worker(struct pqi_ctrl_info *ctrl_info)
 	cancel_delayed_work_sync(&ctrl_info->rescan_work);
 }
 
+static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
+{
+	if (!ctrl_info->heartbeat_counter)
+		return 0;
+
+	return readl(ctrl_info->heartbeat_counter);
+}
+
 static int pqi_map_single(struct pci_dev *pci_dev,
 	struct pqi_sg_descriptor *sg_descriptor, void *buffer,
 	size_t buffer_length, int data_direction)
@@ -2708,23 +2716,18 @@ static inline unsigned int pqi_num_elements_free(unsigned int pi,
 	return elements_in_queue - num_elements_used - 1;
 }
 
-#define PQI_EVENT_ACK_TIMEOUT	30
-
-static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
+static void pqi_send_event_ack(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_event_acknowledge_request *iu, size_t iu_length)
 {
 	pqi_index_t iq_pi;
 	pqi_index_t iq_ci;
 	unsigned long flags;
 	void *next_element;
-	unsigned long timeout;
 	struct pqi_queue_group *queue_group;
 
 	queue_group = &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP];
 	put_unaligned_le16(queue_group->oq_id, &iu->header.response_queue_id);
 
-	timeout = (PQI_EVENT_ACK_TIMEOUT * HZ) + jiffies;
-
 	while (1) {
 		spin_lock_irqsave(&queue_group->submit_lock[RAID_PATH], flags);
 
@@ -2738,11 +2741,8 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
 		spin_unlock_irqrestore(
 			&queue_group->submit_lock[RAID_PATH], flags);
 
-		if (time_after(jiffies, timeout)) {
-			dev_err(&ctrl_info->pci_dev->dev,
-				"sending event acknowledge timed out\n");
+		if (pqi_ctrl_offline(ctrl_info))
 			return;
-		}
 	}
 
 	next_element = queue_group->iq_element_array[RAID_PATH] +
@@ -2751,7 +2751,6 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
 	memcpy(next_element, iu, iu_length);
 
 	iq_pi = (iq_pi + 1) % ctrl_info->num_elements_per_iq;
-
 	queue_group->iq_pi_copy[RAID_PATH] = iq_pi;
 
 	/*
@@ -2777,7 +2776,7 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info,
 	request.event_id = event->event_id;
 	request.additional_event_id = event->additional_event_id;
 
-	pqi_start_event_ack(ctrl_info, &request, sizeof(request));
+	pqi_send_event_ack(ctrl_info, &request, sizeof(request));
 }
 
 static void pqi_event_worker(struct work_struct *work)
@@ -2785,7 +2784,6 @@ static void pqi_event_worker(struct work_struct *work)
 	unsigned int i;
 	struct pqi_ctrl_info *ctrl_info;
 	struct pqi_event *event;
-	bool got_non_heartbeat_event = false;
 
 	ctrl_info = container_of(work, struct pqi_ctrl_info, event_work);
 
@@ -2797,8 +2795,6 @@ static void pqi_event_worker(struct work_struct *work)
 		if (event->pending) {
 			event->pending = false;
 			pqi_acknowledge_event(ctrl_info, event);
-			if (i != PQI_EVENT_TYPE_HEARTBEAT)
-				got_non_heartbeat_event = true;
 		}
 		event++;
 	}
@@ -2848,57 +2844,58 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
 	}
 }
 
-#define PQI_HEARTBEAT_TIMER_INTERVAL	(5 * HZ)
-#define PQI_MAX_HEARTBEAT_REQUESTS	5
+#define PQI_HEARTBEAT_TIMER_INTERVAL	(10 * HZ)
 
 static void pqi_heartbeat_timer_handler(unsigned long data)
 {
 	int num_interrupts;
+	u32 heartbeat_count;
 	struct pqi_ctrl_info *ctrl_info = (struct pqi_ctrl_info *)data;
 
-	if (!ctrl_info->heartbeat_timer_started)
+	pqi_check_ctrl_health(ctrl_info);
+	if (pqi_ctrl_offline(ctrl_info))
 		return;
 
 	num_interrupts = atomic_read(&ctrl_info->num_interrupts);
+	heartbeat_count = pqi_read_heartbeat_counter(ctrl_info);
 
 	if (num_interrupts == ctrl_info->previous_num_interrupts) {
-		ctrl_info->num_heartbeats_requested++;
-		if (ctrl_info->num_heartbeats_requested >
-			PQI_MAX_HEARTBEAT_REQUESTS) {
+		if (heartbeat_count == ctrl_info->previous_heartbeat_count) {
+			dev_err(&ctrl_info->pci_dev->dev,
+				"no heartbeat detected - last heartbeat count: %u\n",
+				heartbeat_count);
 			pqi_take_ctrl_offline(ctrl_info);
 			return;
 		}
-		ctrl_info->events[PQI_EVENT_HEARTBEAT].pending = true;
-		schedule_work(&ctrl_info->event_work);
 	} else {
-		ctrl_info->num_heartbeats_requested = 0;
+		ctrl_info->previous_num_interrupts = num_interrupts;
 	}
 
-	ctrl_info->previous_num_interrupts = num_interrupts;
+	ctrl_info->previous_heartbeat_count = heartbeat_count;
 	mod_timer(&ctrl_info->heartbeat_timer,
 		jiffies + PQI_HEARTBEAT_TIMER_INTERVAL);
 }
 
 static void pqi_start_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
 {
+	if (!ctrl_info->heartbeat_counter)
+		return;
+
 	ctrl_info->previous_num_interrupts =
 		atomic_read(&ctrl_info->num_interrupts);
+	ctrl_info->previous_heartbeat_count =
+		pqi_read_heartbeat_counter(ctrl_info);
 
-	init_timer(&ctrl_info->heartbeat_timer);
 	ctrl_info->heartbeat_timer.expires =
 		jiffies + PQI_HEARTBEAT_TIMER_INTERVAL;
 	ctrl_info->heartbeat_timer.data = (unsigned long)ctrl_info;
 	ctrl_info->heartbeat_timer.function = pqi_heartbeat_timer_handler;
-	ctrl_info->heartbeat_timer_started = true;
 	add_timer(&ctrl_info->heartbeat_timer);
 }
 
 static inline void pqi_stop_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
 {
-	if (ctrl_info->heartbeat_timer_started) {
-		ctrl_info->heartbeat_timer_started = false;
-		del_timer_sync(&ctrl_info->heartbeat_timer);
-	}
+	del_timer_sync(&ctrl_info->heartbeat_timer);
 }
 
 static inline int pqi_event_type_to_event_index(unsigned int event_type)
@@ -2925,12 +2922,10 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
 	struct pqi_event_queue *event_queue;
 	struct pqi_event_response *response;
 	struct pqi_event *event;
-	bool need_delayed_work;
 	int event_index;
 
 	event_queue = &ctrl_info->event_queue;
 	num_events = 0;
-	need_delayed_work = false;
 	oq_ci = event_queue->oq_ci_copy;
 
 	while (1) {
@@ -2953,10 +2948,6 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
 				event->event_id = response->event_id;
 				event->additional_event_id =
 					response->additional_event_id;
-				if (event_index != PQI_EVENT_TYPE_HEARTBEAT) {
-					event->pending = true;
-					need_delayed_work = true;
-				}
 			}
 		}
 
@@ -2966,9 +2957,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
 	if (num_events) {
 		event_queue->oq_ci_copy = oq_ci;
 		writel(oq_ci, event_queue->oq_ci);
-
-		if (need_delayed_work)
-			schedule_work(&ctrl_info->event_work);
+		schedule_work(&ctrl_info->event_work);
 	}
 
 	return num_events;
@@ -3220,7 +3209,7 @@ static int pqi_alloc_operational_queues(struct pqi_ctrl_info *ctrl_info)
 
 	if (!ctrl_info->queue_memory_base) {
 		dev_err(&ctrl_info->pci_dev->dev,
-			"failed to allocate memory for PQI admin queues\n");
+			"unable to allocate memory for PQI admin queues\n");
 		return -ENOMEM;
 	}
 
@@ -5672,6 +5661,55 @@ out:
 	return rc;
 }
 
+static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
+{
+	u32 table_length;
+	u32 section_offset;
+	void __iomem *table_iomem_addr;
+	struct pqi_config_table *config_table;
+	struct pqi_config_table_section_header *section;
+
+	table_length = ctrl_info->config_table_length;
+
+	config_table = kmalloc(table_length, GFP_KERNEL);
+	if (!config_table) {
+		dev_err(&ctrl_info->pci_dev->dev,
+			"unable to allocate memory for PQI configuration table\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Copy the config table contents from I/O memory space into the
+	 * temporary buffer.
+	 */
+	table_iomem_addr = ctrl_info->iomem_base +
+		ctrl_info->config_table_offset;
+	memcpy_fromio(config_table, table_iomem_addr, table_length);
+
+	section_offset =
+		get_unaligned_le32(&config_table->first_section_offset);
+
+	while (section_offset) {
+		section = (void *)config_table + section_offset;
+
+		switch (get_unaligned_le16(&section->section_id)) {
+		case PQI_CONFIG_TABLE_SECTION_HEARTBEAT:
+			ctrl_info->heartbeat_counter = table_iomem_addr +
+				section_offset +
+				offsetof(struct pqi_config_table_heartbeat,
+					heartbeat_counter);
+			break;
+		}
+
+		section_offset =
+			get_unaligned_le16(&section->next_section_offset);
+	}
+
+	kfree(config_table);
+
+	return 0;
+}
+
 /* Switches the controller from PQI mode back into SIS mode. */
 
 static int pqi_revert_to_sis_mode(struct pqi_ctrl_info *ctrl_info)
@@ -5783,6 +5821,10 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
 	ctrl_info->pqi_mode_enabled = true;
 	pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
 
+	rc = pqi_process_config_table(ctrl_info);
+	if (rc)
+		return rc;
+
 	rc = pqi_alloc_admin_queues(ctrl_info);
 	if (rc) {
 		dev_err(&ctrl_info->pci_dev->dev,
@@ -6091,6 +6133,8 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
 	INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
 	INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);
 
+	init_timer(&ctrl_info->heartbeat_timer);
+
 	sema_init(&ctrl_info->sync_request_sem,
 		PQI_RESERVED_IO_SLOTS_SYNCHRONOUS_REQUESTS);
 	init_waitqueue_head(&ctrl_info->block_requests_wait);
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
index 12853fd..3155bda 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.c
+++ b/drivers/scsi/smartpqi/smartpqi_sis.c
@@ -422,6 +422,10 @@ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
 
 void sis_shutdown_ctrl(struct pqi_ctrl_info *ctrl_info)
 {
+	if (readl(&ctrl_info->registers->sis_firmware_status) &
+		SIS_CTRL_KERNEL_PANIC)
+		return;
+
 	writel(SIS_TRIGGER_SHUTDOWN,
 		&ctrl_info->registers->sis_host_to_ctrl_doorbell);
 }
-- 
1.8.5.6