Blob Blame History Raw
From: Evan Swanson <evan.swanson@intel.com>
Date: Fri, 15 May 2020 17:42:14 -0700
Subject: ice: Handle critical FW error during admin queue initialization
Patch-mainline: v5.8-rc1
Git-commit: b5c7f857e5c98c21271678992fb2634df38292f5
References: jsc#SLE-12878

A race condition between FW and SW can occur between admin queue setup and
the first command sent. A link event may occur and FW attempts to notify a
non-existent queue. FW will set the critical error bit and disable the
queue. When this happens retry queue setup.

Signed-off-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/intel/ice/ice_controlq.c   |  126 +++++++++++++-----------
 drivers/net/ethernet/intel/ice/ice_controlq.h   |    3 
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |    2 
 drivers/net/ethernet/intel/ice/ice_main.c       |    2 
 drivers/net/ethernet/intel/ice/ice_status.h     |    1 
 5 files changed, 80 insertions(+), 54 deletions(-)

--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -12,6 +12,7 @@ do {								\
 	(qinfo)->sq.bal = prefix##_ATQBAL;			\
 	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
 	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
+	(qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;	\
 	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
 	(qinfo)->rq.head = prefix##_ARQH;			\
 	(qinfo)->rq.tail = prefix##_ARQT;			\
@@ -20,6 +21,7 @@ do {								\
 	(qinfo)->rq.bal = prefix##_ARQBAL;			\
 	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
 	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
+	(qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;	\
 	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
 } while (0)
 
@@ -642,6 +644,50 @@ init_ctrlq_free_sq:
 }
 
 /**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		if (ice_check_sq_alive(hw, cq))
+			ice_aq_q_shutdown(hw, true);
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return;
+	}
+
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+	/* Shutdown FW admin queue */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PF-VF Mailbox */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
  * ice_init_all_ctrlq - main initialization routine for all control queues
  * @hw: pointer to the hardware structure
  *
@@ -656,17 +702,27 @@ init_ctrlq_free_sq:
  */
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 {
-	enum ice_status ret_code;
+	enum ice_status status;
+	u32 retry = 0;
 
 	/* Init FW admin queue */
-	ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
-	if (ret_code)
-		return ret_code;
+	do {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		if (status)
+			return status;
 
-	ret_code = ice_init_check_adminq(hw);
-	if (ret_code)
-		return ret_code;
+		status = ice_init_check_adminq(hw);
+		if (status != ICE_ERR_AQ_FW_CRITICAL)
+			break;
 
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Retry Admin Queue init due to FW critical error\n");
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+	} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+	if (status)
+		return status;
 	/* Init Mailbox queue */
 	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
 }
@@ -708,50 +764,6 @@ enum ice_status ice_create_all_ctrlq(str
 }
 
 /**
- * ice_shutdown_ctrlq - shutdown routine for any control queue
- * @hw: pointer to the hardware structure
- * @q_type: specific Control queue type
- *
- * NOTE: this function does not destroy the control queue locks.
- */
-static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
-{
-	struct ice_ctl_q_info *cq;
-
-	switch (q_type) {
-	case ICE_CTL_Q_ADMIN:
-		cq = &hw->adminq;
-		if (ice_check_sq_alive(hw, cq))
-			ice_aq_q_shutdown(hw, true);
-		break;
-	case ICE_CTL_Q_MAILBOX:
-		cq = &hw->mailboxq;
-		break;
-	default:
-		return;
-	}
-
-	ice_shutdown_sq(hw, cq);
-	ice_shutdown_rq(hw, cq);
-}
-
-/**
- * ice_shutdown_all_ctrlq - shutdown routine for all control queues
- * @hw: pointer to the hardware structure
- *
- * NOTE: this function does not destroy the control queue locks. The driver
- * may call this at runtime to shutdown and later restart control queues, such
- * as in response to a reset event.
- */
-void ice_shutdown_all_ctrlq(struct ice_hw *hw)
-{
-	/* Shutdown FW admin queue */
-	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
-	/* Shutdown PF-VF Mailbox */
-	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
-}
-
-/**
  * ice_destroy_ctrlq_locks - Destroy locks for a control queue
  * @cq: pointer to the control queue
  *
@@ -1049,9 +1061,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struc
 
 	/* update the error if time out occurred */
 	if (!cmd_completed) {
-		ice_debug(hw, ICE_DBG_AQ_MSG,
-			  "Control Send Queue Writeback timeout.\n");
-		status = ICE_ERR_AQ_TIMEOUT;
+		if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+		    rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+			status = ICE_ERR_AQ_FW_CRITICAL;
+		} else {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Control Send Queue Writeback timeout.\n");
+			status = ICE_ERR_AQ_TIMEOUT;
+		}
 	}
 
 sq_send_command_error:
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -34,6 +34,8 @@ enum ice_ctl_q {
 /* Control Queue timeout settings - max delay 250ms */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
 #define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */
 
 struct ice_ctl_q_ring {
 	void *dma_head;			/* Virtual address to DMA head */
@@ -59,6 +61,7 @@ struct ice_ctl_q_ring {
 	u32 bal;
 	u32 len_mask;
 	u32 len_ena_mask;
+	u32 len_crit_mask;
 	u32 head_mask;
 };
 
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -39,6 +39,7 @@
 #define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ARQLEN				0x0022E480
 #define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M			BIT(30)
 #define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
 #define PF_MBX_ARQT				0x0022E580
 #define PF_MBX_ATQBAH				0x0022E180
@@ -47,6 +48,7 @@
 #define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ATQLEN				0x0022E200
 #define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
 #define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_MBX_ATQT				0x0022E300
 #define PRTDCB_GENC				0x00083000
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5208,6 +5208,8 @@ const char *ice_stat_str(enum ice_status
 		return "ICE_ERR_AQ_NO_WORK";
 	case ICE_ERR_AQ_EMPTY:
 		return "ICE_ERR_AQ_EMPTY";
+	case ICE_ERR_AQ_FW_CRITICAL:
+		return "ICE_ERR_AQ_FW_CRITICAL";
 	}
 
 	return "ICE_ERR_UNKNOWN";
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -37,6 +37,7 @@ enum ice_status {
 	ICE_ERR_AQ_FULL				= -102,
 	ICE_ERR_AQ_NO_WORK			= -103,
 	ICE_ERR_AQ_EMPTY			= -104,
+	ICE_ERR_AQ_FW_CRITICAL			= -105,
 };
 
 #endif /* _ICE_STATUS_H_ */