Blob Blame History Raw
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 8 Aug 2017 20:38:45 -0500
Subject: i40iw: Improve CQP timeout logic
Patch-mainline: v4.14-rc1
Git-commit: d26875b43d45644e87f4c0b6bb2d7abf3c61d529
References: bsc#1058659 FATE#322535

The current timeout logic for Control Queue-Pair (CQP) OPs
does not take into account whether CQP makes progress but
rather blindly waits for a large timeout value, 100000 jiffies
for the completion event. Improve this by setting the timeout
based on whether the CQP is making progress or not. If the CQP
is hung, the timeout will happen sooner, in 5000 jiffies. Each
time the CQP progress is detetcted, the timeout extends by 5000
jiffies.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Christopher N Bednarz <christopher.n.bednarz@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/i40iw/i40iw_ctrl.c  |   11 +++++++++++
 drivers/infiniband/hw/i40iw/i40iw_p.h     |   14 +++++++++-----
 drivers/infiniband/hw/i40iw/i40iw_type.h  |    5 +++++
 drivers/infiniband/hw/i40iw/i40iw_utils.c |   22 ++++++++++++++--------
 4 files changed, 39 insertions(+), 13 deletions(-)

--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -54,6 +54,17 @@ static inline void i40iw_insert_wqe_hdr(
 	set_64bit_val(wqe, 24, header);
 }
 
+void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev)
+{
+	if (cqp_timeout->compl_cqp_cmds != dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]) {
+		cqp_timeout->compl_cqp_cmds = dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS];
+		cqp_timeout->count = 0;
+	} else {
+		if (dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] != cqp_timeout->compl_cqp_cmds)
+			cqp_timeout->count++;
+	}
+}
+
 /**
  * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
  * @cqp: struct for cqp hw
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -35,11 +35,13 @@
 #ifndef I40IW_P_H
 #define I40IW_P_H
 
-#define PAUSE_TIMER_VALUE  0xFFFF
-#define REFRESH_THRESHOLD  0x7FFF
-#define HIGH_THRESHOLD     0x800
-#define LOW_THRESHOLD      0x200
-#define ALL_TC2PFC         0xFF
+#define PAUSE_TIMER_VALUE       0xFFFF
+#define REFRESH_THRESHOLD       0x7FFF
+#define HIGH_THRESHOLD          0x800
+#define LOW_THRESHOLD           0x200
+#define ALL_TC2PFC              0xFF
+#define CQP_COMPL_WAIT_TIME     0x3E8
+#define CQP_TIMEOUT_THRESHOLD   5
 
 void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
 		     char *desc, u64 *buf, u32 size);
@@ -51,6 +53,8 @@ void i40iw_sc_cqp_post_sq(struct i40iw_s
 
 u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
 
+void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev);
+
 enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
 						 struct i40iw_fast_reg_stag_info *info,
 						 bool post_sq);
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -1345,4 +1345,9 @@ struct i40iw_virtchnl_work_info {
 	void *worker_vf_dev;
 };
 
+struct i40iw_cqp_timeout {
+	u64 compl_cqp_cmds;
+	u8 count;
+};
+
 #endif
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -445,23 +445,29 @@ static int i40iw_wait_event(struct i40iw
 {
 	struct cqp_commands_info *info = &cqp_request->info;
 	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_timeout cqp_timeout;
 	bool cqp_error = false;
 	int err_code = 0;
-	int timeout_ret = 0;
+	memset(&cqp_timeout, 0, sizeof(cqp_timeout));
+	cqp_timeout.compl_cqp_cmds = iwdev->sc_dev.cqp_cmd_stats[OP_COMPLETED_COMMANDS];
+	do {
+		if (wait_event_timeout(cqp_request->waitq,
+				       cqp_request->request_done, CQP_COMPL_WAIT_TIME))
+			break;
 
-	timeout_ret = wait_event_timeout(cqp_request->waitq,
-					 cqp_request->request_done,
-					 I40IW_EVENT_TIMEOUT);
-	if (!timeout_ret) {
-		i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
-			     info->cqp_cmd, timeout_ret);
+		i40iw_check_cqp_progress(&cqp_timeout, &iwdev->sc_dev);
+
+		if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD)
+			continue;
+
+		i40iw_pr_err("error cqp command 0x%x timed out", info->cqp_cmd);
 		err_code = -ETIME;
 		if (!iwdev->reset) {
 			iwdev->reset = true;
 			i40iw_request_reset(iwdev);
 		}
 		goto done;
-	}
+	} while (1);
 	cqp_error = cqp_request->compl_info.error;
 	if (cqp_error) {
 		i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",