Blob Blame History Raw
From: Vijay Immanuel <vijayi@attalasystems.com>
Date: Wed, 13 Jun 2018 18:47:30 -0700
Subject: IB/rxe: avoid back-to-back retries
Patch-mainline: v4.20-rc1
Git-commit: 4e4c53df567714b3d08b2b5d8ccb1d175fc9be01
References: bsc#1103992 FATE#326009

Error retries can occur due to timeouts, NAKs or receiving
packets beyond the current read request. Avoid back-to-back
retries due to packet processing, by only retrying the initial
attempt immediately. Subsequent retries must be due to timeouts.

Continue to process completion packets after scheduling a retry.

Signed-off-by: Vijay Immanuel <vijayi@attalasystems.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/sw/rxe/rxe_comp.c  |   18 +++++++++++++++++-
 drivers/infiniband/sw/rxe/rxe_verbs.h |    1 +
 2 files changed, 18 insertions(+), 1 deletion(-)

--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -191,6 +191,7 @@ static inline void reset_retry_counters(
 {
 	qp->comp.retry_cnt = qp->attr.retry_cnt;
 	qp->comp.rnr_retry = qp->attr.rnr_retry;
+	qp->comp.started_retry = 0;
 }
 
 static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -676,6 +677,20 @@ int rxe_completer(void *arg)
 				goto exit;
 			}
 
+			/* if we've started a retry, don't start another
+			 * retry sequence, unless this is a timeout.
+			 */
+			if (qp->comp.started_retry &&
+			    !qp->comp.timeout_retry) {
+				if (pkt) {
+					rxe_drop_ref(pkt->qp);
+					kfree_skb(skb);
+					skb = NULL;
+				}
+
+				goto done;
+			}
+
 			if (qp->comp.retry_cnt > 0) {
 				if (qp->comp.retry_cnt != 7)
 					qp->comp.retry_cnt--;
@@ -692,6 +707,7 @@ int rxe_completer(void *arg)
 					rxe_counter_inc(rxe,
 							RXE_CNT_COMP_RETRY);
 					qp->req.need_retry = 1;
+					qp->comp.started_retry = 1;
 					rxe_run_task(&qp->req.task, 1);
 				}
 
@@ -701,7 +717,7 @@ int rxe_completer(void *arg)
 					skb = NULL;
 				}
 
-				goto exit;
+				goto done;
 
 			} else {
 				rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -158,6 +158,7 @@ struct rxe_comp_info {
 	int			opcode;
 	int			timeout;
 	int			timeout_retry;
+	int			started_retry;
 	u32			retry_cnt;
 	u32			rnr_retry;
 	struct rxe_task		task;