From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Mon, 10 Sep 2018 09:49:27 -0700
Subject: IB/{hfi1, qib, rdmavt}: Schedule multi RC/UC packets instead of
 posting
Patch-mainline: v4.20-rc1
Git-commit: 0b79b27748cbec221e1ceabf63578198602bf01d
References: bsc#1114685 FATE#325854

The post_send() path determines if it should post directly or schedule
the post for later.  The current logic is:

  if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold
    post the send
  else
    schedule

This can allow large requests to call the send engine directly.  A large
request can produce a large number of packets before returning to the
caller, blocking the caller from posting more requests and preventing
better parallel processing.

Allow the driver(s) more say in this logic (pass call_send to the driver,
rather than examining a return value).
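
Concretely, the callback signature changes from returning a progress
hint to taking the decision flag by reference (see the rdma_vt.h hunk
below):

  int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
                        bool *call_send);

A driver clears *call_send to request scheduling (hfi1/qib do this for
multi-packet RC/UC wqes), or sets it to force direct progress (qib does
this for SMI/GSI).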

Update hfi1/qib logic to schedule the send engine if an RC or UC message
is larger than the QP MTU size.
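
With the driver veto in place, the resulting decision (combining the
rvt_post_send() and check_send_wqe() hunks below) is roughly:

  if the swqe ring is empty, exactly one wqe was posted, and
     (for RC/UC) wqe->length <= qp->pmtu
    post the send
  else
    schedule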

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/hfi1/qp.c       |   12 +++++-------
 drivers/infiniband/hw/hfi1/verbs.h    |    3 ++-
 drivers/infiniband/hw/qib/qib_qp.c    |   17 +++++++----------
 drivers/infiniband/hw/qib/qib_verbs.h |    3 ++-
 drivers/infiniband/sw/rdmavt/qp.c     |   14 ++++++++------
 include/rdma/rdma_vt.h                |   10 ++++++++--
 6 files changed, 32 insertions(+), 27 deletions(-)

--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, s
  * hfi1_check_send_wqe - validate wqe
  * @qp - The qp
  * @wqe - The built wqe
- *
- * validate wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * @call_send - Determine if the send should be posted or scheduled.
  *
  * Returns 0 on success, -EINVAL on failure
  *
  */
 int hfi1_check_send_wqe(struct rvt_qp *qp,
-			struct rvt_swqe *wqe)
+			struct rvt_swqe *wqe, bool *call_send)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct rvt_ah *ah;
@@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *q
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
+		if (wqe->length > qp->pmtu)
+			*call_send = false;
 		break;
 	case IB_QPT_SMI:
 		ah = ibah_to_rvtah(wqe->ud_wr.ah);
@@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *q
 	default:
 		break;
 	}
-	return wqe->length <= piothreshold;
+	return 0;
 }
 
 /**
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *
 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 		    int attr_mask, struct ib_udata *udata);
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+			bool *call_send);
 
 extern const u32 rc_only_opcode;
 extern const u32 uc_only_opcode;
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp
  * qib_check_send_wqe - validate wr/wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
  *
- * validate wr/wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
  */
 int qib_check_send_wqe(struct rvt_qp *qp,
-		       struct rvt_swqe *wqe)
+		       struct rvt_swqe *wqe, bool *call_send)
 {
 	struct rvt_ah *ah;
-	int ret = 0;
 
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
+		if (wqe->length > qp->pmtu)
+			*call_send = false;
 		break;
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp
 		if (wqe->length > (1 << ah->log_pmtu))
 			return -EINVAL;
 		/* progress hint */
-		ret = 1;
+		*call_send = true;
 		break;
 	default:
 		break;
 	}
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd
 
 int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
 
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       bool *call_send);
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1721,7 +1721,7 @@ static inline int rvt_qp_is_avail(
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
 			   const struct ib_send_wr *wr,
-			   int *call_send)
+			   bool *call_send)
 {
 	struct rvt_swqe *wqe;
 	u32 next;
@@ -1828,11 +1828,9 @@ static int rvt_post_one_wr(struct rvt_qp
 
 	/* general part of wqe valid - allow for driver checks */
 	if (rdi->driver_f.check_send_wqe) {
-		ret = rdi->driver_f.check_send_wqe(qp, wqe);
+		ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send);
 		if (ret < 0)
 			goto bail_inval_free;
-		if (ret)
-			*call_send = ret;
 	}
 
 	log_pmtu = qp->log_pmtu;
@@ -1900,7 +1898,7 @@ int rvt_post_send(struct ib_qp *ibqp, co
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 	unsigned long flags = 0;
-	int call_send;
+	bool call_send;
 	unsigned nreq = 0;
 	int err = 0;
 
@@ -1933,7 +1931,11 @@ int rvt_post_send(struct ib_qp *ibqp, co
 bail:
 	spin_unlock_irqrestore(&qp->s_hlock, flags);
 	if (nreq) {
-		if (call_send)
+		/*
+		 * Only call do_send if there is exactly one packet, and the
+		 * driver said it was ok.
+		 */
+		if (nreq == 1 && call_send)
 			rdi->driver_f.do_send(qp);
 		else
 			rdi->driver_f.schedule_send_no_lock(qp);
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -214,8 +214,14 @@ struct rvt_driver_provided {
 	void (*schedule_send)(struct rvt_qp *qp);
 	void (*schedule_send_no_lock)(struct rvt_qp *qp);
 
-	/* Driver specific work request checking */
-	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+	/*
+	 * Validate the wqe.  This needs to be done prior to inserting the
+	 * wqe into the ring, but after the wqe has been set up.  Allow for
+	 * driver specific work request checking by providing a callback.
+	 * call_send indicates if the wqe should be posted or scheduled.
+	 */
+	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+			      bool *call_send);
 
 	/*
 	 * Sometimes rdmavt needs to kick the driver's send progress. That is