Blob Blame History Raw
From: Raju Rangoju <rajur@chelsio.com>
Date: Wed, 25 Jul 2018 21:22:14 +0530
Subject: rdma/cxgb4: Add support for kernel mode SRQ's
Patch-mainline: v4.19-rc1
Git-commit: 6a0b6174d35a141dfa30a32c848a3903e2d7f495
References: bsc#1104276 FATE#325935

This patch implements the srq specific verbs such as create/destroy/modify
and post_srq_recv. And adds srq specific structures and defines to t4.h
and uapi.

Also updates the cq poll logic to deal with completions that are
associated with the SRQ's.

This patch also handles kernel mode SRQ_LIMIT events as well as flushed
SRQ buffers

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/cxgb4/cm.c       |   42 +
 drivers/infiniband/hw/cxgb4/cq.c       |  142 ++++-
 drivers/infiniband/hw/cxgb4/device.c   |   19 
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |    3 
 drivers/infiniband/hw/cxgb4/provider.c |   12 
 drivers/infiniband/hw/cxgb4/qp.c       |  818 +++++++++++++++++++++++++++------
 drivers/infiniband/hw/cxgb4/resource.c |   51 +-
 drivers/infiniband/hw/cxgb4/t4.h       |    9 
 8 files changed, 929 insertions(+), 167 deletions(-)

--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1855,10 +1855,34 @@ static int rx_data(struct c4iw_dev *dev,
 	return 0;
 }
 
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status)
+{
+	enum chip_type adapter_type;
+	u32 srqidx;
+	u8 status;
+
+	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status));
+	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
+
+	/*
+	 * If this TCB had a srq buffer cached, then we must complete
+	 * it. For user mode, that means saving the srqidx in the
+	 * user/kernel status page for this qp.  For kernel mode, just
+	 * synthesize the CQE now.
+	 */
+	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+		if (ep->com.qp->ibqp.uobject)
+			t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+		else
+			c4iw_flush_srqidx(ep->com.qp, srqidx);
+	}
+}
+
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *ep;
-	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
 	unsigned int tid = GET_TID(rpl);
 
@@ -1867,6 +1891,9 @@ static int abort_rpl(struct c4iw_dev *de
 		pr_warn("Abort rpl to freed endpoint\n");
 		return 0;
 	}
+
+	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -2722,28 +2749,35 @@ static int peer_close(struct c4iw_dev *d
 
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
-	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct cpl_abort_req_rss6 *req = cplhdr(skb);
 	struct c4iw_ep *ep;
 	struct sk_buff *rpl_skb;
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int release = 0;
 	unsigned int tid = GET_TID(req);
+	u8 status;
+
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	ep = get_ep_from_tid(dev, tid);
 	if (!ep)
 		return 0;
 
-	if (cxgb_is_neg_adv(req->status)) {
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+	if (cxgb_is_neg_adv(status)) {
 		pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+			 ep->hwtid, status, neg_adv_str(status));
 		ep->stats.abort_neg_adv++;
 		mutex_lock(&dev->rdev.stats.lock);
 		dev->rdev.stats.neg_adv++;
 		mutex_unlock(&dev->rdev.stats.lock);
 		goto deref_ep;
 	}
+
+	complete_cached_srq_buffers(ep, req->srqidx_status);
+
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -182,7 +182,7 @@ err1:
 	return ret;
 }
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
 	struct t4_cqe cqe;
 
@@ -195,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq
 				 CQE_SWCQE_V(1) |
 				 CQE_QPID_V(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+	if (srqidx)
+		cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
 }
@@ -207,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, stru
 	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
 		 wq, cq, wq->rq.in_use, count);
 	while (in_use--) {
-		insert_recv_cqe(wq, cq);
+		insert_recv_cqe(wq, cq, 0);
 		flushed++;
 	}
 	return flushed;
@@ -458,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq,
 	pr_debug("cq %p count %d\n", cq, *count);
 }
 
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+	struct t4_srq_pending_wr *pwr;
+	u16 idx = 0;
+
+	while (srq->pending_in_use) {
+		pwr = &srq->pending_wrs[srq->pending_cidx];
+		srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+		srq->sw_rq[srq->pidx].valid = 1;
+
+		pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__,
+			 srq->cidx, srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 (unsigned long long)pwr->wr_id);
+
+		c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+		t4_srq_consume_pending_wr(srq);
+		t4_srq_produce(srq, pwr->len16);
+		idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+	}
+
+	if (idx) {
+		t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
+		srq->queue[srq->size].status.host_wq_pidx =
+			srq->wq_pidx;
+	}
+}
+
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+	int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+	u64 wr_id;
+
+	srq->sw_rq[rel_idx].valid = 0;
+	wr_id = srq->sw_rq[rel_idx].wr_id;
+
+	if (rel_idx == srq->cidx) {
+		pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx, srq->pidx,
+			 srq->wq_pidx, srq->in_use, srq->size,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_consume(srq);
+		while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+			pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+				 __func__, srq->cidx, srq->pidx,
+				 srq->wq_pidx, srq->in_use,
+				 srq->size, srq->ooo_count,
+				 (unsigned long long)
+				 srq->sw_rq[srq->cidx].wr_id);
+			t4_srq_consume_ooo(srq);
+		}
+		if (srq->ooo_count == 0 && srq->pending_in_use)
+			post_pending_srq_wrs(srq);
+	} else {
+		pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx,
+			 srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 srq->ooo_count,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_produce_ooo(srq);
+	}
+	return wr_id;
+}
+
 /*
  * poll_cq
  *
@@ -475,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq,
  *    -EOVERFLOW    CQ overflow detected.
  */
 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
-		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
+		   u8 *cqe_flushed, u64 *cookie, u32 *credit,
+		   struct t4_srq *srq)
 {
 	int ret = 0;
 	struct t4_cqe *hw_cqe, read_cqe;
@@ -540,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, str
 		 */
 		if (CQE_TYPE(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -551,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, str
 		 */
 		if (CQE_WRID_STAG(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -576,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, str
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
 		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
-		t4_set_wq_in_error(wq);
+		t4_set_wq_in_error(wq, 0);
 	}
 
 	/*
@@ -590,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, str
 		 * then we complete this with T4_ERR_MSN and mark the wq in
 		 * error.
 		 */
-
-		if (t4_rq_empty(wq)) {
-			t4_set_wq_in_error(wq);
-			ret = -EAGAIN;
-			goto skip_cqe;
-		}
 		if (unlikely(!CQE_STATUS(hw_cqe) &&
 			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
-			t4_set_wq_in_error(wq);
+			t4_set_wq_in_error(wq, 0);
 			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
 		}
 		goto proc_cqe;
@@ -657,11 +720,16 @@ proc_cqe:
 			c4iw_log_wr_stats(wq, hw_cqe);
 		t4_sq_consume(wq);
 	} else {
-		pr_debug("completing rq idx %u\n", wq->rq.cidx);
-		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
-		if (c4iw_wr_log)
-			c4iw_log_wr_stats(wq, hw_cqe);
-		t4_rq_consume(wq);
+		if (!srq) {
+			pr_debug("completing rq idx %u\n", wq->rq.cidx);
+			*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+			if (c4iw_wr_log)
+				c4iw_log_wr_stats(wq, hw_cqe);
+			t4_rq_consume(wq);
+		} else {
+			*cookie = reap_srq_cqe(hw_cqe, srq);
+		}
+		wq->rq.msn++;
 		goto skip_cqe;
 	}
 
@@ -685,7 +753,7 @@ skip_cqe:
 }
 
 static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
-			      struct ib_wc *wc)
+			      struct ib_wc *wc, struct c4iw_srq *srq)
 {
 	struct t4_cqe uninitialized_var(cqe);
 	struct t4_wq *wq = qhp ? &qhp->wq : NULL;
@@ -694,7 +762,8 @@ static int __c4iw_poll_cq_one(struct c4i
 	u64 cookie = 0;
 	int ret;
 
-	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
+		      srq ? &srq->wq : NULL);
 	if (ret)
 		goto out;
 
@@ -703,6 +772,13 @@ static int __c4iw_poll_cq_one(struct c4i
 	wc->vendor_err = CQE_STATUS(&cqe);
 	wc->wc_flags = 0;
 
+	/*
+	 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+	 */
+	if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+	    srq->wq.in_use < srq->srq_limit)
+		c4iw_dispatch_srq_limit_reached_event(srq);
+
 	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
 		 CQE_QPID(&cqe),
 		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -828,6 +904,7 @@ out:
  */
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
+	struct c4iw_srq *srq = NULL;
 	struct c4iw_qp *qhp = NULL;
 	struct t4_cqe *rd_cqe;
 	int ret;
@@ -840,10 +917,15 @@ static int c4iw_poll_cq_one(struct c4iw_
 	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
 	if (qhp) {
 		spin_lock(&qhp->lock);
-		ret = __c4iw_poll_cq_one(chp, qhp, wc);
+		srq = qhp->srq;
+		if (srq)
+			spin_lock(&srq->lock);
+		ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
 		spin_unlock(&qhp->lock);
+		if (srq)
+			spin_unlock(&srq->lock);
 	} else {
-		ret = __c4iw_poll_cq_one(chp, NULL, wc);
+		ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
 	}
 	return ret;
 }
@@ -1078,3 +1160,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum
 	spin_unlock_irqrestore(&chp->lock, flag);
 	return ret;
 }
+
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+	struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+	unsigned long flag;
+
+	/* locking heirarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, flag);
+	spin_lock(&qhp->lock);
+
+	/* create a SRQ RECV CQE for srqidx */
+	insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
+
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, flag);
+}
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void
 
 			set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
 			cc = snprintf(qpd->buf + qpd->pos, space,
-				      "rc qp sq id %u rq id %u state %u "
+				      "rc qp sq id %u %s id %u state %u "
 				      "onchip %u ep tid %u state %u "
 				      "%pI4:%u/%u->%pI4:%u/%u\n",
-				      qp->wq.sq.qid, qp->wq.rq.qid,
+				      qp->wq.sq.qid, qp->srq ? "srq" : "rq",
+				      qp->srq ? qp->srq->idx : qp->wq.rq.qid,
 				      (int)qp->attr.state,
 				      qp->wq.sq.flags & T4_SQ_ONCHIP,
 				      ep->hwtid, (int)ep->com.state,
@@ -480,6 +481,9 @@ static int stats_show(struct seq_file *s
 	seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+	seq_printf(seq, "     SRQS: %10llu %10llu %10llu %10llu\n",
+		   dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
+			dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
 	seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *
 	dev->rdev.stats.pbl.fail = 0;
 	dev->rdev.stats.rqt.max = 0;
 	dev->rdev.stats.rqt.fail = 0;
+	dev->rdev.stats.rqt.max = 0;
+	dev->rdev.stats.rqt.fail = 0;
 	dev->rdev.stats.ocqp.max = 0;
 	dev->rdev.stats.ocqp.fail = 0;
 	dev->rdev.stats.db_full = 0;
@@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rd
 
 	rdev->qpmask = rdev->lldi.udb_density - 1;
 	rdev->cqmask = rdev->lldi.ucq_density - 1;
-	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
 		 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 		 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 		 rdev->lldi.vr->pbl.start,
@@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rd
 		 rdev->lldi.vr->qp.start,
 		 rdev->lldi.vr->qp.size,
 		 rdev->lldi.vr->cq.start,
-		 rdev->lldi.vr->cq.size);
+		 rdev->lldi.vr->cq.size,
+		 rdev->lldi.vr->srq.size);
 	pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
 		 &rdev->lldi.pdev->resource[2],
 		 rdev->lldi.db_reg, rdev->lldi.gts_reg,
@@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rd
 	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+	rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
 	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 
-	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
+				 T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
 	if (err) {
 		pr_err("error %d initializing resources\n", err);
 		return err;
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -1014,7 +1014,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rde
 		   struct c4iw_dev_ucontext *uctx);
 u32 c4iw_get_resource(struct c4iw_id_table *id_table);
 void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt);
 int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
 int c4iw_pblpool_create(struct c4iw_rdev *rdev);
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -342,9 +342,12 @@ static int c4iw_query_device(struct ib_d
 	props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
 	props->max_mr_size = T4_MAX_MR_SIZE;
 	props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
+	props->max_srq = dev->rdev.lldi.vr->srq.size;
 	props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
+	props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
 	props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
 	props->max_recv_sge = T4_MAX_RECV_SGE;
+	props->max_srq_sge = T4_MAX_RECV_SGE;
 	props->max_sge_rd = 1;
 	props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
 	props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
@@ -593,7 +596,10 @@ void c4iw_register_device(struct work_st
 	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
 	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
 	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
 	BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
 	memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
@@ -615,6 +621,9 @@ void c4iw_register_device(struct work_st
 	dev->ibdev.modify_qp = c4iw_ib_modify_qp;
 	dev->ibdev.query_qp = c4iw_ib_query_qp;
 	dev->ibdev.destroy_qp = c4iw_destroy_qp;
+	dev->ibdev.create_srq = c4iw_create_srq;
+	dev->ibdev.modify_srq = c4iw_modify_srq;
+	dev->ibdev.destroy_srq = c4iw_destroy_srq;
 	dev->ibdev.create_cq = c4iw_create_cq;
 	dev->ibdev.destroy_cq = c4iw_destroy_cq;
 	dev->ibdev.resize_cq = c4iw_resize_cq;
@@ -632,6 +641,7 @@ void c4iw_register_device(struct work_st
 	dev->ibdev.req_notify_cq = c4iw_arm_cq;
 	dev->ibdev.post_send = c4iw_post_send;
 	dev->ibdev.post_recv = c4iw_post_receive;
+	dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
 	dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
 	dev->ibdev.get_hw_stats = c4iw_get_mib;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rd
 }
 
 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
-		      struct c4iw_dev_ucontext *uctx)
+		      struct c4iw_dev_ucontext *uctx, int has_rq)
 {
 	/*
 	 * uP clears EQ contexts when the connection exits rdma mode,
 	 * so no need to post a RESET WR for these EQs.
 	 */
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
 	dealloc_sq(rdev, &wq->sq);
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
-	kfree(wq->rq.sw_rq);
 	kfree(wq->sq.sw_sq);
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+
+	if (has_rq) {
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+		kfree(wq->rq.sw_rq);
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	}
 	return 0;
 }
 
@@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4i
 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		     struct t4_cq *rcq, struct t4_cq *scq,
 		     struct c4iw_dev_ucontext *uctx,
-		     struct c4iw_wr_wait *wr_waitp)
+		     struct c4iw_wr_wait *wr_waitp,
+		     int need_rq)
 {
 	int user = (uctx != &rdev->uctx);
 	struct fw_ri_res_wr *res_wr;
@@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *r
 	if (!wq->sq.qid)
 		return -ENOMEM;
 
-	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
-	if (!wq->rq.qid) {
-		ret = -ENOMEM;
-		goto free_sq_qid;
+	if (need_rq) {
+		wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+		if (!wq->rq.qid) {
+			ret = -ENOMEM;
+			goto free_sq_qid;
+		}
 	}
 
 	if (!user) {
@@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *r
 				 GFP_KERNEL);
 		if (!wq->sq.sw_sq) {
 			ret = -ENOMEM;
-			goto free_rq_qid;
+			goto free_rq_qid;//FIXME
 		}
 
-		wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
-				 GFP_KERNEL);
-		if (!wq->rq.sw_rq) {
-			ret = -ENOMEM;
-			goto free_sw_sq;
+		if (need_rq) {
+			wq->rq.sw_rq = kcalloc(wq->rq.size,
+					       sizeof(*wq->rq.sw_rq),
+					       GFP_KERNEL);
+			if (!wq->rq.sw_rq) {
+				ret = -ENOMEM;
+				goto free_sw_sq;
+			}
 		}
 	}
 
-	/*
-	 * RQT must be a power of 2 and at least 16 deep.
-	 */
-	wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
-	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
-	if (!wq->rq.rqt_hwaddr) {
-		ret = -ENOMEM;
-		goto free_sw_rq;
+	if (need_rq) {
+		/*
+		 * RQT must be a power of 2 and at least 16 deep.
+		 */
+		wq->rq.rqt_size =
+			roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
+		wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+		if (!wq->rq.rqt_hwaddr) {
+			ret = -ENOMEM;
+			goto free_sw_rq;
+		}
 	}
 
 	ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *r
 	memset(wq->sq.queue, 0, wq->sq.memsize);
 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
 
-	wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
-					  wq->rq.memsize, &(wq->rq.dma_addr),
-					  GFP_KERNEL);
-	if (!wq->rq.queue) {
-		ret = -ENOMEM;
-		goto free_sq;
+	if (need_rq) {
+		wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+						  wq->rq.memsize,
+						  &wq->rq.dma_addr,
+						  GFP_KERNEL);
+		if (!wq->rq.queue) {
+			ret = -ENOMEM;
+			goto free_sq;
+		}
+		pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+			 wq->sq.queue,
+			 (unsigned long long)virt_to_phys(wq->sq.queue),
+			 wq->rq.queue,
+			 (unsigned long long)virt_to_phys(wq->rq.queue));
+		memset(wq->rq.queue, 0, wq->rq.memsize);
+		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 	}
-	pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
-		 wq->sq.queue,
-		 (unsigned long long)virt_to_phys(wq->sq.queue),
-		 wq->rq.queue,
-		 (unsigned long long)virt_to_phys(wq->rq.queue));
-	memset(wq->rq.queue, 0, wq->rq.memsize);
-	dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 
 	wq->db = rdev->lldi.db_reg;
 
 	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
 					 &wq->sq.bar2_qid,
 					 user ? &wq->sq.bar2_pa : NULL);
-	wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
-					 &wq->rq.bar2_qid,
-					 user ? &wq->rq.bar2_pa : NULL);
+	if (need_rq)
+		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
+						 T4_BAR2_QTYPE_EGRESS,
+						 &wq->rq.bar2_qid,
+						 user ? &wq->rq.bar2_pa : NULL);
 
 	/*
 	 * User mode must have bar2 access.
 	 */
-	if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
+	if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
 		pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
 		goto free_dma;
@@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *r
 
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + 2 * sizeof *res;
-
+	if (need_rq)
+		wr_len += sizeof(*res);
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
@@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *r
 	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
-			FW_RI_RES_WR_NRES_V(2) |
+			FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
 	res_wr->cookie = (uintptr_t)wr_waitp;
@@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *r
 		FW_RI_RES_WR_EQSIZE_V(eqsize));
 	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
 	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
-	res++;
-	res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
-	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
 
-	/*
-	 * eqsize is the number of 64B entries plus the status page size.
-	 */
-	eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
-		rdev->hw_queue.t4_eq_status_entries;
-	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
-		FW_RI_RES_WR_HOSTFCMODE_V(0) |	/* no host cidx updates */
-		FW_RI_RES_WR_CPRIO_V(0) |	/* don't keep in chip cache */
-		FW_RI_RES_WR_PCIECHN_V(0) |	/* set by uP at ri_init time */
-		FW_RI_RES_WR_IQID_V(rcq->cqid));
-	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
-		FW_RI_RES_WR_DCAEN_V(0) |
-		FW_RI_RES_WR_DCACPU_V(0) |
-		FW_RI_RES_WR_FBMIN_V(2) |
-		FW_RI_RES_WR_FBMAX_V(3) |
-		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
-		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
-		FW_RI_RES_WR_EQSIZE_V(eqsize));
-	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
-	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	if (need_rq) {
+		res++;
+		res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+		res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+		/*
+		 * eqsize is the number of 64B entries plus the status page size
+		 */
+		eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
+			rdev->hw_queue.t4_eq_status_entries;
+		res->u.sqrq.fetchszm_to_iqid =
+			/* no host cidx updates */
+			cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+			/* don't keep in chip cache */
+			FW_RI_RES_WR_CPRIO_V(0) |
+			/* set by uP at ri_init time */
+			FW_RI_RES_WR_PCIECHN_V(0) |
+			FW_RI_RES_WR_IQID_V(rcq->cqid));
+		res->u.sqrq.dcaen_to_eqsize =
+			cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+			FW_RI_RES_WR_DCACPU_V(0) |
+			FW_RI_RES_WR_FBMIN_V(2) |
+			FW_RI_RES_WR_FBMAX_V(3) |
+			FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+			FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+			FW_RI_RES_WR_EQSIZE_V(eqsize));
+		res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+		res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	}
 
 	c4iw_init_wr_wait(wr_waitp);
 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
@@ -363,19 +387,23 @@ static int create_qp(struct c4iw_rdev *r
 
 	return 0;
 free_dma:
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
+	if (need_rq)
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
 free_sq:
 	dealloc_sq(rdev, &wq->sq);
 free_hwaddr:
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+	if (need_rq)
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
 free_sw_rq:
-	kfree(wq->rq.sw_rq);
+	if (need_rq)
+		kfree(wq->rq.sw_rq);
 free_sw_sq:
 	kfree(wq->sq.sw_sq);
 free_rq_qid:
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	if (need_rq)
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 free_sq_qid:
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
 	return ret;
@@ -605,6 +633,20 @@ static int build_rdma_recv(struct c4iw_q
 	return 0;
 }
 
+static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr,
+			  u8 *len16)
+{
+	int ret;
+
+	ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
+			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+	if (ret)
+		return ret;
+	*len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
+			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
+	return 0;
+}
+
 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
 			      struct ib_reg_wr *wr, struct c4iw_mr *mhp,
 			      u8 *len16)
@@ -721,7 +763,7 @@ static void free_qp_work(struct work_str
 
 	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
 	if (ucontext)
 		c4iw_put_ucontext(ucontext);
@@ -1145,6 +1187,89 @@ int c4iw_post_receive(struct ib_qp *ibqp
 	return err;
 }
 
+static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
+			 u64 wr_id, u8 len16)
+{
+	struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
+
+	pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
+		 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
+		 srq->in_use, srq->ooo_count,
+		 (unsigned long long)wr_id, srq->pending_cidx,
+		 srq->pending_pidx, srq->pending_in_use);
+	pwr->wr_id = wr_id;
+	pwr->len16 = len16;
+	memcpy(&pwr->wqe, wqe, len16 * 16);
+	t4_srq_produce_pending_wr(srq);
+}
+
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		       struct ib_recv_wr **bad_wr)
+{
+	union t4_recv_wr *wqe, lwqe;
+	struct c4iw_srq *srq;
+	unsigned long flag;
+	u8 len16 = 0;
+	u16 idx = 0;
+	int err = 0;
+	u32 num_wrs;
+
+	srq = to_c4iw_srq(ibsrq);
+	spin_lock_irqsave(&srq->lock, flag);
+	num_wrs = t4_srq_avail(&srq->wq);
+	if (num_wrs == 0) {
+		spin_unlock_irqrestore(&srq->lock, flag);
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (wr->num_sge > T4_MAX_RECV_SGE) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+		wqe = &lwqe;
+		if (num_wrs)
+			err = build_srq_recv(wqe, wr, &len16);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+
+		wqe->recv.opcode = FW_RI_RECV_WR;
+		wqe->recv.r1 = 0;
+		wqe->recv.wrid = srq->wq.pidx;
+		wqe->recv.r2[0] = 0;
+		wqe->recv.r2[1] = 0;
+		wqe->recv.r2[2] = 0;
+		wqe->recv.len16 = len16;
+
+		if (srq->wq.ooo_count ||
+		    srq->wq.pending_in_use ||
+		    srq->wq.sw_rq[srq->wq.pidx].valid) {
+			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
+		} else {
+			srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
+			srq->wq.sw_rq[srq->wq.pidx].valid = 1;
+			c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
+			pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
+				 __func__, srq->wq.cidx,
+				 srq->wq.pidx, srq->wq.wq_pidx,
+				 srq->wq.in_use,
+				 (unsigned long long)wr->wr_id);
+			t4_srq_produce(&srq->wq, len16);
+			idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+		}
+		wr = wr->next;
+		num_wrs--;
+	}
+	if (idx)
+		t4_ring_srq_db(&srq->wq, idx, len16, wqe);
+	spin_unlock_irqrestore(&srq->lock, flag);
+	return err;
+}
+
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
 				    u8 *ecode)
 {
@@ -1321,7 +1446,7 @@ static void __flush_qp(struct c4iw_qp *q
 		       struct c4iw_cq *schp)
 {
 	int count;
-	int rq_flushed, sq_flushed;
+	int rq_flushed = 0, sq_flushed;
 	unsigned long flag;
 
 	pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1340,11 +1465,13 @@ static void __flush_qp(struct c4iw_qp *q
 		return;
 	}
 	qhp->wq.flushed = 1;
-	t4_set_wq_in_error(&qhp->wq);
+	t4_set_wq_in_error(&qhp->wq, 0);
 
 	c4iw_flush_hw_cq(rchp, qhp);
-	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	if (!qhp->srq) {
+		c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+		rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	}
 
 	if (schp != rchp)
 		c4iw_flush_hw_cq(schp, qhp);
@@ -1388,7 +1515,7 @@ static void flush_qp(struct c4iw_qp *qhp
 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
 	if (qhp->ibqp.uobject) {
-		t4_set_wq_in_error(&qhp->wq);
+		t4_set_wq_in_error(&qhp->wq, 0);
 		t4_set_cq_in_error(&rchp->cq);
 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1517,16 +1644,21 @@ static int rdma_init(struct c4iw_dev *rh
 	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
 	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
 	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
-	wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+	if (qhp->srq) {
+		wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
+						  qhp->srq->idx);
+	} else {
+		wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+		wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+		wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+						   rhp->rdev.lldi.vr->rq.start);
+	}
 	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
 	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
 	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
 	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
 	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
 	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
-	wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
-	wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
-					 rhp->rdev.lldi.vr->rq.start);
 	if (qhp->attr.mpa_attr.initiator)
 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
 
@@ -1643,7 +1775,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp,
 	case C4IW_QP_STATE_RTS:
 		switch (attrs->next_state) {
 		case C4IW_QP_STATE_CLOSING:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_CLOSING);
 			ep = qhp->ep;
 			if (!internal) {
@@ -1656,7 +1788,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp,
 				goto err;
 			break;
 		case C4IW_QP_STATE_TERMINATE:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
 			qhp->attr.layer_etype = attrs->layer_etype;
 			qhp->attr.ecode = attrs->ecode;
@@ -1673,7 +1805,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp,
 			}
 			break;
 		case C4IW_QP_STATE_ERROR:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_ERROR);
 			if (!internal) {
 				abort = 1;
@@ -1819,7 +1951,7 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 	struct c4iw_cq *schp;
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
-	unsigned int sqsize, rqsize;
+	unsigned int sqsize, rqsize = 0;
 	struct c4iw_ucontext *ucontext;
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
@@ -1840,11 +1972,13 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
 		return ERR_PTR(-EINVAL);
 
-	if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-		return ERR_PTR(-E2BIG);
-	rqsize = attrs->cap.max_recv_wr + 1;
-	if (rqsize < 8)
-		rqsize = 8;
+	if (!attrs->srq) {
+		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+			return ERR_PTR(-E2BIG);
+		rqsize = attrs->cap.max_recv_wr + 1;
+		if (rqsize < 8)
+			rqsize = 8;
+	}
 
 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
 		return ERR_PTR(-E2BIG);
@@ -1869,19 +2003,23 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 		(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
 		sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
 	qhp->wq.sq.flush_cidx = -1;
-	qhp->wq.rq.size = rqsize;
-	qhp->wq.rq.memsize =
-		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
-		sizeof(*qhp->wq.rq.queue);
+	if (!attrs->srq) {
+		qhp->wq.rq.size = rqsize;
+		qhp->wq.rq.memsize =
+			(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+			sizeof(*qhp->wq.rq.queue);
+	}
 
 	if (ucontext) {
 		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
-		qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+		if (!attrs->srq)
+			qhp->wq.rq.memsize =
+				roundup(qhp->wq.rq.memsize, PAGE_SIZE);
 	}
 
 	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
-			qhp->wr_waitp);
+			qhp->wr_waitp, !attrs->srq);
 	if (ret)
 		goto err_free_wr_wait;
 
@@ -1894,10 +2032,12 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
 	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
 	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
-	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
 	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
 	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
-	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	if (!attrs->srq) {
+		qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+		qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	}
 	qhp->attr.state = C4IW_QP_STATE_IDLE;
 	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
 	qhp->attr.enable_rdma_read = 1;
@@ -1922,20 +2062,25 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 			ret = -ENOMEM;
 			goto err_remove_handle;
 		}
-		rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
-		if (!rq_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_key;
+		if (!attrs->srq) {
+			rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+			if (!rq_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_key;
+			}
 		}
 		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
 		if (!sq_db_key_mm) {
 			ret = -ENOMEM;
 			goto err_free_rq_key;
 		}
-		rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
-		if (!rq_db_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_db_key;
+		if (!attrs->srq) {
+			rq_db_key_mm =
+				kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+			if (!rq_db_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_db_key;
+			}
 		}
 		if (t4_sq_onchip(&qhp->wq.sq)) {
 			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
@@ -1951,9 +2096,11 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 		uresp.sqid = qhp->wq.sq.qid;
 		uresp.sq_size = qhp->wq.sq.size;
 		uresp.sq_memsize = qhp->wq.sq.memsize;
-		uresp.rqid = qhp->wq.rq.qid;
-		uresp.rq_size = qhp->wq.rq.size;
-		uresp.rq_memsize = qhp->wq.rq.memsize;
+		if (!attrs->srq) {
+			uresp.rqid = qhp->wq.rq.qid;
+			uresp.rq_size = qhp->wq.rq.size;
+			uresp.rq_memsize = qhp->wq.rq.memsize;
+		}
 		spin_lock(&ucontext->mmap_lock);
 		if (ma_sync_key_mm) {
 			uresp.ma_sync_key = ucontext->key;
@@ -1963,12 +2110,16 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 		}
 		uresp.sq_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		uresp.sq_db_gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_db_gts_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_db_gts_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		spin_unlock(&ucontext->mmap_lock);
 		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
 		if (ret)
@@ -1977,18 +2128,23 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
 		insert_mmap(ucontext, sq_key_mm);
-		rq_key_mm->key = uresp.rq_key;
-		rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
-		rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
-		insert_mmap(ucontext, rq_key_mm);
+		if (!attrs->srq) {
+			rq_key_mm->key = uresp.rq_key;
+			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+			insert_mmap(ucontext, rq_key_mm);
+		}
 		sq_db_key_mm->key = uresp.sq_db_gts_key;
 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
 		sq_db_key_mm->len = PAGE_SIZE;
 		insert_mmap(ucontext, sq_db_key_mm);
-		rq_db_key_mm->key = uresp.rq_db_gts_key;
-		rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
-		rq_db_key_mm->len = PAGE_SIZE;
-		insert_mmap(ucontext, rq_db_key_mm);
+		if (!attrs->srq) {
+			rq_db_key_mm->key = uresp.rq_db_gts_key;
+			rq_db_key_mm->addr =
+				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
+			rq_db_key_mm->len = PAGE_SIZE;
+			insert_mmap(ucontext, rq_db_key_mm);
+		}
 		if (ma_sync_key_mm) {
 			ma_sync_key_mm->key = uresp.ma_sync_key;
 			ma_sync_key_mm->addr =
@@ -2001,8 +2157,20 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 		c4iw_get_ucontext(ucontext);
 		qhp->ucontext = ucontext;
 	}
+	if (!attrs->srq) {
+		qhp->wq.qp_errp =
+			&qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
+	} else {
+		qhp->wq.qp_errp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
+		qhp->wq.srqidxp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
+	}
+
 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
 	init_timer(&(qhp->timer));
+	if (attrs->srq)
+		qhp->srq = to_c4iw_srq(attrs->srq);
 	INIT_LIST_HEAD(&qhp->db_fc_entry);
 	pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
@@ -2012,18 +2180,20 @@ struct ib_qp *c4iw_create_qp(struct ib_p
 err_free_ma_sync_key:
 	kfree(ma_sync_key_mm);
 err_free_rq_db_key:
-	kfree(rq_db_key_mm);
+	if (!attrs->srq)
+		kfree(rq_db_key_mm);
 err_free_sq_db_key:
 	kfree(sq_db_key_mm);
 err_free_rq_key:
-	kfree(rq_key_mm);
+	if (!attrs->srq)
+		kfree(rq_key_mm);
 err_free_sq_key:
 	kfree(sq_key_mm);
 err_remove_handle:
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
 err_destroy_qp:
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
 err_free_wr_wait:
 	c4iw_put_wr_wait(qhp->wr_waitp);
 err_free_qhp:
@@ -2089,6 +2259,45 @@ struct ib_qp *c4iw_get_qp(struct ib_devi
 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
 }
 
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
+{
+	struct ib_event event = {0};
+
+	event.device = &srq->rhp->ibdev;
+	event.element.srq = &srq->ibsrq;
+	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+	ib_dispatch_event(&event);
+}
+
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask srq_attr_mask,
+		    struct ib_udata *udata)
+{
+	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+	int ret = 0;
+
+	/*
+	 * XXX 0 mask == a SW interrupt for srq_limit reached...
+	 */
+	if (udata && !srq_attr_mask) {
+		c4iw_dispatch_srq_limit_reached_event(srq);
+		goto out;
+	}
+
+	/* no support for this yet */
+	if (srq_attr_mask & IB_SRQ_MAX_WR) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
+		srq->armed = true;
+		srq->srq_limit = attr->srq_limit;
+	}
+out:
+	return ret;
+}
+
 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		     int attr_mask, struct ib_qp_init_attr *init_attr)
 {
@@ -2105,3 +2314,358 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp,
 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
 }
+
+static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	struct sk_buff *skb = srq->destroy_skb;
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	int wr_len;
+
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_RESET;
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+
+	c4iw_init_wr_wait(wr_waitp);
+	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			pci_unmap_addr(wq, mapping));
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+	kfree(wq->sw_rq);
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+}
+
+static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	int user = (uctx != &rdev->uctx);
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	struct sk_buff *skb;
+	int wr_len;
+	int eqsize;
+	int ret = -ENOMEM;
+
+	wq->qid = c4iw_get_qpid(rdev, uctx);
+	if (!wq->qid)
+		goto err;
+
+	if (!user) {
+		wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
+				    GFP_KERNEL);
+		if (!wq->sw_rq)
+			goto err_put_qpid;
+		wq->pending_wrs = kcalloc(srq->wq.size,
+					  sizeof(*srq->wq.pending_wrs),
+					  GFP_KERNEL);
+		if (!wq->pending_wrs)
+			goto err_free_sw_rq;
+	}
+
+	wq->rqt_size = wq->size;
+	wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
+	if (!wq->rqt_hwaddr)
+		goto err_free_pending_wrs;
+	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
+		T4_RQT_ENTRY_SHIFT;
+
+	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+				       wq->memsize, &wq->dma_addr,
+			GFP_KERNEL);
+	if (!wq->queue)
+		goto err_free_rqtpool;
+
+	memset(wq->queue, 0, wq->memsize);
+	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+
+	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+				      &wq->bar2_qid,
+			user ? &wq->bar2_pa : NULL);
+
+	/*
+	 * User mode must have bar2 access.
+	 */
+
+	if (user && !wq->bar2_va) {
+		pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
+			pci_name(rdev->lldi.pdev), wq->qid);
+		ret = -EINVAL;
+		goto err_free_queue;
+	}
+
+	/* build fw_ri_res_wr */
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+
+	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+	if (!skb)
+		goto err_free_queue;
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_WRITE;
+
+	/*
+	 * eqsize is the number of 64B entries plus the status page size.
+	 */
+	eqsize = wq->size * T4_RQ_NUM_SLOTS +
+		rdev->hw_queue.t4_eq_status_entries;
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+	res->u.srq.fetchszm_to_iqid =
+						/* no host cidx updates */
+		cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+		FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
+		FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
+		FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
+	res->u.srq.dcaen_to_eqsize =
+		cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+		FW_RI_RES_WR_DCACPU_V(0) |
+		FW_RI_RES_WR_FBMIN_V(2) |
+		FW_RI_RES_WR_FBMAX_V(3) |
+		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+		FW_RI_RES_WR_EQSIZE_V(eqsize));
+	res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.pdid = cpu_to_be32(srq->pdid);
+	res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
+	res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
+			rdev->lldi.vr->rq.start);
+
+	c4iw_init_wr_wait(wr_waitp);
+
+	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
+	if (ret)
+		goto err_free_queue;
+
+	pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
+			" bar2_addr %p rqt addr 0x%x size %d\n",
+			__func__, srq->idx, wq->qid, srq->pdid, wq->queue,
+			(u64)virt_to_phys(wq->queue), wq->bar2_va,
+			wq->rqt_hwaddr, wq->rqt_size);
+
+	return 0;
+err_free_queue:
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			pci_unmap_addr(wq, mapping));
+err_free_rqtpool:
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+err_free_pending_wrs:
+	if (!user)
+		kfree(wq->pending_wrs);
+err_free_sw_rq:
+	if (!user)
+		kfree(wq->sw_rq);
+err_put_qpid:
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+err:
+	return ret;
+}
+
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
+{
+	u64 *src, *dst;
+
+	src = (u64 *)wqe;
+	dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
+	while (len16) {
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		len16--;
+	}
+}
+
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
+			       struct ib_udata *udata)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_pd *php;
+	struct c4iw_create_srq_resp uresp;
+	struct c4iw_ucontext *ucontext;
+	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
+	int rqsize;
+	int ret;
+	int wr_len;
+
+	pr_debug("%s ib_pd %p\n", __func__, pd);
+
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	if (!rhp->rdev.lldi.vr->srq.size)
+		return ERR_PTR(-EINVAL);
+	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+		return ERR_PTR(-E2BIG);
+	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
+		return ERR_PTR(-E2BIG);
+
+	/*
+	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
+	 */
+	rqsize = attrs->attr.max_wr + 1;
+	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
+
+	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+	if (!srq->wr_waitp) {
+		ret = -ENOMEM;
+		goto err_free_srq;
+	}
+
+	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
+	if (srq->idx < 0) {
+		ret = -ENOMEM;
+		goto err_free_wr_wait;
+	}
+
+	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+	srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+	if (!srq->destroy_skb) {
+		ret = -ENOMEM;
+		goto err_free_srq_idx;
+	}
+
+	srq->rhp = rhp;
+	srq->pdid = php->pdid;
+
+	srq->wq.size = rqsize;
+	srq->wq.memsize =
+		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+		sizeof(*srq->wq.queue);
+	if (ucontext)
+		srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
+
+	ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
+			&rhp->rdev.uctx, srq->wr_waitp);
+	if (ret)
+		goto err_free_skb;
+	attrs->attr.max_wr = rqsize - 1;
+
+	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
+		srq->flags = T4_SRQ_LIMIT_SUPPORT;
+
+	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
+	if (ret)
+		goto err_free_queue;
+
+	if (udata) {
+		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
+		if (!srq_key_mm) {
+			ret = -ENOMEM;
+			goto err_remove_handle;
+		}
+		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
+		if (!srq_db_key_mm) {
+			ret = -ENOMEM;
+			goto err_free_srq_key_mm;
+		}
+		uresp.flags = srq->flags;
+		uresp.qid_mask = rhp->rdev.qpmask;
+		uresp.srqid = srq->wq.qid;
+		uresp.srq_size = srq->wq.size;
+		uresp.srq_memsize = srq->wq.memsize;
+		uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.srq_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.srq_db_gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (ret)
+			goto err_free_srq_db_key_mm;
+		srq_key_mm->key = uresp.srq_key;
+		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+		insert_mmap(ucontext, srq_key_mm);
+		srq_db_key_mm->key = uresp.srq_db_gts_key;
+		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
+		srq_db_key_mm->len = PAGE_SIZE;
+		insert_mmap(ucontext, srq_db_key_mm);
+	}
+
+	pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
+		 __func__, srq->wq.qid, srq->idx, srq->wq.size,
+			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
+
+	spin_lock_init(&srq->lock);
+	return &srq->ibsrq;
+err_free_srq_db_key_mm:
+	kfree(srq_db_key_mm);
+err_free_srq_key_mm:
+	kfree(srq_key_mm);
+err_remove_handle:
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+err_free_queue:
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+err_free_skb:
+	if (srq->destroy_skb)
+		kfree_skb(srq->destroy_skb);
+err_free_srq_idx:
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+err_free_wr_wait:
+	c4iw_put_wr_wait(srq->wr_waitp);
+err_free_srq:
+	kfree(srq);
+	return ERR_PTR(ret);
+}
+
+int c4iw_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_ucontext *ucontext;
+
+	srq = to_c4iw_srq(ibsrq);
+	rhp = srq->rhp;
+
+	pr_debug("%s id %d\n", __func__, srq->wq.qid);
+
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+	ucontext = ibsrq->uobject ?
+		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+	c4iw_put_wr_wait(srq->wr_waitp);
+	kfree(srq);
+	return 0;
+}
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4
 }
 
 /* nr_* must be power of 2 */
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt)
 {
 	int err = 0;
 	err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev
 					nr_pdid, 1, 0);
 	if (err)
 		goto pdid_err;
+	if (!nr_srqt)
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  1, 1, 0);
+	else
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  nr_srqt, 0, 0);
+	if (err)
+		goto srq_err;
 	return 0;
+ srq_err:
+	c4iw_id_table_free(&rdev->resource.pdid_table);
  pdid_err:
 	c4iw_id_table_free(&rdev->resource.qid_table);
  qid_err:
@@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
 {
 	unsigned rqt_start, rqt_chunk, rqt_top;
+	int skip = 0;
 
 	rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
 	if (!rdev->rqt_pool)
 		return -ENOMEM;
 
-	rqt_start = rdev->lldi.vr->rq.start;
-	rqt_chunk = rdev->lldi.vr->rq.size;
+	/*
+	 * If SRQs are supported, then never use the first RQE from
+	 * the RQT region. This is because HW uses RQT index 0 as NULL.
+	 */
+	if (rdev->lldi.vr->srq.size)
+		skip = T4_RQT_ENTRY_SIZE;
+
+	rqt_start = rdev->lldi.vr->rq.start + skip;
+	rqt_chunk = rdev->lldi.vr->rq.size - skip;
 	rqt_top = rqt_start + rqt_chunk;
 
 	while (rqt_start < rqt_top) {
@@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rd
 	kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
+{
+	int idx;
+
+	idx = c4iw_id_alloc(&rdev->resource.srq_table);
+	mutex_lock(&rdev->stats.lock);
+	if (idx == -1) {
+		rdev->stats.srqt.fail++;
+		mutex_unlock(&rdev->stats.lock);
+		return -ENOMEM;
+	}
+	rdev->stats.srqt.cur++;
+	if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
+		rdev->stats.srqt.max = rdev->stats.srqt.cur;
+	mutex_unlock(&rdev->stats.lock);
+	return idx;
+}
+
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
+{
+	c4iw_id_free(&rdev->resource.srq_table, idx);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.srqt.cur--;
+	mutex_unlock(&rdev->stats.lock);
+}
+
 /*
  * On-Chip QP Memory.
  */
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -491,7 +491,6 @@ static inline void t4_rq_produce(struct
 static inline void t4_rq_consume(struct t4_wq *wq)
 {
 	wq->rq.in_use--;
-	wq->rq.msn++;
 	if (++wq->rq.cidx == wq->rq.size)
 		wq->rq.cidx = 0;
 }
@@ -641,12 +640,14 @@ static inline void t4_ring_rq_db(struct
 
 static inline int t4_wq_in_error(struct t4_wq *wq)
 {
-	return wq->rq.queue[wq->rq.size].status.qp_err;
+	return *wq->qp_errp;
 }
 
-static inline void t4_set_wq_in_error(struct t4_wq *wq)
+static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
 {
-	wq->rq.queue[wq->rq.size].status.qp_err = 1;
+	if (srqidx)
+		*wq->srqidxp = srqidx;
+	*wq->qp_errp = 1;
 }
 
 static inline void t4_disable_wq_db(struct t4_wq *wq)