|
Johannes Thumshirn |
98c37e |
From: Israel Rukshin <israelr@mellanox.com>
|
|
Johannes Thumshirn |
98c37e |
Date: Sun, 26 Nov 2017 10:40:55 +0000
|
|
Johannes Thumshirn |
98c37e |
Subject: nvme-rdma: Use mr pool
|
|
Johannes Thumshirn |
98c37e |
Patch-mainline: v4.15-rc2
|
|
Johannes Thumshirn |
98c37e |
Git-commit: f41725bbe16b0773302c0cc7dc2e89f54828712d
|
|
Johannes Thumshirn |
98c37e |
References: FATE#323952, FATE#322506
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
Currently, blk_mq_tagset_iter() iterates over initial hctx tags only. If
|
|
Johannes Thumshirn |
98c37e |
an I/O scheduler is used, it doesn't iterate the hctx scheduler tags and
|
|
Johannes Thumshirn |
98c37e |
the static requests aren't updated. For example, while using NVMe
|
|
Johannes Thumshirn |
98c37e |
over Fabrics RDMA host, this causes us not to reinit the scheduler
|
|
Johannes Thumshirn |
98c37e |
requests and thus not re-register all the memory regions during the
|
|
Johannes Thumshirn |
98c37e |
tagset re-initialization in the reconnect flow.
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
This may lead to a memory registration error:
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
"MEMREG for CQE 0xffff88044c14dce8 failed with status memory management operation error (6)"
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
With this commit we don't need to reinit the requests, and thus fix this
|
|
Johannes Thumshirn |
98c37e |
failure.
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
|
|
Johannes Thumshirn |
98c37e |
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
|
|
Johannes Thumshirn |
98c37e |
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
|
|
Johannes Thumshirn |
98c37e |
Signed-off-by: Christoph Hellwig <hch@lst.de>
|
|
Johannes Thumshirn |
98c37e |
Acked-by: Johannes Thumshirn <jthumshirn@suse.de>
|
|
Johannes Thumshirn |
98c37e |
---
|
|
Johannes Thumshirn |
98c37e |
drivers/nvme/host/rdma.c | 95 ++++++++++++++++++-----------------------------
|
|
Johannes Thumshirn |
98c37e |
1 file changed, 37 insertions(+), 58 deletions(-)
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
--- a/drivers/nvme/host/rdma.c
|
|
Johannes Thumshirn |
98c37e |
+++ b/drivers/nvme/host/rdma.c
|
|
Johannes Thumshirn |
98c37e |
@@ -15,6 +15,7 @@
|
|
Johannes Thumshirn |
98c37e |
#include <linux/module.h>
|
|
Johannes Thumshirn |
98c37e |
#include <linux/init.h>
|
|
Johannes Thumshirn |
98c37e |
#include <linux/slab.h>
|
|
Johannes Thumshirn |
98c37e |
+#include <rdma/mr_pool.h>
|
|
Johannes Thumshirn |
98c37e |
#include <linux/err.h>
|
|
Johannes Thumshirn |
98c37e |
#include <linux/string.h>
|
|
Johannes Thumshirn |
98c37e |
#include <linux/atomic.h>
|
|
Johannes Thumshirn |
98c37e |
@@ -260,32 +261,6 @@ static int nvme_rdma_create_qp(struct nv
|
|
Johannes Thumshirn |
98c37e |
return ret;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
|
|
Johannes Thumshirn |
98c37e |
-{
|
|
Johannes Thumshirn |
98c37e |
- struct nvme_rdma_ctrl *ctrl = data;
|
|
Johannes Thumshirn |
98c37e |
- struct nvme_rdma_device *dev = ctrl->device;
|
|
Johannes Thumshirn |
98c37e |
- struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
|
|
Johannes Thumshirn |
98c37e |
- int ret = 0;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
- if (WARN_ON_ONCE(!req->mr))
|
|
Johannes Thumshirn |
98c37e |
- return 0;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
- ib_dereg_mr(req->mr);
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
- req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
|
|
Johannes Thumshirn |
98c37e |
- ctrl->max_fr_pages);
|
|
Johannes Thumshirn |
98c37e |
- if (IS_ERR(req->mr)) {
|
|
Johannes Thumshirn |
98c37e |
- ret = PTR_ERR(req->mr);
|
|
Johannes Thumshirn |
98c37e |
- req->mr = NULL;
|
|
Johannes Thumshirn |
98c37e |
- goto out;
|
|
Johannes Thumshirn |
98c37e |
- }
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
- req->mr->need_inval = false;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
-out:
|
|
Johannes Thumshirn |
98c37e |
- return ret;
|
|
Johannes Thumshirn |
98c37e |
-}
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
|
|
Johannes Thumshirn |
98c37e |
struct request *rq, unsigned int hctx_idx)
|
|
Johannes Thumshirn |
98c37e |
{
|
|
Johannes Thumshirn |
98c37e |
@@ -295,9 +270,6 @@ static void nvme_rdma_exit_request(struc
|
|
Johannes Thumshirn |
98c37e |
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
|
|
Johannes Thumshirn |
98c37e |
struct nvme_rdma_device *dev = queue->device;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
- if (req->mr)
|
|
Johannes Thumshirn |
98c37e |
- ib_dereg_mr(req->mr);
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
|
|
Johannes Thumshirn |
98c37e |
DMA_TO_DEVICE);
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
@@ -319,21 +291,9 @@ static int nvme_rdma_init_request(struct
|
|
Johannes Thumshirn |
98c37e |
if (ret)
|
|
Johannes Thumshirn |
98c37e |
return ret;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
- req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
|
|
Johannes Thumshirn |
98c37e |
- ctrl->max_fr_pages);
|
|
Johannes Thumshirn |
98c37e |
- if (IS_ERR(req->mr)) {
|
|
Johannes Thumshirn |
98c37e |
- ret = PTR_ERR(req->mr);
|
|
Johannes Thumshirn |
98c37e |
- goto out_free_qe;
|
|
Johannes Thumshirn |
98c37e |
- }
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
req->queue = queue;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
return 0;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
-out_free_qe:
|
|
Johannes Thumshirn |
98c37e |
- nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
|
|
Johannes Thumshirn |
98c37e |
- DMA_TO_DEVICE);
|
|
Johannes Thumshirn |
98c37e |
- return -ENOMEM;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|
Johannes Thumshirn |
98c37e |
@@ -433,6 +393,8 @@ static void nvme_rdma_destroy_queue_ib(s
|
|
Johannes Thumshirn |
98c37e |
struct nvme_rdma_device *dev = queue->device;
|
|
Johannes Thumshirn |
98c37e |
struct ib_device *ibdev = dev->dev;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+ ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
|
|
Johannes Thumshirn |
98c37e |
+
|
|
Johannes Thumshirn |
98c37e |
rdma_destroy_qp(queue->cm_id);
|
|
Johannes Thumshirn |
98c37e |
ib_free_cq(queue->ib_cq);
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
@@ -442,6 +404,12 @@ static void nvme_rdma_destroy_queue_ib(s
|
|
Johannes Thumshirn |
98c37e |
nvme_rdma_dev_put(dev);
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
|
|
Johannes Thumshirn |
98c37e |
+{
|
|
Johannes Thumshirn |
98c37e |
+ return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
|
|
Johannes Thumshirn |
98c37e |
+ ibdev->attrs.max_fast_reg_page_list_len);
|
|
Johannes Thumshirn |
98c37e |
+}
|
|
Johannes Thumshirn |
98c37e |
+
|
|
Johannes Thumshirn |
98c37e |
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
|
|
Johannes Thumshirn |
98c37e |
{
|
|
Johannes Thumshirn |
98c37e |
struct ib_device *ibdev;
|
|
Johannes Thumshirn |
98c37e |
@@ -484,8 +452,22 @@ static int nvme_rdma_create_queue_ib(str
|
|
Johannes Thumshirn |
98c37e |
goto out_destroy_qp;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+ ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
|
|
Johannes Thumshirn |
98c37e |
+ queue->queue_size,
|
|
Johannes Thumshirn |
98c37e |
+ IB_MR_TYPE_MEM_REG,
|
|
Johannes Thumshirn |
98c37e |
+ nvme_rdma_get_max_fr_pages(ibdev));
|
|
Johannes Thumshirn |
98c37e |
+ if (ret) {
|
|
Johannes Thumshirn |
98c37e |
+ dev_err(queue->ctrl->ctrl.device,
|
|
Johannes Thumshirn |
98c37e |
+ "failed to initialize MR pool sized %d for QID %d\n",
|
|
Johannes Thumshirn |
98c37e |
+ queue->queue_size, idx);
|
|
Johannes Thumshirn |
98c37e |
+ goto out_destroy_ring;
|
|
Johannes Thumshirn |
98c37e |
+ }
|
|
Johannes Thumshirn |
98c37e |
+
|
|
Johannes Thumshirn |
98c37e |
return 0;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+out_destroy_ring:
|
|
Johannes Thumshirn |
98c37e |
+ nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
|
|
Johannes Thumshirn |
98c37e |
+ sizeof(struct nvme_completion), DMA_FROM_DEVICE);
|
|
Johannes Thumshirn |
98c37e |
out_destroy_qp:
|
|
Johannes Thumshirn |
98c37e |
rdma_destroy_qp(queue->cm_id);
|
|
Johannes Thumshirn |
98c37e |
out_destroy_ib_cq:
|
|
Johannes Thumshirn |
98c37e |
@@ -757,8 +739,7 @@ static int nvme_rdma_configure_admin_que
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
ctrl->device = ctrl->queues[0].device;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
- ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
|
|
Johannes Thumshirn |
98c37e |
- ctrl->device->dev->attrs.max_fast_reg_page_list_len);
|
|
Johannes Thumshirn |
98c37e |
+ ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
if (new) {
|
|
Johannes Thumshirn |
98c37e |
ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
|
|
Johannes Thumshirn |
98c37e |
@@ -772,10 +753,6 @@ static int nvme_rdma_configure_admin_que
|
|
Johannes Thumshirn |
98c37e |
error = PTR_ERR(ctrl->ctrl.admin_q);
|
|
Johannes Thumshirn |
98c37e |
goto out_free_tagset;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
- } else {
|
|
Johannes Thumshirn |
98c37e |
- error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
|
|
Johannes Thumshirn |
98c37e |
- if (error)
|
|
Johannes Thumshirn |
98c37e |
- goto out_free_queue;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
error = nvme_rdma_start_queue(ctrl, 0);
|
|
Johannes Thumshirn |
98c37e |
@@ -855,10 +832,6 @@ static int nvme_rdma_configure_io_queues
|
|
Johannes Thumshirn |
98c37e |
goto out_free_tag_set;
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
} else {
|
|
Johannes Thumshirn |
98c37e |
- ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
|
|
Johannes Thumshirn |
98c37e |
- if (ret)
|
|
Johannes Thumshirn |
98c37e |
- goto out_free_io_queues;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
blk_mq_update_nr_hw_queues(&ctrl->tag_set,
|
|
Johannes Thumshirn |
98c37e |
ctrl->ctrl.queue_count - 1);
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
@@ -1061,6 +1034,11 @@ static void nvme_rdma_unmap_data(struct
|
|
Johannes Thumshirn |
98c37e |
if (!blk_rq_bytes(rq))
|
|
Johannes Thumshirn |
98c37e |
return;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+ if (req->mr) {
|
|
Johannes Thumshirn |
98c37e |
+ ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
|
|
Johannes Thumshirn |
98c37e |
+ req->mr = NULL;
|
|
Johannes Thumshirn |
98c37e |
+ }
|
|
Johannes Thumshirn |
98c37e |
+
|
|
Johannes Thumshirn |
98c37e |
ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
|
|
Johannes Thumshirn |
98c37e |
req->nents, rq_data_dir(rq) ==
|
|
Johannes Thumshirn |
98c37e |
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
|
|
Johannes Thumshirn |
98c37e |
@@ -1117,12 +1095,18 @@ static int nvme_rdma_map_sg_fr(struct nv
|
|
Johannes Thumshirn |
98c37e |
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
|
|
Johannes Thumshirn |
98c37e |
int nr;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
+ req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
|
|
Johannes Thumshirn |
98c37e |
+ if (WARN_ON_ONCE(!req->mr))
|
|
Johannes Thumshirn |
98c37e |
+ return -EAGAIN;
|
|
Johannes Thumshirn |
98c37e |
+
|
|
Johannes Thumshirn |
98c37e |
/*
|
|
Johannes Thumshirn |
98c37e |
* Align the MR to a 4K page size to match the ctrl page size and
|
|
Johannes Thumshirn |
98c37e |
* the block virtual boundary.
|
|
Johannes Thumshirn |
98c37e |
*/
|
|
Johannes Thumshirn |
98c37e |
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
|
|
Johannes Thumshirn |
98c37e |
if (unlikely(nr < count)) {
|
|
Johannes Thumshirn |
98c37e |
+ ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
|
|
Johannes Thumshirn |
98c37e |
+ req->mr = NULL;
|
|
Johannes Thumshirn |
98c37e |
if (nr < 0)
|
|
Johannes Thumshirn |
98c37e |
return nr;
|
|
Johannes Thumshirn |
98c37e |
return -EINVAL;
|
|
Johannes Thumshirn |
98c37e |
@@ -1141,8 +1125,6 @@ static int nvme_rdma_map_sg_fr(struct nv
|
|
Johannes Thumshirn |
98c37e |
IB_ACCESS_REMOTE_READ |
|
|
Johannes Thumshirn |
98c37e |
IB_ACCESS_REMOTE_WRITE;
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
- req->mr->need_inval = true;
|
|
Johannes Thumshirn |
98c37e |
-
|
|
Johannes Thumshirn |
98c37e |
sg->addr = cpu_to_le64(req->mr->iova);
|
|
Johannes Thumshirn |
98c37e |
put_unaligned_le24(req->mr->length, sg->length);
|
|
Johannes Thumshirn |
98c37e |
put_unaligned_le32(req->mr->rkey, sg->key);
|
|
Johannes Thumshirn |
98c37e |
@@ -1162,7 +1144,6 @@ static int nvme_rdma_map_data(struct nvm
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
req->num_sge = 1;
|
|
Johannes Thumshirn |
98c37e |
req->inline_data = false;
|
|
Johannes Thumshirn |
98c37e |
- req->mr->need_inval = false;
|
|
Johannes Thumshirn |
98c37e |
refcount_set(&req->ref, 2); /* send and recv completions */
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
c->common.flags |= NVME_CMD_SGL_METABUF;
|
|
Johannes Thumshirn |
98c37e |
@@ -1341,8 +1322,7 @@ static int nvme_rdma_process_nvme_rsp(st
|
|
Johannes Thumshirn |
98c37e |
req->mr->rkey);
|
|
Johannes Thumshirn |
98c37e |
nvme_rdma_error_recovery(queue->ctrl);
|
|
Johannes Thumshirn |
98c37e |
}
|
|
Johannes Thumshirn |
98c37e |
- req->mr->need_inval = false;
|
|
Johannes Thumshirn |
98c37e |
- } else if (req->mr->need_inval) {
|
|
Johannes Thumshirn |
98c37e |
+ } else if (req->mr) {
|
|
Johannes Thumshirn |
98c37e |
ret = nvme_rdma_inv_rkey(queue, req);
|
|
Johannes Thumshirn |
98c37e |
if (unlikely(ret < 0)) {
|
|
Johannes Thumshirn |
98c37e |
dev_err(queue->ctrl->ctrl.device,
|
|
Johannes Thumshirn |
98c37e |
@@ -1650,7 +1630,7 @@ static blk_status_t nvme_rdma_queue_rq(s
|
|
Johannes Thumshirn |
98c37e |
sizeof(struct nvme_command), DMA_TO_DEVICE);
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
|
|
Johannes Thumshirn |
98c37e |
- req->mr->need_inval ? &req->reg_wr.wr : NULL);
|
|
Johannes Thumshirn |
98c37e |
+ req->mr ? &req->reg_wr.wr : NULL);
|
|
Johannes Thumshirn |
98c37e |
if (unlikely(err)) {
|
|
Johannes Thumshirn |
98c37e |
nvme_rdma_unmap_data(queue, rq);
|
|
Johannes Thumshirn |
98c37e |
goto err;
|
|
Johannes Thumshirn |
98c37e |
@@ -1798,7 +1778,6 @@ static const struct nvme_ctrl_ops nvme_r
|
|
Johannes Thumshirn |
98c37e |
.submit_async_event = nvme_rdma_submit_async_event,
|
|
Johannes Thumshirn |
98c37e |
.delete_ctrl = nvme_rdma_delete_ctrl,
|
|
Johannes Thumshirn |
98c37e |
.get_address = nvmf_get_address,
|
|
Johannes Thumshirn |
98c37e |
- .reinit_request = nvme_rdma_reinit_request,
|
|
Johannes Thumshirn |
98c37e |
};
|
|
Johannes Thumshirn |
98c37e |
|
|
Johannes Thumshirn |
98c37e |
static inline bool
|