From: Ilya Lesokhin <ilyal@mellanox.com>
Date: Tue, 13 Mar 2018 15:18:48 +0200
Subject: IB/mlx5: Maintain a single emergency page
Patch-mainline: v4.17-rc1
Git-commit: c44ef998f25eaddcd78924f98e5baed602d933e6
References: bsc#1103991 FATE#326007

The mlx5 driver needs to be able to issue invalidation to ODP MRs
even if it cannot allocate memory. To this end it preallocates
emergency pages to use when the situation arises.

This flow should be rare enough that we don't need to worry about
contention, and therefore a single emergency page is good enough.

Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/mlx5/main.c    |   42 ++++++++++++++++++++++++-----------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |    6 ++---
 drivers/infiniband/hw/mlx5/mr.c      |   11 ++++-----
 3 files changed, 37 insertions(+), 22 deletions(-)

--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -92,6 +92,12 @@ static LIST_HEAD(mlx5_ib_dev_list);
  */
 static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
 
+/* We can't use an array for xlt_emergency_page because dma_map_single()
+ * doesn't work on kernel module memory.
+ */
+static unsigned long xlt_emergency_page;
+static struct mutex xlt_emergency_page_mutex;
+
 struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
 {
 	struct mlx5_ib_dev *dev;
@@ -1702,17 +1708,10 @@ static struct ib_ucontext *mlx5_ib_alloc
 	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
-	context->upd_xlt_page = __get_free_page(GFP_KERNEL);
-	if (!context->upd_xlt_page) {
-		err = -ENOMEM;
-		goto out_uars;
-	}
-	mutex_init(&context->upd_xlt_page_mutex);
-
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
 		err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
 		if (err)
-			goto out_page;
+			goto out_uars;
 	}
 
 	INIT_LIST_HEAD(&context->vma_private_list);
@@ -1789,9 +1788,6 @@ out_td:
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 
-out_page:
-	free_page(context->upd_xlt_page);
-
 out_uars:
 	deallocate_uars(dev, context);
 
@@ -1817,7 +1813,6 @@ static int mlx5_ib_dealloc_ucontext(stru
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 
-	free_page(context->upd_xlt_page);
 	deallocate_uars(dev, context);
 	kfree(bfregi->sys_pages);
 	kfree(bfregi->count);
@@ -5301,13 +5296,32 @@ static struct mlx5_interface mlx5_ib_int
 	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
 };
 
+unsigned long mlx5_ib_get_xlt_emergency_page(void)
+{
+	mutex_lock(&xlt_emergency_page_mutex);
+	return xlt_emergency_page;
+}
+
+void mlx5_ib_put_xlt_emergency_page(void)
+{
+	mutex_unlock(&xlt_emergency_page_mutex);
+}
+
 static int __init mlx5_ib_init(void)
 {
 	int err;
 
+	xlt_emergency_page = __get_free_page(GFP_KERNEL);
+	if (!xlt_emergency_page)
+		return -ENOMEM;
+
+	mutex_init(&xlt_emergency_page_mutex);
+
 	mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
-	if (!mlx5_ib_event_wq)
+	if (!mlx5_ib_event_wq) {
+		free_page(xlt_emergency_page);
 		return -ENOMEM;
+	}
 
 	mlx5_ib_odp_init();
 
@@ -5320,6 +5334,8 @@ static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
 	destroy_workqueue(mlx5_ib_event_wq);
+	mutex_destroy(&xlt_emergency_page_mutex);
+	free_page(xlt_emergency_page);
 }
 
 module_init(mlx5_ib_init);
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -130,9 +130,6 @@ struct mlx5_ib_ucontext {
 	/* protect vma_private_list add/del */
 	struct mutex		vma_private_list_mutex;
 
-	unsigned long		upd_xlt_page;
-	/* protect ODP/KSM */
-	struct mutex		upd_xlt_page_mutex;
 	u64			lib_caps;
 };
 
@@ -1221,4 +1218,7 @@ static inline int get_num_static_uars(st
 	return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
 }
 
+unsigned long mlx5_ib_get_xlt_emergency_page(void);
+void mlx5_ib_put_xlt_emergency_page(void);
+
 #endif /* MLX5_IB_H */
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -989,7 +989,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr
 {
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct device *ddev = dev->ib_dev.dev.parent;
-	struct mlx5_ib_ucontext *uctx = NULL;
 	int size;
 	void *xlt;
 	dma_addr_t dma;
@@ -1005,6 +1004,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr
 	size_t pages_to_map = 0;
 	size_t pages_iter = 0;
 	gfp_t gfp;
+	bool use_emergency_page = false;
 
 	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 	 * so we need to align the offset and length accordingly
@@ -1031,12 +1031,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr
 	}
 
 	if (!xlt) {
-		uctx = to_mucontext(mr->ibmr.pd->uobject->context);
 		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
 		size = PAGE_SIZE;
-		xlt = (void *)uctx->upd_xlt_page;
-		mutex_lock(&uctx->upd_xlt_page_mutex);
 		memset(xlt, 0, size);
+		use_emergency_page = true;
 	}
 	pages_iter = size / desc_size;
 	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
@@ -1100,8 +1099,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr
 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
 free_xlt:
-	if (uctx)
-		mutex_unlock(&uctx->upd_xlt_page_mutex);
+	if (use_emergency_page)
+		mlx5_ib_put_xlt_emergency_page();
 	else
 		free_pages((unsigned long)xlt, get_order(size));