From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 17 Aug 2017 13:13:35 -0700
Subject: [PATCH] skd: Reduce memory usage
Git-commit: a3db102def237c27c6f2a2a6a58b79b2f2d4e39f
Patch-mainline: v4.14-rc1
References: bsc#1104967,FATE#325924

Every single coherent DMA memory buffer occupies at least one page.
Reduce memory usage by switching from coherent buffers to streaming
DMA for I/O requests (struct skd_fitmsg_context) and S/G-lists
(struct fit_sg_descriptor[]).

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 drivers/block/skd_main.c | 145 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 108 insertions(+), 37 deletions(-)
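For reference, a minimal sketch of the streaming-DMA pattern this patch switches to. The helper names example_alloc_dma()/example_free_dma() are illustrative only, not the driver's; the real skd_alloc_dma()/skd_free_dma() below take the object size from the kmem_cache itself rather than a size argument.

#include <linux/dma-mapping.h>
#include <linux/slab.h>

/*
 * Allocate from a right-sized kmem_cache and map the object for
 * streaming DMA. Unlike dma_alloc_coherent(), which hands out at
 * least one page per buffer, this wastes no memory beyond the
 * object size; in exchange, callers must bracket device access
 * with dma_sync_single_for_{device,cpu}() calls.
 */
static void *example_alloc_dma(struct device *dev, struct kmem_cache *s,
			       size_t size, dma_addr_t *dma_handle,
			       gfp_t gfp, enum dma_data_direction dir)
{
	void *buf = kmem_cache_alloc(s, gfp);

	if (!buf)
		return NULL;
	*dma_handle = dma_map_single(dev, buf, size, dir);
	if (dma_mapping_error(dev, *dma_handle)) {
		/* Mapping failed; return the object to its cache. */
		kmem_cache_free(s, buf);
		return NULL;
	}
	return buf;
}

static void example_free_dma(struct device *dev, struct kmem_cache *s,
			     void *buf, dma_addr_t dma_handle,
			     size_t size, enum dma_data_direction dir)
{
	if (!buf)
		return;
	dma_unmap_single(dev, dma_handle, size, dir);
	kmem_cache_free(s, buf);
}

With streaming mappings, CPU writes must be made visible to the device with dma_sync_single_for_device() before the doorbell is rung, and data written by the device must be pulled back with dma_sync_single_for_cpu() before the CPU reads it; that ownership hand-off is exactly what the hunks in skd_process_request(), skd_send_fitmsg(), skd_send_special_fitmsg() and skd_complete_internal() below add.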

diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index a20434ca3e18..610c8979dc7e 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -32,6 +32,7 @@
 #include <linux/aer.h>
 #include <linux/wait.h>
 #include <linux/stringify.h>
+#include <linux/slab_def.h>
 #include <scsi/scsi.h>
 #include <scsi/sg.h>
 #include <linux/io.h>
@@ -256,6 +257,9 @@ struct skd_device {
 
 	u8 skcomp_cycle;
 	u32 skcomp_ix;
+	struct kmem_cache *msgbuf_cache;
+	struct kmem_cache *sglist_cache;
+	struct kmem_cache *databuf_cache;
 	struct fit_completion_entry_v1 *skcomp_table;
 	struct fit_comp_error_info *skerr_table;
 	dma_addr_t cq_dma_address;
@@ -538,6 +542,11 @@ static void skd_process_request(struct request *req, bool last)
 		return;
 	}
 
+	dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+				   skreq->n_sg *
+				   sizeof(struct fit_sg_descriptor),
+				   DMA_TO_DEVICE);
+
 	spin_lock_irqsave(&skdev->lock, flags);
 	/* Either a FIT msg is in progress or we have to start one. */
 	skmsg = skdev->skmsg;
@@ -1078,6 +1087,11 @@ static void skd_complete_internal(struct skd_device *skdev,
 
 	dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
 
+	dma_sync_single_for_cpu(&skdev->pdev->dev,
+				skspcl->db_dma_address,
+				skspcl->req.sksg_list[0].byte_count,
+				DMA_BIDIRECTIONAL);
+
 	skspcl->req.completion = *skcomp;
 	skspcl->req.state = SKD_REQ_STATE_IDLE;
 	skspcl->req.id += SKD_ID_INCR;
@@ -1263,6 +1277,9 @@ static void skd_send_fitmsg(struct skd_device *skdev,
 		 */
 		qcmd |= FIT_QCMD_MSGSIZE_64;
 
+	dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
+				   skmsg->length, DMA_TO_DEVICE);
+
 	/* Make sure skd_msg_buf is written before the doorbell is triggered. */
 	smp_wmb();
 
@@ -1274,6 +1291,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 {
 	u64 qcmd;
 
+	WARN_ON_ONCE(skspcl->req.n_sg != 1);
+
 	if (unlikely(skdev->dbg_level > 1)) {
 		u8 *bp = (u8 *)skspcl->msg_buf;
 		int i;
@@ -1307,6 +1326,17 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 	qcmd = skspcl->mb_dma_address;
 	qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
 
+	dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
+				   SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
+	dma_sync_single_for_device(&skdev->pdev->dev,
+				   skspcl->req.sksg_dma_address,
+				   1 * sizeof(struct fit_sg_descriptor),
+				   DMA_TO_DEVICE);
+	dma_sync_single_for_device(&skdev->pdev->dev,
+				   skspcl->db_dma_address,
+				   skspcl->req.sksg_list[0].byte_count,
+				   DMA_BIDIRECTIONAL);
+
 	/* Make sure skd_msg_buf is written before the doorbell is triggered. */
 	smp_wmb();
 
@@ -2619,6 +2649,35 @@ static void skd_release_irq(struct skd_device *skdev)
  *****************************************************************************
  */
 
+static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
+			   dma_addr_t *dma_handle, gfp_t gfp,
+			   enum dma_data_direction dir)
+{
+	struct device *dev = &skdev->pdev->dev;
+	void *buf;
+
+	buf = kmem_cache_alloc(s, gfp);
+	if (!buf)
+		return NULL;
+	*dma_handle = dma_map_single(dev, buf, s->size, dir);
+	if (dma_mapping_error(dev, *dma_handle)) {
+		kfree(buf);
+		buf = NULL;
+	}
+	return buf;
+}
+
+static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
+			 void *vaddr, dma_addr_t dma_handle,
+			 enum dma_data_direction dir)
+{
+	if (!vaddr)
+		return;
+
+	dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+	kmem_cache_free(s, vaddr);
+}
+
 static int skd_cons_skcomp(struct skd_device *skdev)
 {
 	int rc = 0;
@@ -2695,18 +2754,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
 						  dma_addr_t *ret_dma_addr)
 {
 	struct fit_sg_descriptor *sg_list;
-	u32 nbytes;
 
-	nbytes = sizeof(*sg_list) * n_sg;
-
-	sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
+	sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
+				GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
 
 	if (sg_list != NULL) {
 		uint64_t dma_address = *ret_dma_addr;
 		u32 i;
 
-		memset(sg_list, 0, nbytes);
-
 		for (i = 0; i < n_sg - 1; i++) {
 			uint64_t ndp_off;
 			ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -2720,15 +2775,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
 }
 
 static void skd_free_sg_list(struct skd_device *skdev,
-			     struct fit_sg_descriptor *sg_list, u32 n_sg,
+			     struct fit_sg_descriptor *sg_list,
 			     dma_addr_t dma_addr)
 {
-	u32 nbytes = sizeof(*sg_list) * n_sg;
-
 	if (WARN_ON_ONCE(!sg_list))
 		return;
 
-	pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
+	skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
+		     DMA_TO_DEVICE);
 }
 
 static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
@@ -2752,34 +2806,31 @@ static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 	struct skd_device *skdev = set->driver_data;
 	struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
 
-	skd_free_sg_list(skdev, skreq->sksg_list,
-			 skdev->sgs_per_request,
-			 skreq->sksg_dma_address);
+	skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
 }
 
 static int skd_cons_sksb(struct skd_device *skdev)
 {
 	int rc = 0;
 	struct skd_special_context *skspcl;
-	u32 nbytes;
 
 	skspcl = &skdev->internal_skspcl;
 
 	skspcl->req.id = 0 + SKD_ID_INTERNAL;
 	skspcl->req.state = SKD_REQ_STATE_IDLE;
 
-	nbytes = SKD_N_INTERNAL_BYTES;
-
-	skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
-						 &skspcl->db_dma_address);
+	skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
+					 &skspcl->db_dma_address,
+					 GFP_DMA | __GFP_ZERO,
+					 DMA_BIDIRECTIONAL);
 	if (skspcl->data_buf == NULL) {
 		rc = -ENOMEM;
 		goto err_out;
 	}
 
-	nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-	skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
-						&skspcl->mb_dma_address);
+	skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
+					&skspcl->mb_dma_address,
+					GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
 	if (skspcl->msg_buf == NULL) {
 		rc = -ENOMEM;
 		goto err_out;
@@ -2886,6 +2937,7 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
 {
 	struct skd_device *skdev;
 	int blk_major = skd_major;
+	size_t size;
 	int rc;
 
 	skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
@@ -2914,6 +2966,31 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
 	INIT_WORK(&skdev->start_queue, skd_start_queue);
 	INIT_WORK(&skdev->completion_worker, skd_completion_worker);
 
+	size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
+	skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
+						SLAB_HWCACHE_ALIGN, NULL);
+	if (!skdev->msgbuf_cache)
+		goto err_out;
+	WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
+		  "skd-msgbuf: %d < %zd\n",
+		  kmem_cache_size(skdev->msgbuf_cache), size);
+	size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
+	skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
+						SLAB_HWCACHE_ALIGN, NULL);
+	if (!skdev->sglist_cache)
+		goto err_out;
+	WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
+		  "skd-sglist: %d < %zd\n",
+		  kmem_cache_size(skdev->sglist_cache), size);
+	size = SKD_N_INTERNAL_BYTES;
+	skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
+						 SLAB_HWCACHE_ALIGN, NULL);
+	if (!skdev->databuf_cache)
+		goto err_out;
+	WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
+		  "skd-databuf: %d < %zd\n",
+		  kmem_cache_size(skdev->databuf_cache), size);
+
 	dev_dbg(&skdev->pdev->dev, "skcomp\n");
 	rc = skd_cons_skcomp(skdev);
 	if (rc < 0)
@@ -2986,31 +3063,21 @@ static void skd_free_skmsg(struct skd_device *skdev)
 
 static void skd_free_sksb(struct skd_device *skdev)
 {
-	struct skd_special_context *skspcl;
-	u32 nbytes;
-
-	skspcl = &skdev->internal_skspcl;
-
-	if (skspcl->data_buf != NULL) {
-		nbytes = SKD_N_INTERNAL_BYTES;
+	struct skd_special_context *skspcl = &skdev->internal_skspcl;
 
-		pci_free_consistent(skdev->pdev, nbytes,
-				    skspcl->data_buf, skspcl->db_dma_address);
-	}
+	skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
+		     skspcl->db_dma_address, DMA_BIDIRECTIONAL);
 
 	skspcl->data_buf = NULL;
 	skspcl->db_dma_address = 0;
 
-	if (skspcl->msg_buf != NULL) {
-		nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-		pci_free_consistent(skdev->pdev, nbytes,
-				    skspcl->msg_buf, skspcl->mb_dma_address);
-	}
+	skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
+		     skspcl->mb_dma_address, DMA_TO_DEVICE);
 
 	skspcl->msg_buf = NULL;
 	skspcl->mb_dma_address = 0;
 
-	skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
+	skd_free_sg_list(skdev, skspcl->req.sksg_list,
 			 skspcl->req.sksg_dma_address);
 
 	skspcl->req.sksg_list = NULL;
@@ -3056,6 +3123,10 @@ static void skd_destruct(struct skd_device *skdev)
 	dev_dbg(&skdev->pdev->dev, "skcomp\n");
 	skd_free_skcomp(skdev);
 
+	kmem_cache_destroy(skdev->databuf_cache);
+	kmem_cache_destroy(skdev->sglist_cache);
+	kmem_cache_destroy(skdev->msgbuf_cache);
+
 	dev_dbg(&skdev->pdev->dev, "skdev\n");
 	kfree(skdev);
 }
-- 
2.16.4