Blob Blame History Raw
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 9 May 2020 17:44:02 +0800
Subject: crypto: hisilicon - QM memory management optimization
Git-commit: 5308f6600a393ee848ed9d9f77b167aa6b202e9c (partial)
Patch-mainline: v5.8-rc1
References: jsc#SLE-16507 jsc#SLE-15835

Put all the code for the memory allocation into the QM initialization
process. Before, The qp memory was allocated when the qp was created,
and released when the qp was released, It is now changed to allocate
all the qp memory once.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Shukun Tan <tanshukun1@huawei.com>
Reviewed-by: Zhou Wang <wangzhou1@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
[mb: drop hunk hisi_qm_get_available_instances and event_cb as we don't support uacce]
Signed-off-by: Matthias Brugger <mbrugger@suse.com>
---
 drivers/crypto/hisilicon/qm.c |  253 ++++++++++++++++++++----------------------
 drivers/crypto/hisilicon/qm.h |    4 
 2 files changed, 126 insertions(+), 131 deletions(-)

--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -6,6 +6,7 @@
 #include <linux/bitmap.h>
 #include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
+#include <linux/idr.h>
 #include <linux/io.h>
 #include <linux/irqreturn.h>
 #include <linux/log2.h>
@@ -572,7 +573,7 @@ static struct hisi_qp *qm_to_hisi_qp(str
 {
 	u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
 
-	return qm->qp_array[cqn];
+	return &qm->qp_array[cqn];
 }
 
 static void qm_cq_head_update(struct hisi_qp *qp)
@@ -617,8 +618,7 @@ static void qm_work_process(struct work_
 	while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
 		eqe_num++;
 		qp = qm_to_hisi_qp(qm, eqe);
-		if (qp)
-			qm_poll_qp(qp, qm);
+		qm_poll_qp(qp, qm);
 
 		if (qm->status.eq_head == QM_Q_DEPTH - 1) {
 			qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -1239,50 +1239,35 @@ static struct hisi_qp *qm_create_qp_nolo
 {
 	struct device *dev = &qm->pdev->dev;
 	struct hisi_qp *qp;
-	int qp_id, ret;
+	int qp_id;
 
 	if (!qm_qp_avail_state(qm, NULL, QP_INIT))
 		return ERR_PTR(-EPERM);
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return ERR_PTR(-ENOMEM);
-
-	qp_id = find_first_zero_bit(qm->qp_bitmap, qm->qp_num);
-	if (qp_id >= qm->qp_num) {
-		dev_info(&qm->pdev->dev, "QM all queues are busy!\n");
-		ret = -EBUSY;
-		goto err_free_qp;
+	if (qm->qp_in_used == qm->qp_num) {
+		dev_info_ratelimited(dev, "All %u queues of QM are busy!\n",
+				     qm->qp_num);
+		return ERR_PTR(-EBUSY);
 	}
-	set_bit(qp_id, qm->qp_bitmap);
-	qm->qp_array[qp_id] = qp;
-	qm->qp_in_used++;
-	qp->qm = qm;
 
-	qp->qdma.size = qm->sqe_size * QM_Q_DEPTH +
-			sizeof(struct qm_cqe) * QM_Q_DEPTH;
-	qp->qdma.va = dma_alloc_coherent(dev, qp->qdma.size,
-					 &qp->qdma.dma, GFP_KERNEL);
-	if (!qp->qdma.va) {
-		ret = -ENOMEM;
-		goto err_clear_bit;
+	qp_id = idr_alloc_cyclic(&qm->qp_idr, NULL, 0, qm->qp_num, GFP_ATOMIC);
+	if (qp_id < 0) {
+		dev_info_ratelimited(dev, "All %u queues of QM are busy!\n",
+				    qm->qp_num);
+		return ERR_PTR(-EBUSY);
 	}
 
-	dev_dbg(dev, "allocate qp dma buf(va=%pK, dma=%pad, size=%zx)\n",
-		qp->qdma.va, &qp->qdma.dma, qp->qdma.size);
+	qp = &qm->qp_array[qp_id];
+
+	memset(qp->cqe, 0, sizeof(struct qm_cqe) * QM_Q_DEPTH);
 
+	qp->req_cb = NULL;
 	qp->qp_id = qp_id;
 	qp->alg_type = alg_type;
+	qm->qp_in_used++;
 	atomic_set(&qp->qp_status.flags, QP_INIT);
 
 	return qp;
-
-err_clear_bit:
-	qm->qp_array[qp_id] = NULL;
-	clear_bit(qp_id, qm->qp_bitmap);
-err_free_qp:
-	kfree(qp);
-	return ERR_PTR(ret);
 }
 
 /**
@@ -1314,8 +1299,6 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
 void hisi_qm_release_qp(struct hisi_qp *qp)
 {
 	struct hisi_qm *qm = qp->qm;
-	struct qm_dma *qdma = &qp->qdma;
-	struct device *dev = &qm->pdev->dev;
 
 	down_write(&qm->qps_lock);
 
@@ -1324,14 +1307,8 @@ void hisi_qm_release_qp(struct hisi_qp *
 		return;
 	}
 
-	if (qdma->va)
-		dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
-
-	qm->qp_array[qp->qp_id] = NULL;
-	clear_bit(qp->qp_id, qm->qp_bitmap);
 	qm->qp_in_used--;
-
-	kfree(qp);
+	idr_remove(&qm->qp_idr, qp->qp_id);
 
 	up_write(&qm->qps_lock);
 }
@@ -1408,41 +1385,13 @@ static int qm_qp_ctx_cfg(struct hisi_qp
 {
 	struct hisi_qm *qm = qp->qm;
 	struct device *dev = &qm->pdev->dev;
-	enum qm_hw_ver ver = qm->ver;
 	int qp_id = qp->qp_id;
 	int pasid = arg;
-	size_t off = 0;
 	int ret;
 
 	if (!qm_qp_avail_state(qm, qp, QP_START))
 		return -EPERM;
 
-#define QP_INIT_BUF(qp, type, size) do { \
-	(qp)->type = ((qp)->qdma.va + (off)); \
-	(qp)->type##_dma = (qp)->qdma.dma + (off); \
-	off += (size); \
-} while (0)
-
-	if (!qp->qdma.dma) {
-		dev_err(dev, "cannot get qm dma buffer\n");
-		return -EINVAL;
-	}
-
-	/* sq need 128 bytes alignment */
-	if (qp->qdma.dma & QM_SQE_DATA_ALIGN_MASK) {
-		dev_err(dev, "qm sq is not aligned to 128 byte\n");
-		return -EINVAL;
-	}
-
-	QP_INIT_BUF(qp, sqe, qm->sqe_size * QM_Q_DEPTH);
-	QP_INIT_BUF(qp, cqe, sizeof(struct qm_cqe) * QM_Q_DEPTH);
-
-	dev_dbg(dev, "init qp buffer(v%d):\n"
-		     " sqe	(%pK, %lx)\n"
-		     " cqe	(%pK, %lx)\n",
-		     ver, qp->sqe, (unsigned long)qp->sqe_dma,
-		     qp->cqe, (unsigned long)qp->cqe_dma);
-
 	ret = qm_qp_ctx_cfg(qp, qp_id, pasid);
 	if (ret)
 		return ret;
@@ -1700,6 +1649,99 @@ int hisi_qm_get_free_qp_num(struct hisi_
 }
 EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
 
+static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct qm_dma *qdma;
+	int i;
+
+	for (i = num - 1; i >= 0; i--) {
+		qdma = &qm->qp_array[i].qdma;
+		dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+	}
+
+	kfree(qm->qp_array);
+}
+
+static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
+{
+	struct device *dev = &qm->pdev->dev;
+	size_t off = qm->sqe_size * QM_Q_DEPTH;
+	struct hisi_qp *qp;
+
+	qp = &qm->qp_array[id];
+	qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
+					 GFP_KERNEL);
+	if (!qp->qdma.va)
+		return -ENOMEM;
+
+	qp->sqe = qp->qdma.va;
+	qp->sqe_dma = qp->qdma.dma;
+	qp->cqe = qp->qdma.va + off;
+	qp->cqe_dma = qp->qdma.dma + off;
+	qp->qdma.size = dma_size;
+	qp->qm = qm;
+	qp->qp_id = id;
+
+	return 0;
+}
+
+static int hisi_qm_memory_init(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	size_t qp_dma_size, off = 0;
+	int i, ret = 0;
+
+#define QM_INIT_BUF(qm, type, num) do { \
+	(qm)->type = ((qm)->qdma.va + (off)); \
+	(qm)->type##_dma = (qm)->qdma.dma + (off); \
+	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
+} while (0)
+
+	idr_init(&qm->qp_idr);
+	qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_Q_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
+			QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
+	qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
+					 GFP_ATOMIC);
+	dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
+	if (!qm->qdma.va)
+		return -ENOMEM;
+
+	QM_INIT_BUF(qm, eqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, sqc, qm->qp_num);
+	QM_INIT_BUF(qm, cqc, qm->qp_num);
+
+	qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
+	if (!qm->qp_array) {
+		ret = -ENOMEM;
+		goto err_alloc_qp_array;
+	}
+
+	/* one more page for device or qp statuses */
+	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
+		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
+	qp_dma_size = PAGE_ALIGN(qp_dma_size);
+	for (i = 0; i < qm->qp_num; i++) {
+		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
+		if (ret)
+			goto err_init_qp_mem;
+
+		dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size);
+	}
+
+	return ret;
+
+err_init_qp_mem:
+	hisi_qp_memory_uninit(qm, i);
+err_alloc_qp_array:
+	dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
+
+	return ret;
+}
+
 static void hisi_qm_pre_init(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -1782,6 +1824,10 @@ int hisi_qm_init(struct hisi_qm *qm)
 			goto err_irq_unregister;
 	}
 
+	ret = hisi_qm_memory_init(qm);
+	if (ret)
+		goto err_irq_unregister;
+
 	INIT_WORK(&qm->work, qm_work_process);
 
 	atomic_set(&qm->status.flags, QM_INIT);
@@ -1821,6 +1867,9 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 		return;
 	}
 
+	hisi_qp_memory_uninit(qm, qm->qp_num);
+	idr_destroy(&qm->qp_idr);
+
 	if (qm->qdma.va) {
 		hisi_qm_cache_wb(qm);
 		dma_free_coherent(dev, qm->qdma.size,
@@ -1949,22 +1998,10 @@ static int qm_eq_ctx_cfg(struct hisi_qm
 
 static int __hisi_qm_start(struct hisi_qm *qm)
 {
-	struct pci_dev *pdev = qm->pdev;
-	struct device *dev = &pdev->dev;
-	size_t off = 0;
 	int ret;
 
-#define QM_INIT_BUF(qm, type, num) do { \
-	(qm)->type = ((qm)->qdma.va + (off)); \
-	(qm)->type##_dma = (qm)->qdma.dma + (off); \
-	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
-} while (0)
-
 	WARN_ON(!qm->qdma.dma);
 
-	if (qm->qp_num == 0)
-		return -EINVAL;
-
 	if (qm->fun_type == QM_HW_PF) {
 		ret = qm_dev_mem_reset(qm);
 		if (ret)
@@ -1975,21 +2012,6 @@ static int __hisi_qm_start(struct hisi_q
 			return ret;
 	}
 
-	QM_INIT_BUF(qm, eqe, QM_Q_DEPTH);
-	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
-	QM_INIT_BUF(qm, sqc, qm->qp_num);
-	QM_INIT_BUF(qm, cqc, qm->qp_num);
-
-	dev_dbg(dev, "init qm buffer:\n"
-		     " eqe	(%pK, %lx)\n"
-		     " aeqe	(%pK, %lx)\n"
-		     " sqc	(%pK, %lx)\n"
-		     " cqc	(%pK, %lx)\n",
-		     qm->eqe, (unsigned long)qm->eqe_dma,
-		     qm->aeqe, (unsigned long)qm->aeqe_dma,
-		     qm->sqc, (unsigned long)qm->sqc_dma,
-		     qm->cqc, (unsigned long)qm->cqc_dma);
-
 	ret = qm_eq_ctx_cfg(qm);
 	if (ret)
 		return ret;
@@ -2034,33 +2056,6 @@ int hisi_qm_start(struct hisi_qm *qm)
 		goto err_unlock;
 	}
 
-	if (!qm->qp_bitmap) {
-		qm->qp_bitmap = devm_kcalloc(dev, BITS_TO_LONGS(qm->qp_num),
-					     sizeof(long), GFP_KERNEL);
-		qm->qp_array = devm_kcalloc(dev, qm->qp_num,
-					    sizeof(struct hisi_qp *),
-					    GFP_KERNEL);
-		if (!qm->qp_bitmap || !qm->qp_array) {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
-
-	if (!qm->qdma.va) {
-		qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_Q_DEPTH) +
-				QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
-				QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
-				QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
-		qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size,
-						 &qm->qdma.dma, GFP_KERNEL);
-		dev_dbg(dev, "allocate qm dma buf(va=%pK, dma=%pad, size=%zx)\n",
-			qm->qdma.va, &qm->qdma.dma, qm->qdma.size);
-		if (!qm->qdma.va) {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
-
 	ret = __hisi_qm_start(qm);
 	if (!ret)
 		atomic_set(&qm->status.flags, QM_START);
@@ -2083,8 +2078,8 @@ static int qm_restart(struct hisi_qm *qm
 
 	down_write(&qm->qps_lock);
 	for (i = 0; i < qm->qp_num; i++) {
-		qp = qm->qp_array[i];
-		if (qp && atomic_read(&qp->qp_status.flags) == QP_STOP &&
+		qp = &qm->qp_array[i];
+		if (atomic_read(&qp->qp_status.flags) == QP_STOP &&
 		    qp->is_resetting == true) {
 			ret = qm_start_qp_nolock(qp, 0);
 			if (ret < 0) {
@@ -2109,7 +2104,7 @@ static int qm_stop_started_qp(struct his
 	int i, ret;
 
 	for (i = 0; i < qm->qp_num; i++) {
-		qp = qm->qp_array[i];
+		qp = &qm->qp_array[i];
 		if (qp && atomic_read(&qp->qp_status.flags) == QP_START) {
 			qp->is_resetting = true;
 			ret = qm_stop_qp_nolock(qp);
@@ -2133,8 +2128,8 @@ static void qm_clear_queues(struct hisi_
 	int i;
 
 	for (i = 0; i < qm->qp_num; i++) {
-		qp = qm->qp_array[i];
-		if (qp && qp->is_resetting)
+		qp = &qm->qp_array[i];
+		if (qp->is_resetting)
 			memset(qp->qdma.va, 0, qp->qdma.size);
 	}
 
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -211,8 +211,8 @@ struct hisi_qm {
 	unsigned long reset_flag;
 
 	struct rw_semaphore qps_lock;
-	unsigned long *qp_bitmap;
-	struct hisi_qp **qp_array;
+	struct idr qp_idr;
+	struct hisi_qp *qp_array;
 
 	struct mutex mailbox_lock;