From: James Smart <jsmart2021@gmail.com>
Date: Tue, 12 Mar 2019 16:30:29 -0700
Subject: [PATCH] scsi: lpfc: Specify node affinity for queue memory allocation
References: bsc#1136217,jsc#SLE-4722
Git-commit: c1a21ebc0fa63dcdebd47b410170ea20eda41e1f
Patch-mainline: v5.2-rc1

Change the SLI4 queue creation code to use NUMA node based memory
allocation, selecting the node from the CPU each queue will be
associated with.
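
The core pattern, as a minimal standalone sketch (the example_* names
are illustrative, not from the driver; only kzalloc_node() and
cpu_to_node() are real kernel APIs):

  #include <linux/slab.h>     /* kzalloc_node() */
  #include <linux/topology.h> /* cpu_to_node() */

  struct example_queue {
          int chann;          /* CPU this queue is affinitised to */
          void *pages[];      /* flexible array, as in lpfc_queue */
  };

  static struct example_queue *example_queue_alloc(unsigned int pgcnt,
                                                   int cpu)
  {
          struct example_queue *q;

          /*
           * Place the queue metadata on the NUMA node of the CPU that
           * will service the queue, mirroring the kzalloc() ->
           * kzalloc_node() conversion in lpfc_sli4_queue_alloc() below.
           */
          q = kzalloc_node(sizeof(*q) + sizeof(void *) * pgcnt,
                           GFP_KERNEL, cpu_to_node(cpu));
          if (!q)
                  return NULL;

          q->chann = cpu;
          return q;
  }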

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/lpfc/lpfc_init.c | 72 ++++++++++++++++++++++++++-----------------
 drivers/scsi/lpfc/lpfc_sli.c  | 10 +++---
 drivers/scsi/lpfc/lpfc_sli4.h |  6 ++--
 3 files changed, 53 insertions(+), 35 deletions(-)
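
Note that lpfc_sli4_queue_alloc() below uses two different node hints:
the queue structure, which the CPU polls, is placed with
cpu_to_node(cpu), while the lpfc_dmabuf descriptors are placed with
dev_to_node(&phba->pcidev->dev), presumably because the pages they
track come from dma_zalloc_coherent() against that same device. A
sketch of the split, with hypothetical helper names (only
kzalloc_node(), cpu_to_node() and dev_to_node() are kernel APIs):

  #include <linux/device.h>   /* dev_to_node() */
  #include <linux/slab.h>     /* kzalloc_node() */
  #include <linux/topology.h> /* cpu_to_node() */

  /* CPU-touched metadata: follow the servicing CPU's node. */
  static void *alloc_near_cpu(size_t size, int cpu)
  {
          return kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
  }

  /* Descriptors for device-visible buffers: follow the device's node. */
  static void *alloc_near_device(size_t size, struct device *dev)
  {
          return kzalloc_node(size, GFP_KERNEL, dev_to_node(dev));
  }

The distinction matters on multi-socket systems: EQ/CQ bookkeeping is
read in the completion path on the bound CPU, while the DMA descriptors
are naturally tied to the adapter's socket.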

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 2e0c61d4e9e8..864e39dea3ed 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -8640,10 +8640,12 @@ static int
 lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
 {
 	struct lpfc_queue *qdesc;
+	int cpu;
 
+	cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
 				      phba->sli4_hba.cq_esize,
-				      LPFC_CQE_EXP_COUNT);
+				      LPFC_CQE_EXP_COUNT, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0508 Failed allocate fast-path NVME CQ (%d)\n",
@@ -8652,11 +8654,12 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
 	}
 	qdesc->qe_valid = 1;
 	qdesc->hdwq = wqidx;
-	qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+	qdesc->chann = cpu;
 	phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
 
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
-				      LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT);
+				      LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT,
+				      cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0509 Failed allocate fast-path NVME WQ (%d)\n",
@@ -8675,18 +8678,20 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 {
 	struct lpfc_queue *qdesc;
 	uint32_t wqesize;
+	int cpu;
 
+	cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
 	/* Create Fast Path FCP CQs */
 	if (phba->enab_exp_wqcq_pages)
 		/* Increase the CQ size when WQEs contain an embedded cdb */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
 					      phba->sli4_hba.cq_esize,
-					      LPFC_CQE_EXP_COUNT);
+					      LPFC_CQE_EXP_COUNT, cpu);
 
 	else
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.cq_esize,
-					      phba->sli4_hba.cq_ecount);
+					      phba->sli4_hba.cq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
@@ -8694,7 +8699,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 	}
 	qdesc->qe_valid = 1;
 	qdesc->hdwq = wqidx;
-	qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+	qdesc->chann = cpu;
 	phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
 
 	/* Create Fast Path FCP WQs */
@@ -8704,11 +8709,11 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 			LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
 					      wqesize,
-					      LPFC_WQE_EXP_COUNT);
+					      LPFC_WQE_EXP_COUNT, cpu);
 	} else
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.wq_esize,
-					      phba->sli4_hba.wq_ecount);
+					      phba->sli4_hba.wq_ecount, cpu);
 
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -8741,7 +8746,7 @@ int
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
 	struct lpfc_queue *qdesc;
-	int idx, eqidx;
+	int idx, eqidx, cpu;
 	struct lpfc_sli4_hdw_queue *qp;
 	struct lpfc_eq_intr_info *eqi;
 
@@ -8828,13 +8833,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
 	/* Create HBA Event Queues (EQs) */
 	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		/* determine EQ affinity */
+		eqidx = lpfc_find_eq_handle(phba, idx);
+		cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
 		/*
 		 * If there are more Hardware Queues than available
-		 * CQs, multiple Hardware Queues may share a common EQ.
+		 * EQs, multiple Hardware Queues may share a common EQ.
 		 */
 		if (idx >= phba->cfg_irq_chann) {
 			/* Share an existing EQ */
-			eqidx = lpfc_find_eq_handle(phba, idx);
 			phba->sli4_hba.hdwq[idx].hba_eq =
 				phba->sli4_hba.hdwq[eqidx].hba_eq;
 			continue;
@@ -8842,7 +8849,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		/* Create an EQ */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.eq_esize,
-					      phba->sli4_hba.eq_ecount);
+					      phba->sli4_hba.eq_ecount, cpu);
 		if (!qdesc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"0497 Failed allocate EQ (%d)\n", idx);
@@ -8852,9 +8859,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		qdesc->hdwq = idx;
 
 		/* Save the CPU this EQ is affinitised to */
-		eqidx = lpfc_find_eq_handle(phba, idx);
-		qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
-						    LPFC_FIND_BY_EQ);
+		qdesc->chann = cpu;
 		phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
 		qdesc->last_cpu = qdesc->chann;
 		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
@@ -8877,11 +8882,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
 		if (phba->nvmet_support) {
 			for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+				cpu = lpfc_find_cpu_handle(phba, idx,
+							   LPFC_FIND_BY_HDWQ);
 				qdesc = lpfc_sli4_queue_alloc(
 						      phba,
 						      LPFC_DEFAULT_PAGE_SIZE,
 						      phba->sli4_hba.cq_esize,
-						      phba->sli4_hba.cq_ecount);
+						      phba->sli4_hba.cq_ecount,
+						      cpu);
 				if (!qdesc) {
 					lpfc_printf_log(
 						phba, KERN_ERR, LOG_INIT,
@@ -8891,7 +8899,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 				}
 				qdesc->qe_valid = 1;
 				qdesc->hdwq = idx;
-				qdesc->chann = idx;
+				qdesc->chann = cpu;
 				phba->sli4_hba.nvmet_cqset[idx] = qdesc;
 			}
 		}
@@ -8901,10 +8909,11 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	 * Create Slow Path Completion Queues (CQs)
 	 */
 
+	cpu = lpfc_find_cpu_handle(phba, 0, LPFC_FIND_BY_EQ);
 	/* Create slow-path Mailbox Command Complete Queue */
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.cq_esize,
-				      phba->sli4_hba.cq_ecount);
+				      phba->sli4_hba.cq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0500 Failed allocate slow-path mailbox CQ\n");
@@ -8916,7 +8925,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	/* Create slow-path ELS Complete Queue */
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.cq_esize,
-				      phba->sli4_hba.cq_ecount);
+				      phba->sli4_hba.cq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0501 Failed allocate slow-path ELS CQ\n");
@@ -8935,7 +8944,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.mq_esize,
-				      phba->sli4_hba.mq_ecount);
+				      phba->sli4_hba.mq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0505 Failed allocate slow-path MQ\n");
@@ -8951,7 +8960,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	/* Create slow-path ELS Work Queue */
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.wq_esize,
-				      phba->sli4_hba.wq_ecount);
+				      phba->sli4_hba.wq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0504 Failed allocate slow-path ELS WQ\n");
@@ -8965,7 +8974,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		/* Create NVME LS Complete Queue */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.cq_esize,
-					      phba->sli4_hba.cq_ecount);
+					      phba->sli4_hba.cq_ecount, cpu);
 		if (!qdesc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"6079 Failed allocate NVME LS CQ\n");
@@ -8978,7 +8987,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		/* Create NVME LS Work Queue */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.wq_esize,
-					      phba->sli4_hba.wq_ecount);
+					      phba->sli4_hba.wq_ecount, cpu);
 		if (!qdesc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"6080 Failed allocate NVME LS WQ\n");
@@ -8996,7 +9005,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	/* Create Receive Queue for header */
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.rq_esize,
-				      phba->sli4_hba.rq_ecount);
+				      phba->sli4_hba.rq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0506 Failed allocate receive HRQ\n");
@@ -9007,7 +9016,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	/* Create Receive Queue for data */
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 				      phba->sli4_hba.rq_esize,
-				      phba->sli4_hba.rq_ecount);
+				      phba->sli4_hba.rq_ecount, cpu);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0507 Failed allocate receive DRQ\n");
@@ -9018,11 +9027,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
 	    phba->nvmet_support) {
 		for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+			cpu = lpfc_find_cpu_handle(phba, idx,
+						   LPFC_FIND_BY_HDWQ);
 			/* Create NVMET Receive Queue for header */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      LPFC_DEFAULT_PAGE_SIZE,
 						      phba->sli4_hba.rq_esize,
-						      LPFC_NVMET_RQE_DEF_COUNT);
+						      LPFC_NVMET_RQE_DEF_COUNT,
+						      cpu);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3146 Failed allocate "
@@ -9033,8 +9045,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc;
 
 			/* Only needed for header of RQ pair */
-			qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb),
-					      GFP_KERNEL);
+			qdesc->rqbp = kzalloc_node(sizeof(*qdesc->rqbp),
+						   GFP_KERNEL,
+						   cpu_to_node(cpu));
 			if (qdesc->rqbp == NULL) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6131 Failed allocate "
@@ -9049,7 +9062,8 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      LPFC_DEFAULT_PAGE_SIZE,
 						      phba->sli4_hba.rq_esize,
-						      LPFC_NVMET_RQE_DEF_COUNT);
+						      LPFC_NVMET_RQE_DEF_COUNT,
+						      cpu);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3156 Failed allocate "
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 00db3d9baa8f..b45f317d27d1 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -14504,6 +14504,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
  * @page_size: The size of a queue page
  * @entry_size: The size of each queue entry for this queue.
 * @entry_count: The number of entries that this queue will handle.
+ * @cpu: The cpu that will primarily utilize this queue.
  *
  * This function allocates a queue structure and the DMAable memory used for
  * the host resident queue. This function must be called before creating the
@@ -14511,7 +14512,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
  **/
 struct lpfc_queue *
 lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
-		      uint32_t entry_size, uint32_t entry_count)
+		      uint32_t entry_size, uint32_t entry_count, int cpu)
 {
 	struct lpfc_queue *queue;
 	struct lpfc_dmabuf *dmabuf;
@@ -14527,8 +14528,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
 		pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
 
-	queue = kzalloc(sizeof(struct lpfc_queue) +
-			(sizeof(void *) * pgcnt), GFP_KERNEL);
+	queue = kzalloc_node(sizeof(*queue) + (sizeof(void *) * pgcnt),
+			     GFP_KERNEL, cpu_to_node(cpu));
 	if (!queue)
 		return NULL;
 
@@ -14551,7 +14552,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	queue->phba = phba;
 
 	for (x = 0; x < queue->page_count; x++) {
-		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+		dmabuf = kzalloc_node(sizeof(*dmabuf), GFP_KERNEL,
+				      dev_to_node(&phba->pcidev->dev));
 		if (!dmabuf)
 			goto out_fail;
 		dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev,
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index b86ac85b65d0..bd5b5c3de35e 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -989,8 +989,10 @@ int lpfc_sli4_mbx_read_fcf_rec(struct lpfc_hba *, struct lpfcMboxq *,
 			       uint16_t);
 
 void lpfc_sli4_hba_reset(struct lpfc_hba *);
-struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
-					 uint32_t, uint32_t);
+struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *phba,
+					 uint32_t page_size,
+					 uint32_t entry_size,
+					 uint32_t entry_count, int cpu);
 void lpfc_sli4_queue_free(struct lpfc_queue *);
 int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t);
 void lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
-- 
2.16.4