From: James Smart <jsmart2021@gmail.com>
Date: Tue, 12 Mar 2019 16:30:29 -0700
Subject: [PATCH] scsi: lpfc: Specify node affinity for queue memory allocation
References: bsc#1136217,jsc#SLE-4722
Git-commit: c1a21ebc0fa63dcdebd47b410170ea20eda41e1f
Patch-mainline: v5.2-rc1

Change the SLI4 queue creation code to use NUMA-node-based memory
allocation, keyed to the CPU each queue will be associated with:
queue structures are allocated on the node of their owning CPU,
while DMA buffer descriptors stay on the node of the PCI device.
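
The pattern, in outline (an illustrative sketch assembled from the
hunks below, not a verbatim excerpt; wqidx stands in for whichever
hardware queue index is being set up):

    cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);

    /* queue structure lands on the node of the CPU serving it */
    queue = kzalloc_node(sizeof(*queue) + (sizeof(void *) * pgcnt),
                         GFP_KERNEL, cpu_to_node(cpu));

    /* DMA-able buffer descriptors stay on the device's node */
    dmabuf = kzalloc_node(sizeof(*dmabuf), GFP_KERNEL,
                          dev_to_node(&phba->pcidev->dev));
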
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/lpfc/lpfc_init.c | 72 ++++++++++++++++++++++++++-----------------
drivers/scsi/lpfc/lpfc_sli.c | 10 +++---
drivers/scsi/lpfc/lpfc_sli4.h | 6 ++--
3 files changed, 53 insertions(+), 35 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 2e0c61d4e9e8..864e39dea3ed 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -8640,10 +8640,12 @@ static int
lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
{
struct lpfc_queue *qdesc;
+ int cpu;
+ cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- LPFC_CQE_EXP_COUNT);
+ LPFC_CQE_EXP_COUNT, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0508 Failed allocate fast-path NVME CQ (%d)\n",
@@ -8652,11 +8654,12 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
- qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+ qdesc->chann = cpu;
phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
- LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT);
+ LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT,
+ cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0509 Failed allocate fast-path NVME WQ (%d)\n",
@@ -8675,18 +8678,20 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
{
struct lpfc_queue *qdesc;
uint32_t wqesize;
+ int cpu;
+ cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
/* Create Fast Path FCP CQs */
if (phba->enab_exp_wqcq_pages)
/* Increase the CQ size when WQEs contain an embedded cdb */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- LPFC_CQE_EXP_COUNT);
+ LPFC_CQE_EXP_COUNT, cpu);
else
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- phba->sli4_hba.cq_ecount);
+ phba->sli4_hba.cq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
@@ -8694,7 +8699,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
- qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+ qdesc->chann = cpu;
phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
/* Create Fast Path FCP WQs */
@@ -8704,11 +8709,11 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
wqesize,
- LPFC_WQE_EXP_COUNT);
+ LPFC_WQE_EXP_COUNT, cpu);
} else
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.wq_esize,
- phba->sli4_hba.wq_ecount);
+ phba->sli4_hba.wq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -8741,7 +8746,7 @@ int
lpfc_sli4_queue_create(struct lpfc_hba *phba)
{
struct lpfc_queue *qdesc;
- int idx, eqidx;
+ int idx, eqidx, cpu;
struct lpfc_sli4_hdw_queue *qp;
struct lpfc_eq_intr_info *eqi;
@@ -8828,13 +8833,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create HBA Event Queues (EQs) */
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ /* determine EQ affinity */
+ eqidx = lpfc_find_eq_handle(phba, idx);
+ cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
/*
* If there are more Hardware Queues than available
- * CQs, multiple Hardware Queues may share a common EQ.
+ * EQs, multiple Hardware Queues may share a common EQ.
*/
if (idx >= phba->cfg_irq_chann) {
/* Share an existing EQ */
- eqidx = lpfc_find_eq_handle(phba, idx);
phba->sli4_hba.hdwq[idx].hba_eq =
phba->sli4_hba.hdwq[eqidx].hba_eq;
continue;
@@ -8842,7 +8849,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create an EQ */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.eq_esize,
- phba->sli4_hba.eq_ecount);
+ phba->sli4_hba.eq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0497 Failed allocate EQ (%d)\n", idx);
@@ -8852,9 +8859,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
qdesc->hdwq = idx;
/* Save the CPU this EQ is affinitised to */
- eqidx = lpfc_find_eq_handle(phba, idx);
- qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
- LPFC_FIND_BY_EQ);
+ qdesc->chann = cpu;
phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
qdesc->last_cpu = qdesc->chann;
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
@@ -8877,11 +8882,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
if (phba->nvmet_support) {
for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+ cpu = lpfc_find_cpu_handle(phba, idx,
+ LPFC_FIND_BY_HDWQ);
qdesc = lpfc_sli4_queue_alloc(
phba,
LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- phba->sli4_hba.cq_ecount);
+ phba->sli4_hba.cq_ecount,
+ cpu);
if (!qdesc) {
lpfc_printf_log(
phba, KERN_ERR, LOG_INIT,
@@ -8891,7 +8899,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
qdesc->qe_valid = 1;
qdesc->hdwq = idx;
- qdesc->chann = idx;
+ qdesc->chann = cpu;
phba->sli4_hba.nvmet_cqset[idx] = qdesc;
}
}
@@ -8901,10 +8909,11 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
* Create Slow Path Completion Queues (CQs)
*/
+ cpu = lpfc_find_cpu_handle(phba, 0, LPFC_FIND_BY_EQ);
/* Create slow-path Mailbox Command Complete Queue */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- phba->sli4_hba.cq_ecount);
+ phba->sli4_hba.cq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0500 Failed allocate slow-path mailbox CQ\n");
@@ -8916,7 +8925,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create slow-path ELS Complete Queue */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- phba->sli4_hba.cq_ecount);
+ phba->sli4_hba.cq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0501 Failed allocate slow-path ELS CQ\n");
@@ -8935,7 +8944,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.mq_esize,
- phba->sli4_hba.mq_ecount);
+ phba->sli4_hba.mq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0505 Failed allocate slow-path MQ\n");
@@ -8951,7 +8960,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create slow-path ELS Work Queue */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.wq_esize,
- phba->sli4_hba.wq_ecount);
+ phba->sli4_hba.wq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0504 Failed allocate slow-path ELS WQ\n");
@@ -8965,7 +8974,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create NVME LS Complete Queue */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
- phba->sli4_hba.cq_ecount);
+ phba->sli4_hba.cq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6079 Failed allocate NVME LS CQ\n");
@@ -8978,7 +8987,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create NVME LS Work Queue */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.wq_esize,
- phba->sli4_hba.wq_ecount);
+ phba->sli4_hba.wq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6080 Failed allocate NVME LS WQ\n");
@@ -8996,7 +9005,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create Receive Queue for header */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.rq_esize,
- phba->sli4_hba.rq_ecount);
+ phba->sli4_hba.rq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0506 Failed allocate receive HRQ\n");
@@ -9007,7 +9016,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create Receive Queue for data */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.rq_esize,
- phba->sli4_hba.rq_ecount);
+ phba->sli4_hba.rq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0507 Failed allocate receive DRQ\n");
@@ -9018,11 +9027,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
phba->nvmet_support) {
for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+ cpu = lpfc_find_cpu_handle(phba, idx,
+ LPFC_FIND_BY_HDWQ);
/* Create NVMET Receive Queue for header */
qdesc = lpfc_sli4_queue_alloc(phba,
LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.rq_esize,
- LPFC_NVMET_RQE_DEF_COUNT);
+ LPFC_NVMET_RQE_DEF_COUNT,
+ cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3146 Failed allocate "
@@ -9033,8 +9045,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc;
/* Only needed for header of RQ pair */
- qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb),
- GFP_KERNEL);
+ qdesc->rqbp = kzalloc_node(sizeof(*qdesc->rqbp),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
if (qdesc->rqbp == NULL) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6131 Failed allocate "
@@ -9049,7 +9062,8 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
qdesc = lpfc_sli4_queue_alloc(phba,
LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.rq_esize,
- LPFC_NVMET_RQE_DEF_COUNT);
+ LPFC_NVMET_RQE_DEF_COUNT,
+ cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3156 Failed allocate "
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 00db3d9baa8f..b45f317d27d1 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -14504,6 +14504,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
* @page_size: The size of a queue page
* @entry_size: The size of each queue entry for this queue.
* @entry count: The number of entries that this queue will handle.
+ * @cpu: The cpu that will primarily utilize this queue.
*
* This function allocates a queue structure and the DMAable memory used for
* the host resident queue. This function must be called before creating the
@@ -14511,7 +14512,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
**/
struct lpfc_queue *
lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
- uint32_t entry_size, uint32_t entry_count)
+ uint32_t entry_size, uint32_t entry_count, int cpu)
{
struct lpfc_queue *queue;
struct lpfc_dmabuf *dmabuf;
@@ -14527,8 +14528,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
- queue = kzalloc(sizeof(struct lpfc_queue) +
- (sizeof(void *) * pgcnt), GFP_KERNEL);
+ queue = kzalloc_node(sizeof(*queue) + (sizeof(void *) * pgcnt),
+ GFP_KERNEL, cpu_to_node(cpu));
if (!queue)
return NULL;
@@ -14551,7 +14552,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
queue->phba = phba;
for (x = 0; x < queue->page_count; x++) {
- dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+ dmabuf = kzalloc_node(sizeof(*dmabuf), GFP_KERNEL,
+ dev_to_node(&phba->pcidev->dev));
if (!dmabuf)
goto out_fail;
dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev,
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index b86ac85b65d0..bd5b5c3de35e 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -989,8 +989,10 @@ int lpfc_sli4_mbx_read_fcf_rec(struct lpfc_hba *, struct lpfcMboxq *,
uint16_t);
void lpfc_sli4_hba_reset(struct lpfc_hba *);
-struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
- uint32_t, uint32_t);
+struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *phba,
+ uint32_t page_size,
+ uint32_t entry_size,
+ uint32_t entry_count, int cpu);
void lpfc_sli4_queue_free(struct lpfc_queue *);
int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t);
void lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
--
2.16.4