From: Saurabh Sengar <ssengar@linux.microsoft.com>
Date: Mon, 31 Oct 2022 23:06:01 -0700
Subject: net: mana: Assign interrupts to CPUs based on NUMA nodes
Patch-mainline: v6.2-rc1
Git-commit: 71fa6887eeca7b631528f9c7a39815498de8028c
References: bsc#1208153

In large VMs with multiple NUMA nodes, network performance is usually
best if network interrupts are all assigned to the same virtual NUMA
node. Assign each IRQ to an online CPU according to a NUMA-aware
policy: CPUs local to the device's NUMA node are used first, followed
by non-local CPUs, wrapping around once every online CPU has been
used.
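
For illustration, a minimal userspace sketch of that spread policy
follows. It is not the kernel's cpumask_local_spread()
implementation, and the CPU-to-node table is a made-up two-node
topology:

    #include <stdio.h>

    /* Hypothetical map of CPU -> NUMA node (two nodes, 8 CPUs). */
    static const int cpu_node[] = { 0, 0, 0, 0, 1, 1, 1, 1 };
    #define NR_CPUS ((int)(sizeof(cpu_node) / sizeof(cpu_node[0])))

    /* i-th pick: node-local CPUs first, then remote ones, wrapping
     * around once every CPU has been used.
     */
    static int local_spread(int i, int node)
    {
            int order[NR_CPUS], n = 0, cpu;

            for (cpu = 0; cpu < NR_CPUS; cpu++)
                    if (cpu_node[cpu] == node)
                            order[n++] = cpu;       /* local first */
            for (cpu = 0; cpu < NR_CPUS; cpu++)
                    if (cpu_node[cpu] != node)
                            order[n++] = cpu;       /* then remote */
            return order[i % n];                    /* wrap */
    }

    int main(void)
    {
            int i;

            /* Ten IRQs on node 1: prints CPUs 4 5 6 7 0 1 2 3 4 5 */
            for (i = 0; i < 10; i++)
                    printf("IRQ %d -> CPU %d\n", i, local_spread(i, 1));
            return 0;
    }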

Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://lore.kernel.org/r/1667282761-11547-1-git-send-email-ssengar@linux.microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/microsoft/mana/gdma.h      |    1 +
 drivers/net/ethernet/microsoft/mana/gdma_main.c |   30 +++++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)
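
The error paths in the diff below follow the kernel's layered goto
idiom: each failure jumps to the label that releases everything
acquired so far, and the labels fall through in reverse order of
acquisition (the new free_mask label falls through into free_irq).
A standalone sketch of the idiom, not the driver's code:

    #include <stdlib.h>

    struct ctx { char *a, *b; };

    static int setup(struct ctx *c)
    {
            int err = -1;

            c->a = malloc(64);
            if (!c->a)
                    goto out;       /* nothing to undo yet */

            c->b = malloc(64);
            if (!c->b)
                    goto free_a;    /* undo the first step only */

            return 0;               /* success: caller owns a and b */

    free_a:
            free(c->a);
    out:
            return err;
    }

    int main(void)
    {
            struct ctx c;

            if (setup(&c))
                    return 1;
            free(c.b);
            free(c.a);
            return 0;
    }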

--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -353,6 +353,7 @@ struct gdma_context {
 	void __iomem		*shm_base;
 	void __iomem		*db_page_base;
 	u32 db_page_size;
+	int                     numa_node;
 
 	/* Shared memory chanenl (used to bootstrap HWC) */
 	struct shm_channel	shm_channel;
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1197,8 +1197,10 @@ static int mana_gd_setup_irqs(struct pci
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 	struct gdma_irq_context *gic;
 	unsigned int max_irqs;
+	u16 *cpus;
+	cpumask_var_t req_mask;
 	int nvec, irq;
-	int err, i, j;
+	int err, i = 0, j;
 
 	if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
 		max_queues_per_port = MANA_MAX_NUM_QUEUES;
@@ -1217,7 +1219,21 @@ static int mana_gd_setup_irqs(struct pci
 		goto free_irq_vector;
 	}
 
+	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) {
+		err = -ENOMEM;
+		goto free_irq;
+	}
+
+	cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL);
+	if (!cpus) {
+		err = -ENOMEM;
+		goto free_mask;
+	}
+	for (i = 0; i < nvec; i++)
+		cpus[i] = cpumask_local_spread(i, gc->numa_node);
+
 	for (i = 0; i < nvec; i++) {
+		cpumask_set_cpu(cpus[i], req_mask);
 		gic = &gc->irq_contexts[i];
 		gic->handler = NULL;
 		gic->arg = NULL;
@@ -1225,13 +1241,17 @@ static int mana_gd_setup_irqs(struct pci
 		irq = pci_irq_vector(pdev, i);
 		if (irq < 0) {
 			err = irq;
-			goto free_irq;
+			goto free_mask;
 		}
 
 		err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic);
 		if (err)
-			goto free_irq;
+			goto free_mask;
+		irq_set_affinity_and_hint(irq, req_mask);
+		cpumask_clear(req_mask);
 	}
+	free_cpumask_var(req_mask);
+	kfree(cpus);
 
 	err = mana_gd_alloc_res_map(nvec, &gc->msix_resource);
 	if (err)
@@ -1242,6 +1262,9 @@ static int mana_gd_setup_irqs(struct pci
 
 	return 0;
 
+free_mask:
+	free_cpumask_var(req_mask);
+	kfree(cpus);
 free_irq:
 	for (j = i - 1; j >= 0; j--) {
 		irq = pci_irq_vector(pdev, j);
@@ -1371,6 +1394,7 @@ static int mana_gd_probe(struct pci_dev
 	if (!bar0_va)
 		goto free_gc;
 
+	gc->numa_node = dev_to_node(&pdev->dev);
 	gc->is_pf = mana_is_pf(pdev->device);
 	gc->bar0_va = bar0_va;
 	gc->dev = &pdev->dev;