From 69baa0445e9d96e62aa9147363d34438afe58793 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Feb 22 2023 11:12:54 +0000 Subject: Merge remote-tracking branch 'origin/users/tbogendoerfer/SLE15-SP2-LTSS/for-next' into SLE15-SP2-LTSS --- diff --git a/patches.suse/genirq-Provide-new-interfaces-for-affinity-hints.patch b/patches.suse/genirq-Provide-new-interfaces-for-affinity-hints.patch new file mode 100644 index 0000000..4219818 --- /dev/null +++ b/patches.suse/genirq-Provide-new-interfaces-for-affinity-hints.patch @@ -0,0 +1,130 @@ +From: Thomas Gleixner +Date: Fri, 3 Sep 2021 11:24:17 -0400 +Subject: genirq: Provide new interfaces for affinity hints +Patch-mainline: v5.17-rc1 +Git-commit: 65c7cdedeb3026fabcc967a7aae2f755ad4d0783 +References: bsc#1208153 + +The discussion about removing the side effect of irq_set_affinity_hint() of +actually applying the cpumask (if not NULL) as affinity to the interrupt, +unearthed a few unpleasantries: + + 1) The modular perf drivers rely on the current behaviour for the very + wrong reasons. + + 2) While none of the other drivers prevents user space from changing + the affinity, a cursory inspection shows that there are at least + expectations in some drivers. + +#1 needs to be cleaned up anyway, so that's not a problem + +#2 might result in subtle regressions especially when irqbalanced (which + nowadays ignores the affinity hint) is disabled. + +Provide new interfaces: + + irq_update_affinity_hint() - Only sets the affinity hint pointer + irq_set_affinity_and_hint() - Set the pointer and apply the affinity to + the interrupt + +Make irq_set_affinity_hint() a wrapper around __irq_apply_affinity_hint() and +document it to be phased out. + +SUSE: For kABI compliance changed this patch to leave irq_set_affinity_hint() + alone and only add the new interfaces. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Nitesh Narayan Lal +Signed-off-by: Thomas Gleixner +Reviewed-by: Ming Lei +Link: https://lore.kernel.org/r/20210501021832.743094-1-jesse.brandeburg@intel.com +Link: https://lore.kernel.org/r/20210903152430.244937-2-nitesh@redhat.com +Acked-by: Thomas Bogendoerfer +--- + include/linux/interrupt.h | 42 ++++++++++++++++++++++++++++++++++++++++++ + kernel/irq/manage.c | 16 ++++++++++++++++ + 2 files changed, 58 insertions(+) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -336,6 +336,36 @@ extern int irq_can_set_affinity(unsigned + extern int irq_select_affinity(unsigned int irq); + + extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); ++extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, ++ bool setaffinity); ++ ++/** ++ * irq_update_affinity_hint - Update the affinity hint ++ * @irq: Interrupt to update ++ * @m: cpumask pointer (NULL to clear the hint) ++ * ++ * Updates the affinity hint, but does not change the affinity of the interrupt. ++ */ ++static inline int ++irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) ++{ ++ return __irq_apply_affinity_hint(irq, m, false); ++} ++ ++/** ++ * irq_set_affinity_and_hint - Update the affinity hint and apply the provided ++ * cpumask to the interrupt ++ * @irq: Interrupt to update ++ * @m: cpumask pointer (NULL to clear the hint) ++ * ++ * Updates the affinity hint and if @m is not NULL it applies it as the ++ * affinity of that interrupt. 
++ */ ++static inline int ++irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) ++{ ++ return __irq_apply_affinity_hint(irq, m, true); ++} + + extern int + irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); +@@ -365,6 +395,18 @@ static inline int irq_can_set_affinity(u + + static inline int irq_select_affinity(unsigned int irq) { return 0; } + ++static inline int irq_update_affinity_hint(unsigned int irq, ++ const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ ++static inline int irq_set_affinity_and_hint(unsigned int irq, ++ const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ + static inline int irq_set_affinity_hint(unsigned int irq, + const struct cpumask *m) + { +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -322,6 +322,22 @@ int irq_set_affinity_hint(unsigned int i + } + EXPORT_SYMBOL_GPL(irq_set_affinity_hint); + ++int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, ++ bool setaffinity) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->affinity_hint = m; ++ irq_put_desc_unlock(desc, flags); ++ if (m && setaffinity) ++ __irq_set_affinity(irq, m, false); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); ++ + static void irq_affinity_notify(struct work_struct *work) + { + struct irq_affinity_notify *notify = diff --git a/patches.suse/net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch b/patches.suse/net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch new file mode 100644 index 0000000..f1adcaf --- /dev/null +++ b/patches.suse/net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch @@ -0,0 +1,107 @@ +From: Saurabh Sengar +Date: Mon, 31 Oct 2022 23:06:01 -0700 +Subject: net: mana: Assign interrupts to CPUs based on NUMA nodes +Patch-mainline: v6.2-rc1 +Git-commit: 71fa6887eeca7b631528f9c7a39815498de8028c +References: bsc#1208153 + +In large 
VMs with multiple NUMA nodes, network performance is usually +best if network interrupts are all assigned to the same virtual NUMA +node. This patch assigns online CPU according to a numa aware policy, +local cpus are returned first, followed by non-local ones, then it wraps +around. + +Signed-off-by: Saurabh Sengar +Reviewed-by: Haiyang Zhang +Link: https://lore.kernel.org/r/1667282761-11547-1-git-send-email-ssengar@linux.microsoft.com +Signed-off-by: Paolo Abeni +Acked-by: Thomas Bogendoerfer +--- + drivers/net/ethernet/microsoft/mana/gdma.h | 1 + drivers/net/ethernet/microsoft/mana/gdma_main.c | 30 +++++++++++++++++++++--- + 2 files changed, 28 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -353,6 +353,7 @@ struct gdma_context { + void __iomem *shm_base; + void __iomem *db_page_base; + u32 db_page_size; ++ int numa_node; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1197,8 +1197,10 @@ static int mana_gd_setup_irqs(struct pci + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + unsigned int max_irqs; ++ u16 *cpus; ++ cpumask_var_t req_mask; + int nvec, irq; +- int err, i, j; ++ int err, i = 0, j; + + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; +@@ -1217,7 +1219,21 @@ static int mana_gd_setup_irqs(struct pci + goto free_irq_vector; + } + ++ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { ++ err = -ENOMEM; ++ goto free_irq; ++ } ++ ++ cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); ++ if (!cpus) { ++ err = -ENOMEM; ++ goto free_mask; ++ } ++ for (i = 0; i < nvec; i++) ++ cpus[i] = cpumask_local_spread(i, gc->numa_node); ++ + for (i = 0; i < nvec; i++) { ++ cpumask_set_cpu(cpus[i], req_mask); + gic = &gc->irq_contexts[i]; + gic->handler = 
NULL; + gic->arg = NULL; +@@ -1225,13 +1241,17 @@ static int mana_gd_setup_irqs(struct pci + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; +- goto free_irq; ++ goto free_mask; + } + + err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); + if (err) +- goto free_irq; ++ goto free_mask; ++ irq_set_affinity_and_hint(irq, req_mask); ++ cpumask_clear(req_mask); + } ++ free_cpumask_var(req_mask); ++ kfree(cpus); + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) +@@ -1242,6 +1262,9 @@ static int mana_gd_setup_irqs(struct pci + + return 0; + ++free_mask: ++ free_cpumask_var(req_mask); ++ kfree(cpus); + free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); +@@ -1371,6 +1394,7 @@ static int mana_gd_probe(struct pci_dev + if (!bar0_va) + goto free_gc; + ++ gc->numa_node = dev_to_node(&pdev->dev); + gc->is_pf = mana_is_pf(pdev->device); + gc->bar0_va = bar0_va; + gc->dev = &pdev->dev; diff --git a/patches.suse/net-mana-Fix-IRQ-name-add-PCI-and-queue-number.patch b/patches.suse/net-mana-Fix-IRQ-name-add-PCI-and-queue-number.patch index c52441b..87c2604 100644 --- a/patches.suse/net-mana-Fix-IRQ-name-add-PCI-and-queue-number.patch +++ b/patches.suse/net-mana-Fix-IRQ-name-add-PCI-and-queue-number.patch @@ -39,7 +39,7 @@ Acked-by: Thomas Bogendoerfer struct gdma_context { --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c -@@ -1222,13 +1222,20 @@ static int mana_gd_setup_irqs(struct pci +@@ -1238,13 +1238,20 @@ static int mana_gd_setup_irqs(struct pci gic->handler = NULL; gic->arg = NULL; @@ -53,11 +53,11 @@ Acked-by: Thomas Bogendoerfer irq = pci_irq_vector(pdev, i); if (irq < 0) { err = irq; - goto free_irq; + goto free_mask; } - err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); if (err) - goto free_irq; - } + goto free_mask; + irq_set_affinity_and_hint(irq, req_mask); diff --git 
a/patches.suse/net-mana-Fix-accessing-freed-irq-affinity_hint.patch b/patches.suse/net-mana-Fix-accessing-freed-irq-affinity_hint.patch new file mode 100644 index 0000000..d207f4d --- /dev/null +++ b/patches.suse/net-mana-Fix-accessing-freed-irq-affinity_hint.patch @@ -0,0 +1,131 @@ +From: Haiyang Zhang +Date: Mon, 6 Feb 2023 13:28:49 -0800 +Subject: net: mana: Fix accessing freed irq affinity_hint +Patch-mainline: v6.2-rc8 +Git-commit: 18a048370b06a3a521219e9e5b10bdc2178ef19c +References: bsc#1208153 + +After calling irq_set_affinity_and_hint(), the cpumask pointer is +saved in desc->affinity_hint, and will be used later when reading +/proc/irq/<irq>/affinity_hint. So the cpumask variable needs to be +persistent. Otherwise, we are accessing freed memory when reading +the affinity_hint file. + +Also, need to clear affinity_hint before free_irq(), otherwise there +is a one-time warning and stack trace during module unloading: + + [ 243.948687] WARNING: CPU: 10 PID: 1589 at kernel/irq/manage.c:1913 free_irq+0x318/0x360 + ... + [ 243.948753] Call Trace: + [ 243.948754] + [ 243.948760] mana_gd_remove_irqs+0x78/0xc0 [mana] + [ 243.948767] mana_gd_remove+0x3e/0x80 [mana] + [ 243.948773] pci_device_remove+0x3d/0xb0 + [ 243.948778] device_remove+0x46/0x70 + [ 243.948782] device_release_driver_internal+0x1fe/0x280 + [ 243.948785] driver_detach+0x4e/0xa0 + [ 243.948787] bus_remove_driver+0x70/0xf0 + [ 243.948789] driver_unregister+0x35/0x60 + [ 243.948792] pci_unregister_driver+0x44/0x90 + [ 243.948794] mana_driver_exit+0x14/0x3fe [mana] + [ 243.948800] __do_sys_delete_module.constprop.0+0x185/0x2f0 + +To fix the bug, use the persistent mask, cpumask_of(cpu#), and set +affinity_hint to NULL before freeing the IRQ, as required by free_irq(). 
+ +Cc: stable@vger.kernel.org +Fixes: 71fa6887eeca ("net: mana: Assign interrupts to CPUs based on NUMA nodes") +Signed-off-by: Haiyang Zhang +Reviewed-by: Michael Kelley +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/1675718929-19565-1-git-send-email-haiyangz@microsoft.com +Signed-off-by: Jakub Kicinski +Acked-by: Thomas Bogendoerfer +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 37 +++++++----------------- + 1 file changed, 11 insertions(+), 26 deletions(-) + +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1196,9 +1196,7 @@ static int mana_gd_setup_irqs(struct pci + unsigned int max_queues_per_port = num_online_cpus(); + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; +- unsigned int max_irqs; +- u16 *cpus; +- cpumask_var_t req_mask; ++ unsigned int max_irqs, cpu; + int nvec, irq; + int err, i = 0, j; + +@@ -1219,21 +1217,7 @@ static int mana_gd_setup_irqs(struct pci + goto free_irq_vector; + } + +- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { +- err = -ENOMEM; +- goto free_irq; +- } +- +- cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); +- if (!cpus) { +- err = -ENOMEM; +- goto free_mask; +- } +- for (i = 0; i < nvec; i++) +- cpus[i] = cpumask_local_spread(i, gc->numa_node); +- + for (i = 0; i < nvec; i++) { +- cpumask_set_cpu(cpus[i], req_mask); + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; +@@ -1248,17 +1232,16 @@ static int mana_gd_setup_irqs(struct pci + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; +- goto free_mask; ++ goto free_irq; + } + + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) +- goto free_mask; +- irq_set_affinity_and_hint(irq, req_mask); +- cpumask_clear(req_mask); ++ goto free_irq; ++ ++ cpu = cpumask_local_spread(i, gc->numa_node); ++ irq_set_affinity_and_hint(irq, cpumask_of(cpu)); + } +- free_cpumask_var(req_mask); +- kfree(cpus); + + err = 
mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) +@@ -1269,13 +1252,12 @@ static int mana_gd_setup_irqs(struct pci + + return 0; + +-free_mask: +- free_cpumask_var(req_mask); +- kfree(cpus); + free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); + gic = &gc->irq_contexts[j]; ++ ++ irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + +@@ -1303,6 +1285,9 @@ static void mana_gd_remove_irqs(struct p + continue; + + gic = &gc->irq_contexts[i]; ++ ++ /* Need to clear the hint before free_irq */ ++ irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + diff --git a/patches.suse/vmxnet3-move-rss-code-block-under-eop-descriptor.patch b/patches.suse/vmxnet3-move-rss-code-block-under-eop-descriptor.patch new file mode 100644 index 0000000..cb6a42c --- /dev/null +++ b/patches.suse/vmxnet3-move-rss-code-block-under-eop-descriptor.patch @@ -0,0 +1,94 @@ +From: Ronak Doshi +Date: Wed, 8 Feb 2023 14:38:59 -0800 +Subject: vmxnet3: move rss code block under eop descriptor +Patch-mainline: v6.2 +Git-commit: ec76d0c2da5c6dfb6a33f1545cc15997013923da +References: bsc#1208212 + +Commit b3973bb40041 ("vmxnet3: set correct hash type based on +rss information") added hashType information into skb. However, +the rssType field is populated only for the eop descriptor. This can lead +to incorrect reporting of hashType for packets which use +multiple rx descriptors. Multiple rx descriptors are used +for Jumbo frame or LRO packets, which can hit this issue. + +This patch moves the RSS codeblock under eop descriptor. 
+ +Cc: stable@vger.kernel.org +Fixes: b3973bb40041 ("vmxnet3: set correct hash type based on rss information") +Signed-off-by: Ronak Doshi +Acked-by: Peng Li +Acked-by: Guolin Yang +Link: https://lore.kernel.org/r/20230208223900.5794-1-doshir@vmware.com +Signed-off-by: Jakub Kicinski +Acked-by: Thomas Bogendoerfer +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++++++------------------- + 1 file changed, 25 insertions(+), 25 deletions(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1479,31 +1479,6 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx + rxd->len = rbi->len; + } + +-#ifdef VMXNET3_RSS +- if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && +- (adapter->netdev->features & NETIF_F_RXHASH)) { +- enum pkt_hash_types hash_type; +- +- switch (rcd->rssType) { +- case VMXNET3_RCD_RSS_TYPE_IPV4: +- case VMXNET3_RCD_RSS_TYPE_IPV6: +- hash_type = PKT_HASH_TYPE_L3; +- break; +- case VMXNET3_RCD_RSS_TYPE_TCPIPV4: +- case VMXNET3_RCD_RSS_TYPE_TCPIPV6: +- case VMXNET3_RCD_RSS_TYPE_UDPIPV4: +- case VMXNET3_RCD_RSS_TYPE_UDPIPV6: +- hash_type = PKT_HASH_TYPE_L4; +- break; +- default: +- hash_type = PKT_HASH_TYPE_L3; +- break; +- } +- skb_set_hash(ctx->skb, +- le32_to_cpu(rcd->rssHash), +- hash_type); +- } +-#endif + skb_put(ctx->skb, rcd->len); + + if (VMXNET3_VERSION_GE_2(adapter) && +@@ -1580,6 +1555,31 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx + u32 mtu = adapter->netdev->mtu; + skb->len += skb->data_len; + ++#ifdef VMXNET3_RSS ++ if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && ++ (adapter->netdev->features & NETIF_F_RXHASH)) { ++ enum pkt_hash_types hash_type; ++ ++ switch (rcd->rssType) { ++ case VMXNET3_RCD_RSS_TYPE_IPV4: ++ case VMXNET3_RCD_RSS_TYPE_IPV6: ++ hash_type = PKT_HASH_TYPE_L3; ++ break; ++ case VMXNET3_RCD_RSS_TYPE_TCPIPV4: ++ case VMXNET3_RCD_RSS_TYPE_TCPIPV6: ++ case VMXNET3_RCD_RSS_TYPE_UDPIPV4: ++ case VMXNET3_RCD_RSS_TYPE_UDPIPV6: ++ hash_type = PKT_HASH_TYPE_L4; ++ break; ++ default: ++ hash_type = 
PKT_HASH_TYPE_L3; ++ break; ++ } ++ skb_set_hash(skb, ++ le32_to_cpu(rcd->rssHash), ++ hash_type); ++ } ++#endif + vmxnet3_rx_csum(adapter, skb, + (union Vmxnet3_GenericDesc *)rcd); + skb->protocol = eth_type_trans(skb, adapter->netdev); diff --git a/series.conf b/series.conf index 0f36836..d1620b5 100644 --- a/series.conf +++ b/series.conf @@ -23015,6 +23015,7 @@ patches.suse/x86-prepare-asm-files-for-straight-line-speculation.patch patches.suse/x86-prepare-inline-asm-for-straight-line-speculation.patch patches.suse/x86-add-straight-line-speculation-mitigation.patch + patches.suse/genirq-Provide-new-interfaces-for-affinity-hints.patch patches.suse/powerpc-watchdog-Fix-missed-watchdog-reset-due-to-me.patch patches.suse/powerpc-watchdog-tighten-non-atomic-read-modify-writ.patch patches.suse/powerpc-watchdog-Avoid-holding-wd_smp_lock-over-prin.patch @@ -23332,6 +23333,7 @@ patches.suse/xen-netback-Ensure-protocol-headers-don-t-fall-in-th.patch patches.suse/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch patches.suse/0001-drm-vmwgfx-Validate-the-box-size-for-the-snooped-cur.patch + patches.suse/net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch patches.suse/x86-bugs-Flush-IBP-in-ib_prctl_set.patch patches.suse/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch patches.suse/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch @@ -23345,6 +23347,8 @@ patches.suse/module-Don-t-wait-for-GOING-modules.patch patches.suse/net-mana-Fix-IRQ-name-add-PCI-and-queue-number.patch patches.suse/sctp-fail-if-no-bound-addresses-can-be-used-for-a-gi.patch + patches.suse/net-mana-Fix-accessing-freed-irq-affinity_hint.patch + patches.suse/vmxnet3-move-rss-code-block-under-eop-descriptor.patch ######################################################## # end of sorted patches