diff --git a/patches.kabi/blk-mq-kABI-fixes-for-blk_mq_queue_map.patch b/patches.kabi/blk-mq-kABI-fixes-for-blk_mq_queue_map.patch
new file mode 100644
index 0000000..e6fee8e
--- /dev/null
+++ b/patches.kabi/blk-mq-kABI-fixes-for-blk_mq_queue_map.patch
@@ -0,0 +1,52 @@
+From: Daniel Wagner
+Date: Mon, 04 Oct 2021 16:06:52 +0200
+Subject: blk-mq: kABI fixes for blk_mq_queue_map
+Patch-Mainline: never, kABI fix for SLE15-SP3
+References: bsc#1185762
+
+Signed-off-by: Daniel Wagner
+---
+ block/blk-mq-pci.c | 2 ++
+ block/blk-mq-virtio.c | 2 ++
+ include/linux/blk-mq.h | 4 ++++
+ 3 files changed, 8 insertions(+)
+
+--- a/block/blk-mq-pci.c
++++ b/block/blk-mq-pci.c
+@@ -8,7 +8,9 @@
+ #include
+ #include
+ #include
++#ifndef __GENKSYMS__
+ #include
++#endif
+
+ #include "blk-mq.h"
+
+--- a/block/blk-mq-virtio.c
++++ b/block/blk-mq-virtio.c
+@@ -7,7 +7,9 @@
+ #include
+ #include
+ #include
++#ifndef __GENKSYMS__
+ #include
++#endif
+ #include "blk-mq.h"
+
+ /**
+--- a/include/linux/blk-mq.h
++++ b/include/linux/blk-mq.h
+@@ -191,8 +191,12 @@ struct blk_mq_hw_ctx {
+ struct blk_mq_queue_map {
+ unsigned int *mq_map;
+ unsigned int nr_queues;
++#ifndef __GENKSYMS__
+ unsigned int queue_offset:31;
+ unsigned int use_managed_irq:1;
++#else
++ unsigned int queue_offset;
++#endif
+ };
+
+ /**
diff --git a/patches.suse/blk-mq-don-t-deactivate-hctx-if-managed-irq-isn-t-used.patch b/patches.suse/blk-mq-don-t-deactivate-hctx-if-managed-irq-isn-t-used.patch
new file mode 100644
index 0000000..ea8a87a
--- /dev/null
+++ b/patches.suse/blk-mq-don-t-deactivate-hctx-if-managed-irq-isn-t-used.patch
@@ -0,0 +1,99 @@
+From: Ming Lei
+Date: Wed, 18 Aug 2021 22:44:28 +0800
+Subject: blk-mq: don't deactivate hctx if managed irq isn't used
+Patch-mainline: Not yet, https://lore.kernel.org/linux-block/20210818144428.896216-1-ming.lei@redhat.com/
+References: bsc#1185762
+
+blk-mq deactivates one hctx when the last CPU in hctx->cpumask become
+offline by draining all requests originated from this hctx and moving new
+allocation to other active hctx. This way is for avoiding inflight IO in
+case of managed irq because managed irq is shutdown when the last CPU in
+the irq's affinity becomes offline.
+
+However, lots of drivers(nvme fc, rdma, tcp, loop, ...) don't use managed
+irq, so they needn't to deactivate hctx when the last CPU becomes offline.
+Also, some of them are the only user of blk_mq_alloc_request_hctx() which
+is used for connecting io queue. And their requirement is that the connect
+request needs to be submitted successfully via one specified hctx even
+though all CPUs in this hctx->cpumask have become offline.
+
+Addressing the requirement for nvme fc/rdma/loop by allowing to
+allocate request from one hctx when all CPUs in this hctx are offline,
+since these drivers don't use managed irq.
+
+Finally don't deactivate one hctx when it doesn't use managed irq.
+
+Tested-by: Wen Xiong
+Reviewed-by: John Garry
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Ming Lei
+Acked-by: Daniel Wagner
+---
+ block/blk-mq.c | 35 +++++++++++++++++++++++++----------
+ 1 file changed, 25 insertions(+), 10 deletions(-)
+
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -424,6 +424,23 @@ struct request *blk_mq_alloc_request(str
+ }
+ EXPORT_SYMBOL(blk_mq_alloc_request);
+
++static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
++{
++ int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
++
++ if (cpu >= nr_cpu_ids)
++ cpu = cpumask_first(hctx->cpumask);
++ return cpu;
++}
++
++static bool blk_mq_hctx_use_managed_irq(struct blk_mq_hw_ctx *hctx)
++{
++ if (hctx->type == HCTX_TYPE_POLL)
++ return false;
++
++ return hctx->queue->tag_set->map[hctx->type].use_managed_irq;
++}
++
+ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
+ {
+@@ -465,7 +482,10 @@ struct request *blk_mq_alloc_request_hct
+ data.hctx = q->queue_hw_ctx[hctx_idx];
+ if (!blk_mq_hw_queue_mapped(data.hctx))
+ goto out_queue_exit;
+- cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
++
++ WARN_ON_ONCE(blk_mq_hctx_use_managed_irq(data.hctx));
++
++ cpu = blk_mq_first_mapped_cpu(data.hctx);
+ data.ctx = __blk_mq_get_ctx(q, cpu);
+
+ if (!q->elevator)
+@@ -1507,15 +1527,6 @@ static void __blk_mq_run_hw_queue(struct
+ hctx_unlock(hctx, srcu_idx);
+ }
+
+-static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+-{
+- int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+-
+- if (cpu >= nr_cpu_ids)
+- cpu = cpumask_first(hctx->cpumask);
+- return cpu;
+-}
+-
+ /*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement.
+@@ -2457,6 +2468,10 @@ static int blk_mq_hctx_notify_offline(un
+ struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
+ struct blk_mq_hw_ctx, cpuhp_online);
+
++ /* hctx needn't to be deactivated in case managed irq isn't used */
++ if (!blk_mq_hctx_use_managed_irq(hctx))
++ return 0;
++
+ if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
+ !blk_mq_last_cpu_in_hctx(cpu, hctx))
+ return 0;
diff --git a/patches.suse/blk-mq-mark-if-one-queue-map-uses-managed-irq.patch b/patches.suse/blk-mq-mark-if-one-queue-map-uses-managed-irq.patch
new file mode 100644
index 0000000..28573c2
--- /dev/null
+++ b/patches.suse/blk-mq-mark-if-one-queue-map-uses-managed-irq.patch
@@ -0,0 +1,87 @@
+From: Ming Lei
+Date: Wed, 18 Aug 2021 22:44:27 +0800
+Subject: blk-mq: mark if one queue map uses managed irq
+Patch-mainline: Not yet, https://lore.kernel.org/linux-block/20210818144428.896216-1-ming.lei@redhat.com/
+References: bsc#1185762
+
+Retrieve this info via new added helper of device_has_managed_msi_irq,
+then we can decide if one hctx needs to be drained before all its CPUs
+become offline.
+
+Tested-by: Wen Xiong
+Reviewed-by: Christoph Hellwig
+Reviewed-by: John Garry
+Signed-off-by: Ming Lei
+[dwagner: dropped hisi_sas bits, the driver doesn't use managed IRQ yet]
+Acked-by: Daniel Wagner
+---
+ block/blk-mq-pci.c | 2 ++
+ block/blk-mq-rdma.c | 7 +++++++
+ block/blk-mq-virtio.c | 2 ++
+ include/linux/blk-mq.h | 3 ++-
+ 4 files changed, 13 insertions(+), 1 deletion(-)
+
+--- a/block/blk-mq-pci.c
++++ b/block/blk-mq-pci.c
+@@ -8,6 +8,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include "blk-mq.h"
+
+@@ -37,6 +38,7 @@ int blk_mq_pci_map_queues(struct blk_mq_
+ for_each_cpu(cpu, mask)
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ }
++ qmap->use_managed_irq = device_has_managed_msi_irq(&pdev->dev);
+
+ return 0;
+
+--- a/block/blk-mq-rdma.c
++++ b/block/blk-mq-rdma.c
+@@ -36,6 +36,13 @@ int blk_mq_rdma_map_queues(struct blk_mq
+ map->mq_map[cpu] = map->queue_offset + queue;
+ }
+
++ /*
++ * RDMA doesn't use managed irq, and nvme rdma driver can allocate
++ * and submit requests on specified hctx via
++ * blk_mq_alloc_request_hctx
++ */
++ map->use_managed_irq = false;
++
+ return 0;
+
+ fallback:
+--- a/block/blk-mq-virtio.c
++++ b/block/blk-mq-virtio.c
+@@ -7,6 +7,7 @@
+ #include
+ #include
+ #include
++#include
+ #include "blk-mq.h"
+
+ /**
+@@ -38,6 +39,7 @@ int blk_mq_virtio_map_queues(struct blk_
+ for_each_cpu(cpu, mask)
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ }
++ qmap->use_managed_irq = device_has_managed_msi_irq(&vdev->dev);
+
+ return 0;
+ fallback:
+--- a/include/linux/blk-mq.h
++++ b/include/linux/blk-mq.h
+@@ -191,7 +191,8 @@ struct blk_mq_hw_ctx {
+ struct blk_mq_queue_map {
+ unsigned int *mq_map;
+ unsigned int nr_queues;
+- unsigned int queue_offset;
++ unsigned int queue_offset:31;
++ unsigned int use_managed_irq:1;
+ };
+
+ /**
diff --git a/patches.suse/genirq-add-device_has_managed_msi_irq.patch b/patches.suse/genirq-add-device_has_managed_msi_irq.patch
new file mode 100644
index 0000000..a3b49d3
--- /dev/null
+++ b/patches.suse/genirq-add-device_has_managed_msi_irq.patch
@@ -0,0 +1,71 @@
+From: Ming Lei
+Date: Wed, 18 Aug 2021 22:44:26 +0800
+Subject: genirq: add device_has_managed_msi_irq
+Patch-mainline: Not yet, https://lore.kernel.org/linux-block/20210818144428.896216-1-ming.lei@redhat.com/
+References: bsc#1185762
+
+irq vector allocation with managed affinity may be used by driver, and
+blk-mq needs this info for draining queue because genirq core will shutdown
+managed irq when all CPUs in the affinity mask are offline.
+
+The info of using managed irq is often produced by drivers, and it is
+consumed by blk-mq, so different subsystems are involved in this info flow.
+
+Address this issue by adding one helper of device_has_managed_msi_irq()
+which is suggested by John Garry.
+
+Tested-by: Wen Xiong
+Reviewed-by: Christoph Hellwig
+Suggested-by: John Garry
+Signed-off-by: Ming Lei
+Acked-by: Daniel Wagner
+---
+ include/linux/msi.h | 5 +++++
+ kernel/irq/msi.c | 18 ++++++++++++++++++
+ 2 files changed, 23 insertions(+)
+
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -59,10 +59,15 @@ struct platform_msi_priv_data;
+ void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
+ #ifdef CONFIG_GENERIC_MSI_IRQ
+ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
++bool device_has_managed_msi_irq(struct device *dev);
+ #else
+ static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
+ {
+ }
++static inline bool device_has_managed_msi_irq(struct device *dev)
++{
++ return false;
++}
+ #endif
+
+ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -69,6 +69,24 @@ void get_cached_msi_msg(unsigned int irq
+ }
+ EXPORT_SYMBOL_GPL(get_cached_msi_msg);
+
++/**
++ * device_has_managed_msi_irq - Query if device has managed irq entry
++ * @dev: Pointer to the device for which we want to query
++ *
++ * Return true if there is managed irq vector allocated on this device
++ */
++bool device_has_managed_msi_irq(struct device *dev)
++{
++ struct msi_desc *desc;
++
++ for_each_msi_entry(desc, dev) {
++ if (desc->affinity && desc->affinity->is_managed)
++ return true;
++ }
++ return false;
++}
++EXPORT_SYMBOL_GPL(device_has_managed_msi_irq);
++
+ #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+ static inline void irq_chip_write_msi_msg(struct irq_data *data,
+ struct msi_msg *msg)
diff --git a/series.conf b/series.conf
index 016290f..f9f2c86 100644
--- a/series.conf
+++ b/series.conf
@@ -51788,6 +51788,10 @@
 patches.suse/bfq-tune-slice-idle.patch
 patches.suse/block-floppy-fix-contended-case-in-floppy_queue_rq.patch
 
+ patches.suse/genirq-add-device_has_managed_msi_irq.patch
+ patches.suse/blk-mq-mark-if-one-queue-map-uses-managed-irq.patch
+ patches.suse/blk-mq-don-t-deactivate-hctx-if-managed-irq-isn-t-used.patch
+
 ########################################################
 # Networking core
 ########################################################
@@ -52084,6 +52088,7 @@
 patches.kabi/NFS-pass-cred-explicitly-for-access-tests.patch
 patches.kabi/scsi-fc-kABI-fixes-for-new-ELS_RDP-definition.patch
 patches.kabi/ath_key_delete-kABI-fix.patch
+ patches.kabi/blk-mq-kABI-fixes-for-blk_mq_queue_map.patch
 
 ########################################################
 # You'd better have a good reason for adding a patch