From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:14 +0800
Subject: [PATCH] blk-mq: figure out correct numa node for hw queue
Git-commit: 4d805131abf219e0019715f1cf29763c613aae07
Patch-mainline: v5.18-rc1
References: jsc#PED-1183
The current code always uses default queue map and hw queue index
for figuring out the numa node for hw queue, this way isn't correct
because blk-mq supports three queue maps, and the correct queue map
should be used for the specified hw queue.
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Hannes Reinecke <hare@suse.com>
---
block/blk-mq.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a05ce7725031..5d25abf9e551 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3107,15 +3107,41 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags)
blk_mq_free_tags(tags);
}
+static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set,
+ unsigned int hctx_idx)
+{
+ int i;
+
+ for (i = 0; i < set->nr_maps; i++) {
+ unsigned int start = set->map[i].queue_offset;
+ unsigned int end = start + set->map[i].nr_queues;
+
+ if (hctx_idx >= start && hctx_idx < end)
+ break;
+ }
+
+ if (i >= set->nr_maps)
+ i = HCTX_TYPE_DEFAULT;
+
+ return i;
+}
+
+static int blk_mq_get_hctx_node(struct blk_mq_tag_set *set,
+ unsigned int hctx_idx)
+{
+ enum hctx_type type = hctx_idx_to_type(set, hctx_idx);
+
+ return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx);
+}
+
static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
unsigned int nr_tags,
unsigned int reserved_tags)
{
+ int node = blk_mq_get_hctx_node(set, hctx_idx);
struct blk_mq_tags *tags;
- int node;
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -3164,10 +3190,9 @@ static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
unsigned int hctx_idx, unsigned int depth)
{
unsigned int i, j, entries_per_page, max_order = 4;
+ int node = blk_mq_get_hctx_node(set, hctx_idx);
size_t rq_size, left;
- int node;
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -3941,10 +3966,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
- int node;
+ int node = blk_mq_get_hctx_node(set, i);
struct blk_mq_hw_ctx *hctx;
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
/*
* If the hw queue has been mapped to another numa node,
* we need to realloc the hctx. If allocation fails, fallback
--
2.35.3