From 32cc3f0e9a4510ac73a855039c3b0736fdca48f8 Mon Sep 17 00:00:00 2001 From: Yousaf Kaukab Date: Apr 15 2024 13:25:18 +0000 Subject: Merge remote-tracking branch 'origin/users/colyli/SLE15-SP5/for-next' into SLE15-SP5 Pull bcache/dm/nvdimm fixes from Coly Li --- diff --git a/blacklist.conf b/blacklist.conf index e6f12a8..919bddb 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -854,3 +854,4 @@ c7ac8231ace9b07306d0299969e42073b189c70a # prereq of f7ec1cd5cc7ef3ad964b677ba82 daa694e4137571b4ebec330f9a9b4d54aa8b8089 # prereq of f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 # performance optimization, see bsc#1222436 b377c66ae3509ccea596512d6afb4777711c4870 # we don't have annotate_noendbr +a307e2abfc22880a3026bc2f2a997402b7c2d833 # typo fix diff --git a/patches.suse/0017-bcache-avoid-unnecessary-soft-lockup-in-kworker-upda.patch b/patches.suse/0017-bcache-avoid-unnecessary-soft-lockup-in-kworker-upda.patch index 85c0ed0..dd789da 100644 --- a/patches.suse/0017-bcache-avoid-unnecessary-soft-lockup-in-kworker-upda.patch +++ b/patches.suse/0017-bcache-avoid-unnecessary-soft-lockup-in-kworker-upda.patch @@ -62,10 +62,10 @@ Signed-off-by: Jens Axboe --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h -@@ -397,6 +397,13 @@ struct cached_dev { +@@ -395,6 +395,13 @@ struct cached_dev { + atomic_t io_errors; + unsigned int error_limit; unsigned int offline_seconds; - - char backing_dev_name[BDEVNAME_SIZE]; + + /* + * Retry to update writeback_rate if contention happens for @@ -116,7 +116,7 @@ Signed-off-by: Jens Axboe } -@@ -1003,6 +1011,9 @@ void bch_cached_dev_writeback_init(struc +@@ -1006,6 +1014,9 @@ void bch_cached_dev_writeback_init(struc dc->writeback_rate_fp_term_high = 1000; dc->writeback_rate_i_term_inverse = 10000; diff --git a/patches.suse/Avoid-deadlock-for-recursive-I-O-on-dm-thin-when-used-as-swap-4905.patch b/patches.suse/Avoid-deadlock-for-recursive-I-O-on-dm-thin-when-used-as-swap-4905.patch deleted file mode 100644 index a96abe1..0000000 --- a/patches.suse/Avoid-deadlock-for-recursive-I-O-on-dm-thin-when-used-as-swap-4905.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 49058b21fac119193c553dfce58cdfe62e8871d8 Mon Sep 17 00:00:00 2001 -From: Coly Li -Date: Mon, 27 Feb 2023 21:26:14 +0800 -Subject: [PATCH] Avoid deadlock for recursive I/O on dm-thin when used as swap -Patch-mainline: Not yet, posted to upstream but not accepted yet. -References: bsc#1177529 - -This is an alrady known issue that dm-thin volume cannot be used as -swap, otherwise a deadlock may happen when dm-thin internal memory -demond triggers swap I/O on the dm-thin volume itself. - -Thanks to Mikulas Patocka for commit a666e5c05e7c ("dm: fix deadlock -when swapping to encrypted device"), this method can also be used for -dm-thin to avoid the recursive I/O when it is used as swap. - -This patch just simply sets ti->limit_swap_bios by tree in pool_ctr() -and thin_ctr(), other important stuffs are already done by Patocka in -the above mentioned commit. - -In my test, I create a dm-thin volume /dev/vg/swap and use it as swap -device. Then I run fio on another dm-thin volume /dev/vg/main and use -large --blocksize to trigger swap I/O onto /dev/vg/swap. - -The following fio command line is used in my test, - fio --name recursive-swap-io --lockmem 1 --iodepth 128 \ - --ioengine libaio --filename /dev/vg/main --rw randrw \ - --blocksize 1M --numjobs 32 --time_based --runtime=12h - -Without the patch, the whole system can be locked up within 15 seconds. 
- -With this patch, there is no any deadlock or hang task observed after -2 hours fio running. - -Further more, I change --blocksize from 1M to 128M, around 30 seconds -after fio running, no I/O rate displayed by fio, and the out-of-memory -killer message shows up in kernel message. After around 20 minutes all -fio processes are killed and the whole system backs to be alive. - -This is exactly what is expected when recursive I/O happens on dm-thin -volume when it is used as swap. - -Note: this change depends on commit a666e5c05e7c ("dm: fix deadlock when -swapping to encrypted device") - -(Coly Li: refreshed for Linux v5.14 based SUSE kernel) - -Signed-off-by: Coly Li -Cc: Mikulas Patocka -Cc: Mike Snitzer -Cc: stable@vger.kernel.org - ---- - drivers/md/dm-thin.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/md/dm-thin.c -+++ b/drivers/md/dm-thin.c -@@ -3355,6 +3355,7 @@ static int pool_ctr(struct dm_target *ti - pt->low_water_blocks = low_water_blocks; - pt->adjusted_pf = pt->requested_pf = pf; - ti->num_flush_bios = 1; -+ ti->limit_swap_bios = true; - - /* - * Only need to enable discards if the pool should pass -@@ -4233,6 +4234,7 @@ static int thin_ctr(struct dm_target *ti - goto bad; - - ti->num_flush_bios = 1; -+ ti->limit_swap_bios = true; - ti->flush_supported = true; - ti->accounts_remapped_io = true; - ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); diff --git a/patches.suse/bcache-Fix-__bch_btree_node_alloc-to-make-the-failur-80fc.patch b/patches.suse/bcache-Fix-__bch_btree_node_alloc-to-make-the-failur-80fc.patch new file mode 100644 index 0000000..8b5dc1e --- /dev/null +++ b/patches.suse/bcache-Fix-__bch_btree_node_alloc-to-make-the-failur-80fc.patch @@ -0,0 +1,48 @@ +From 80fca8a10b604afad6c14213fdfd816c4eda3ee4 Mon Sep 17 00:00:00 2001 +From: Zheng Wang +Date: Thu, 15 Jun 2023 20:12:22 +0800 +Subject: [PATCH] bcache: Fix __bch_btree_node_alloc to make the failure + behavior consistent +Git-commit: 80fca8a10b604afad6c14213fdfd816c4eda3ee4 +Patch-mainline: v6.5-rc1 +References: git-fixes + +In some specific situations, the return value of __bch_btree_node_alloc +may be NULL. This may lead to a potential NULL pointer dereference in +caller function like a calling chain : +btree_split->bch_btree_node_alloc->__bch_btree_node_alloc. + +Fix it by initializing the return value in __bch_btree_node_alloc. 
+ +Fixes: cafe56359144 ("bcache: A block layer cache") +Cc: stable@vger.kernel.org +Signed-off-by: Zheng Wang +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20230615121223.22502-6-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 7c21e54468bf..0ddf91204782 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1090,10 +1090,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + struct btree *parent) + { + BKEY_PADDED(key) k; +- struct btree *b = ERR_PTR(-EAGAIN); ++ struct btree *b; + + mutex_lock(&c->bucket_lock); + retry: ++ /* return ERR_PTR(-EAGAIN) when it fails */ ++ b = ERR_PTR(-EAGAIN); + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) + goto err; + +-- +2.35.3 + diff --git a/patches.suse/bcache-Remove-dead-references-to-cache_readaheads-ccb8.patch b/patches.suse/bcache-Remove-dead-references-to-cache_readaheads-ccb8.patch new file mode 100644 index 0000000..4231bbc --- /dev/null +++ b/patches.suse/bcache-Remove-dead-references-to-cache_readaheads-ccb8.patch @@ -0,0 +1,50 @@ +From ccb8c3bd6d93e7986b702d1f66d5d56d08abc59f Mon Sep 17 00:00:00 2001 +From: Andrea Tomassetti +Date: Thu, 15 Jun 2023 20:12:20 +0800 +Subject: [PATCH] bcache: Remove dead references to cache_readaheads +Git-commit: ccb8c3bd6d93e7986b702d1f66d5d56d08abc59f +Patch-mainline: v6.5-rc1 +References: git-fixes + +The cache_readaheads stat counter is not used anymore and should be +removed. + +Signed-off-by: Andrea Tomassetti +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20230615121223.22502-4-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + Documentation/admin-guide/bcache.rst | 3 --- + drivers/md/bcache/stats.h | 1 - + 2 files changed, 4 deletions(-) + +diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst +index bb5032a99234..6fdb495ac466 100644 +--- a/Documentation/admin-guide/bcache.rst ++++ b/Documentation/admin-guide/bcache.rst +@@ -508,9 +508,6 @@ cache_miss_collisions + cache miss, but raced with a write and data was already present (usually 0 + since the synchronization for cache misses was rewritten) + +-cache_readaheads +- Count of times readahead occurred. 
+- + Sysfs - cache set + ~~~~~~~~~~~~~~~~~ + +diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h +index bd3afc856d53..21b445f8af15 100644 +--- a/drivers/md/bcache/stats.h ++++ b/drivers/md/bcache/stats.h +@@ -18,7 +18,6 @@ struct cache_stats { + unsigned long cache_misses; + unsigned long cache_bypass_hits; + unsigned long cache_bypass_misses; +- unsigned long cache_readaheads; + unsigned long cache_miss_collisions; + unsigned long sectors_bypassed; + +-- +2.35.3 + diff --git a/patches.suse/bcache-Remove-unnecessary-NULL-point-check-in-node-a-028d.patch b/patches.suse/bcache-Remove-unnecessary-NULL-point-check-in-node-a-028d.patch new file mode 100644 index 0000000..13af5d2 --- /dev/null +++ b/patches.suse/bcache-Remove-unnecessary-NULL-point-check-in-node-a-028d.patch @@ -0,0 +1,99 @@ +From 028ddcac477b691dd9205c92f991cc15259d033e Mon Sep 17 00:00:00 2001 +From: Zheng Wang +Date: Thu, 15 Jun 2023 20:12:21 +0800 +Subject: [PATCH] bcache: Remove unnecessary NULL point check in node + allocations +Git-commit: 028ddcac477b691dd9205c92f991cc15259d033e +Patch-mainline: v6.5-rc1 +References: git-fixes + +Due to the previous fix of __bch_btree_node_alloc, the return value will +never be a NULL pointer. So IS_ERR is enough to handle the failure +situation. Fix it by replacing IS_ERR_OR_NULL check by an IS_ERR check. + +Fixes: cafe56359144 ("bcache: A block layer cache") +Cc: stable@vger.kernel.org +Signed-off-by: Zheng Wang +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20230615121223.22502-5-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 10 +++++----- + drivers/md/bcache/super.c | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 147c493a989a..7c21e54468bf 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1138,7 +1138,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, + { + struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); + +- if (!IS_ERR_OR_NULL(n)) { ++ if (!IS_ERR(n)) { + mutex_lock(&n->write_lock); + bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); + bkey_copy_key(&n->key, &b->key); +@@ -1340,7 +1340,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + memset(new_nodes, 0, sizeof(new_nodes)); + closure_init_stack(&cl); + +- while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) ++ while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) + keys += r[nodes++].keys; + + blocks = btree_default_blocks(b->c) * 2 / 3; +@@ -1352,7 +1352,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + + for (i = 0; i < nodes; i++) { + new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL); +- if (IS_ERR_OR_NULL(new_nodes[i])) ++ if (IS_ERR(new_nodes[i])) + goto out_nocoalesce; + } + +@@ -1487,7 +1487,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + bch_keylist_free(&keylist); + + for (i = 0; i < nodes; i++) +- if (!IS_ERR_OR_NULL(new_nodes[i])) { ++ if (!IS_ERR(new_nodes[i])) { + btree_node_free(new_nodes[i]); + rw_unlock(true, new_nodes[i]); + } +@@ -1669,7 +1669,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, + if (should_rewrite) { + n = btree_node_alloc_replacement(b, NULL); + +- if (!IS_ERR_OR_NULL(n)) { ++ if (!IS_ERR(n)) { + bch_btree_node_write_sync(n); + + bch_btree_set_root(n); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 1f829e74db0a..e2a803683105 100644 +--- 
a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1723,7 +1723,7 @@ static void cache_set_flush(struct closure *cl) + if (!IS_ERR_OR_NULL(c->gc_thread)) + kthread_stop(c->gc_thread); + +- if (!IS_ERR_OR_NULL(c->root)) ++ if (!IS_ERR(c->root)) + list_add(&c->root->list, &c->btree_cache); + + /* +@@ -2087,7 +2087,7 @@ static int run_cache_set(struct cache_set *c) + + err = "cannot allocate new btree root"; + c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); +- if (IS_ERR_OR_NULL(c->root)) ++ if (IS_ERR(c->root)) + goto err; + + mutex_lock(&c->root->write_lock); +-- +2.35.3 + diff --git a/patches.suse/bcache-add-code-comments-for-bch_btree_node_get-and--31f5.patch b/patches.suse/bcache-add-code-comments-for-bch_btree_node_get-and--31f5.patch new file mode 100644 index 0000000..5bfd443 --- /dev/null +++ b/patches.suse/bcache-add-code-comments-for-bch_btree_node_get-and--31f5.patch @@ -0,0 +1,49 @@ +From 31f5b956a197d4ec25c8a07cb3a2ab69d0c0b82f Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 20 Nov 2023 13:25:02 +0800 +Subject: [PATCH] bcache: add code comments for bch_btree_node_get() and + __bch_btree_node_alloc() +Git-commit: 31f5b956a197d4ec25c8a07cb3a2ab69d0c0b82f +Patch-mainline: v6.7-rc3 +References: git-fixes + +This patch adds code comments to bch_btree_node_get() and +__bch_btree_node_alloc() that NULL pointer will not be returned and it +is unnecessary to check NULL pointer by the callers of these routines. + +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-10-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 79f1fa4a0d55..de3019972b35 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1000,6 +1000,9 @@ static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op, + * + * The btree node will have either a read or a write lock held, depending on + * level and op->lock. ++ * ++ * Note: Only error code or btree pointer will be returned, it is unncessary ++ * for callers to check NULL pointer. + */ + struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, + struct bkey *k, int level, bool write, +@@ -1111,6 +1114,10 @@ static void btree_node_free(struct btree *b) + mutex_unlock(&b->c->bucket_lock); + } + ++/* ++ * Only error code or btree pointer will be returned, it is unncessary for ++ * callers to check NULL pointer. ++ */ + struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + int level, bool wait, + struct btree *parent) +-- +2.35.3 + diff --git a/patches.suse/bcache-avoid-NULL-checking-to-c-root-in-run_cache_se-3eba.patch b/patches.suse/bcache-avoid-NULL-checking-to-c-root-in-run_cache_se-3eba.patch new file mode 100644 index 0000000..8e8eaef --- /dev/null +++ b/patches.suse/bcache-avoid-NULL-checking-to-c-root-in-run_cache_se-3eba.patch @@ -0,0 +1,38 @@ +From 3eba5e0b2422aec3c9e79822029599961fdcab97 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 20 Nov 2023 13:25:03 +0800 +Subject: [PATCH] bcache: avoid NULL checking to c->root in run_cache_set() +Git-commit: 3eba5e0b2422aec3c9e79822029599961fdcab97 +Patch-mainline: v6.7-rc3 +References: git-fixes + +In run_cache_set() after c->root returned from bch_btree_node_get(), it +is checked by IS_ERR_OR_NULL(). Indeed it is unncessary to check NULL +because bch_btree_node_get() will not return NULL pointer to caller. 
+ +This patch replaces IS_ERR_OR_NULL() by IS_ERR() for the above reason. + +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-11-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index c7ecc7058d77..bfe1685dbae5 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2018,7 +2018,7 @@ static int run_cache_set(struct cache_set *c) + c->root = bch_btree_node_get(c, NULL, k, + j->btree_level, + true, NULL); +- if (IS_ERR_OR_NULL(c->root)) ++ if (IS_ERR(c->root)) + goto err; + + list_del_init(&c->root->list); +-- +2.35.3 + diff --git a/patches.suse/bcache-avoid-oversize-memory-allocation-by-small-str-baf8.patch b/patches.suse/bcache-avoid-oversize-memory-allocation-by-small-str-baf8.patch new file mode 100644 index 0000000..9fc183b --- /dev/null +++ b/patches.suse/bcache-avoid-oversize-memory-allocation-by-small-str-baf8.patch @@ -0,0 +1,89 @@ +From baf8fb7e0e5ec54ea0839f0c534f2cdcd79bea9c Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 20 Nov 2023 13:24:54 +0800 +Subject: [PATCH] bcache: avoid oversize memory allocation by small stripe_size +Git-commit: baf8fb7e0e5ec54ea0839f0c534f2cdcd79bea9c +Patch-mainline: v6.7-rc3 +References: git-fixes + +Arraies bcache->stripe_sectors_dirty and bcache->full_dirty_stripes are +used for dirty data writeback, their sizes are decided by backing device +capacity and stripe size. Larger backing device capacity or smaller +stripe size make these two arraies occupies more dynamic memory space. + +Currently bcache->stripe_size is directly inherited from +queue->limits.io_opt of underlying storage device. For normal hard +drives, its limits.io_opt is 0, and bcache sets the corresponding +stripe_size to 1TB (1<<31 sectors), it works fine 10+ years. But for +devices do declare value for queue->limits.io_opt, small stripe_size +(comparing to 1TB) becomes an issue for oversize memory allocations of +bcache->stripe_sectors_dirty and bcache->full_dirty_stripes, while the +capacity of hard drives gets much larger in recent decade. + +For example a raid5 array assembled by three 20TB hardrives, the raid +device capacity is 40TB with typical 512KB limits.io_opt. After the math +calculation in bcache code, these two arraies will occupy 400MB dynamic +memory. Even worse Andrea Tomassetti reports that a 4KB limits.io_opt is +declared on a new 2TB hard drive, then these two arraies request 2GB and +512MB dynamic memory from kzalloc(). The result is that bcache device +always fails to initialize on his system. + +To avoid the oversize memory allocation, bcache->stripe_size should not +directly inherited by queue->limits.io_opt from the underlying device. +This patch defines BCH_MIN_STRIPE_SZ (4MB) as minimal bcache stripe size +and set bcache device's stripe size against the declared limits.io_opt +value from the underlying storage device, +- If the declared limits.io_opt > BCH_MIN_STRIPE_SZ, bcache device will + set its stripe size directly by this limits.io_opt value. +- If the declared limits.io_opt < BCH_MIN_STRIPE_SZ, bcache device will + set its stripe size by a value multiplying limits.io_opt and euqal or + large than BCH_MIN_STRIPE_SZ. + +Then the minimal stripe size of a bcache device will always be >= 4MB. +For a 40TB raid5 device with 512KB limits.io_opt, memory occupied by +bcache->stripe_sectors_dirty and bcache->full_dirty_stripes will be 50MB +in total. 
For a 2TB hard drive with 4KB limits.io_opt, memory occupied +by these two arraies will be 2.5MB in total. + +Such mount of memory allocated for bcache->stripe_sectors_dirty and +bcache->full_dirty_stripes is reasonable for most of storage devices. + +Reported-by: Andrea Tomassetti +Signed-off-by: Coly Li +Reviewed-by: Eric Wheeler +Link: https://lore.kernel.org/r/20231120052503.6122-2-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bcache.h | 1 + + drivers/md/bcache/super.c | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 05be59ae21b2..6ae2329052c9 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -265,6 +265,7 @@ struct bcache_device { + #define BCACHE_DEV_WB_RUNNING 3 + #define BCACHE_DEV_RATE_DW_RUNNING 4 + int nr_stripes; ++#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT) + unsigned int stripe_size; + atomic_t *stripe_sectors_dirty; + unsigned long *full_dirty_stripes; +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 8bd899766372..c7ecc7058d77 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -905,6 +905,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, + + if (!d->stripe_size) + d->stripe_size = 1 << 31; ++ else if (d->stripe_size < BCH_MIN_STRIPE_SZ) ++ d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); + + n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); + if (!n || n > max_stripes) { +-- +2.35.3 + diff --git a/patches.suse/bcache-bset-Fix-comment-typos-11e5.patch b/patches.suse/bcache-bset-Fix-comment-typos-11e5.patch new file mode 100644 index 0000000..0a27877 --- /dev/null +++ b/patches.suse/bcache-bset-Fix-comment-typos-11e5.patch @@ -0,0 +1,37 @@ +From 11e529ccea33f24af6b54fe10bb3be9c1c48eddb Mon Sep 17 00:00:00 2001 +From: Jules Maselbas +Date: Tue, 20 Sep 2022 00:16:45 +0800 +Subject: [PATCH] bcache: bset: Fix comment typos +Git-commit: 11e529ccea33f24af6b54fe10bb3be9c1c48eddb +Patch-mainline: v6.1-rc1 +References: git-fixes + +Remove the redundant word `by`, correct the typo `creaated`. + +Cc: Kent Overstreet +Cc: linux-bcache@vger.kernel.org +Signed-off-by: Jules Maselbas +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20220919161647.81238-4-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bset.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c +index 94d38e8a59b3..2bba4d6aaaa2 100644 +--- a/drivers/md/bcache/bset.c ++++ b/drivers/md/bcache/bset.c +@@ -1264,7 +1264,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, + * + * Don't worry event 'out' is allocated from mempool, it can + * still be swapped here. Because state->pool is a page mempool +- * creaated by by mempool_init_page_pool(), which allocates ++ * created by mempool_init_page_pool(), which allocates + * pages by alloc_pages() indeed. 
+ */ + +-- +2.35.3 + diff --git a/patches.suse/bcache-check-return-value-from-btree_node_alloc_repl-7779.patch b/patches.suse/bcache-check-return-value-from-btree_node_alloc_repl-7779.patch new file mode 100644 index 0000000..18dd171 --- /dev/null +++ b/patches.suse/bcache-check-return-value-from-btree_node_alloc_repl-7779.patch @@ -0,0 +1,41 @@ +From 777967e7e9f6f5f3e153abffb562bffaf4430d26 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 20 Nov 2023 13:24:55 +0800 +Subject: [PATCH] bcache: check return value from + btree_node_alloc_replacement() +Git-commit: 777967e7e9f6f5f3e153abffb562bffaf4430d26 +Patch-mainline: v6.7-rc3 +References: git-fixes + +In btree_gc_rewrite_node(), pointer 'n' is not checked after it returns +from btree_gc_rewrite_node(). There is potential possibility that 'n' is +a non NULL ERR_PTR(), referencing such error code is not permitted in +following code. Therefore a return value checking is necessary after 'n' +is back from btree_node_alloc_replacement(). + +Signed-off-by: Coly Li +Reported-by: Dan Carpenter +Cc: +Link: https://lore.kernel.org/r/20231120052503.6122-3-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index ae5cbb55861f..de8d552201dc 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1532,6 +1532,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, + return 0; + + n = btree_node_alloc_replacement(replace, NULL); ++ if (IS_ERR(n)) ++ return 0; + + /* recheck reserve after allocating replacement node */ + if (btree_check_reserve(b, NULL)) { +-- +2.35.3 + diff --git a/patches.suse/bcache-fix-NULL-pointer-reference-in-cached_dev_deta-aa97.patch b/patches.suse/bcache-fix-NULL-pointer-reference-in-cached_dev_deta-aa97.patch new file mode 100644 index 0000000..809f8f3 --- /dev/null +++ b/patches.suse/bcache-fix-NULL-pointer-reference-in-cached_dev_deta-aa97.patch @@ -0,0 +1,64 @@ +From aa97f6cdb7e92909e17c8ca63e622fcb81d57a57 Mon Sep 17 00:00:00 2001 +From: Lin Feng +Date: Fri, 12 Nov 2021 13:36:29 +0800 +Subject: [PATCH] bcache: fix NULL pointer reference in + cached_dev_detach_finish +Git-commit: aa97f6cdb7e92909e17c8ca63e622fcb81d57a57 +Patch-mainline: v5.16-rc6 +References: git-fixes + +Commit 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper +place on backing device detach") tries to fix calc_cached_dev_sectors +when bcache device detaches, but now we have: + +cached_dev_detach_finish + ... + bcache_device_detach(&dc->disk); + ... + closure_put(&d->c->caching); + d->c = NULL; [*explicitly set dc->disk.c to NULL*] + list_move(&dc->list, &uncached_devices); + calc_cached_dev_sectors(dc->disk.c); [*passing a NULL pointer*] + ... + +Upper codeflows shows how bug happens, this patch fix the problem by +caching dc->disk.c beforehand, and cache_set won't be freed under us +because c->caching closure at least holds a reference count and closure +callback __cache_set_unregister only being called by bch_cache_set_stop +which using closure_queue(&c->caching), that means c->caching closure +callback for destroying cache_set won't be trigger by previous +closure_put(&d->c->caching). +So at this stage(while cached_dev_detach_finish is calling) it's safe to +access cache_set dc->disk.c. 
+ +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Fixes: 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") +Signed-off-by: Lin Feng +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211112053629.3437-2-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/super.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1139,6 +1139,7 @@ static void cancel_writeback_rate_update + static void cached_dev_detach_finish(struct work_struct *w) + { + struct cached_dev *dc = container_of(w, struct cached_dev, detach); ++ struct cache_set *c = dc->disk.c; + + BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); + BUG_ON(refcount_read(&dc->count)); +@@ -1156,7 +1157,7 @@ static void cached_dev_detach_finish(str + + bcache_device_detach(&dc->disk); + list_move(&dc->list, &uncached_devices); +- calc_cached_dev_sectors(dc->disk.c); ++ calc_cached_dev_sectors(c); + + clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); + clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); diff --git a/patches.suse/bcache-fix-error-info-in-register_bcache-d55f.patch b/patches.suse/bcache-fix-error-info-in-register_bcache-d55f.patch new file mode 100644 index 0000000..e404a4c --- /dev/null +++ b/patches.suse/bcache-fix-error-info-in-register_bcache-d55f.patch @@ -0,0 +1,63 @@ +From d55f7cb2e5c053010d2b527494da9bbb722a78ba Mon Sep 17 00:00:00 2001 +From: Chao Yu +Date: Wed, 20 Oct 2021 22:38:07 +0800 +Subject: [PATCH] bcache: fix error info in register_bcache() +Git-commit: d55f7cb2e5c053010d2b527494da9bbb722a78ba +Patch-mainline: v5.16-rc1 +References: git-fixes + +In register_bcache(), there are several cases we didn't set +correct error info (return value and/or error message): +- if kzalloc() fails, it needs to return ENOMEM and print +"cannot allocate memory"; +- if register_cache() fails, it's better to propagate its +return value rather than using default EINVAL. 
+ +Signed-off-by: Chao Yu +Reviewed-by: Hannes Reinecke +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-4-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/super.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 330d6c167265..62b0140b0a73 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2617,8 +2617,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + if (SB_IS_BDEV(sb)) { + struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); + +- if (!dc) ++ if (!dc) { ++ ret = -ENOMEM; ++ err = "cannot allocate memory"; + goto out_put_sb_page; ++ } + + mutex_lock(&bch_register_lock); + ret = register_bdev(sb, sb_disk, bdev, dc); +@@ -2629,11 +2632,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + } else { + struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + +- if (!ca) ++ if (!ca) { ++ ret = -ENOMEM; ++ err = "cannot allocate memory"; + goto out_put_sb_page; ++ } + + /* blkdev_put() will be called in bch_cache_release() */ +- if (register_cache(sb, sb_disk, bdev, ca) != 0) ++ ret = register_cache(sb, sb_disk, bdev, ca); ++ if (ret) + goto out_free_sb; + } + +-- +2.35.3 + diff --git a/patches.suse/bcache-fix-repeated-words-in-comments-6dd3.patch b/patches.suse/bcache-fix-repeated-words-in-comments-6dd3.patch new file mode 100644 index 0000000..bb34b69 --- /dev/null +++ b/patches.suse/bcache-fix-repeated-words-in-comments-6dd3.patch @@ -0,0 +1,35 @@ +From 6dd3be6923eec2c49860e7292e4e2783c74a9dff Mon Sep 17 00:00:00 2001 +From: Jilin Yuan +Date: Tue, 20 Sep 2022 00:16:46 +0800 +Subject: [PATCH] bcache:: fix repeated words in comments +Git-commit: 6dd3be6923eec2c49860e7292e4e2783c74a9dff +Patch-mainline: v6.1-rc1 +References: git-fixes + +Delete the redundant word 'we'. + +Signed-off-by: Jilin Yuan +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20220919161647.81238-5-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bcache.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 2acda9cea0f9..aebb7ef10e63 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -107,7 +107,7 @@ + * + * BTREE NODES: + * +- * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and ++ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and + * free smaller than a bucket - so, that's how big our btree nodes are. 
+ * + * (If buckets are really big we'll only use part of the bucket for a btree node +-- +2.35.3 + diff --git a/patches.suse/bcache-fixup-bcache_dev_sectors_dirty_add-multithrea-7b10.patch b/patches.suse/bcache-fixup-bcache_dev_sectors_dirty_add-multithrea-7b10.patch new file mode 100644 index 0000000..b08e0ff --- /dev/null +++ b/patches.suse/bcache-fixup-bcache_dev_sectors_dirty_add-multithrea-7b10.patch @@ -0,0 +1,67 @@ +From 7b1002f7cfe581930f63787a0b3de0144e61ed55 Mon Sep 17 00:00:00 2001 +From: Mingzhe Zou +Date: Fri, 7 Jan 2022 16:21:13 +0800 +Subject: [PATCH] bcache: fixup bcache_dev_sectors_dirty_add() multithreaded + CPU false sharing +Git-commit: 7b1002f7cfe581930f63787a0b3de0144e61ed55 +Patch-mainline: v5.18-rc1 +References: git-fixes + +When attaching a cached device (a.k.a backing device) to a cache +device, bch_sectors_dirty_init() is called to count dirty sectors +and stripes (see what bcache_dev_sectors_dirty_add() does) on the +cache device. + +When bcache_dev_sectors_dirty_add() is called, set_bit(stripe, +d->full_dirty_stripes) or clear_bit(stripe, d->full_dirty_stripes) +operation will always be performed. In full_dirty_stripes, each 1bit +represents stripe_size (8192) sectors (512B), so 1bit=4MB (8192*512), +and each CPU cache line=64B=512bit=2048MB. When 20 threads process +a cached disk with 100G dirty data, a single thread processes about +23M at a time, and 20 threads total 460M. These full_dirty_stripes +bits corresponding to the 460M data is likely to fall in the same CPU +cache line. When one of these threads performs a set_bit or clear_bit +operation, the same CPU cache line of other threads will become invalid +and must read the full_dirty_stripes from the main memory again. Compared +with single thread, the time of a bcache_dev_sectors_dirty_add() +call is increased by about 50 times in our test (100G dirty data, +20 threads, bcache_dev_sectors_dirty_add() is called more than +20 million times). + +This patch tries to test_bit before set_bit or clear_bit operation. +Therefore, a lot of force set and clear operations will be avoided, +and most of bcache_dev_sectors_dirty_add() calls will only read CPU +cache line. 
+ +Signed-off-by: Mingzhe Zou +Signed-off-by: Coly Li + +--- + drivers/md/bcache/writeback.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index d42301e6309d..176461f89f46 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -585,10 +585,13 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, + + sectors_dirty = atomic_add_return(s, + d->stripe_sectors_dirty + stripe); +- if (sectors_dirty == d->stripe_size) +- set_bit(stripe, d->full_dirty_stripes); +- else +- clear_bit(stripe, d->full_dirty_stripes); ++ if (sectors_dirty == d->stripe_size) { ++ if (!test_bit(stripe, d->full_dirty_stripes)) ++ set_bit(stripe, d->full_dirty_stripes); ++ } else { ++ if (test_bit(stripe, d->full_dirty_stripes)) ++ clear_bit(stripe, d->full_dirty_stripes); ++ } + + nr_sectors -= s; + stripe_offset = 0; +-- +2.35.3 + diff --git a/patches.suse/bcache-fixup-btree_cache_wait-list-damage-f085.patch b/patches.suse/bcache-fixup-btree_cache_wait-list-damage-f085.patch new file mode 100644 index 0000000..bb46137 --- /dev/null +++ b/patches.suse/bcache-fixup-btree_cache_wait-list-damage-f085.patch @@ -0,0 +1,119 @@ +From f0854489fc07d2456f7cc71a63f4faf9c716ffbe Mon Sep 17 00:00:00 2001 +From: Mingzhe Zou +Date: Thu, 15 Jun 2023 20:12:23 +0800 +Subject: [PATCH] bcache: fixup btree_cache_wait list damage +Git-commit: f0854489fc07d2456f7cc71a63f4faf9c716ffbe +Patch-mainline: v6.5-rc1 +References: git-fixes + +We get a kernel crash about "list_add corruption. next->prev should be +prev (ffff9c801bc01210), but was ffff9c77b688237c. +(next=ffffae586d8afe68)." + +crash> struct list_head 0xffff9c801bc01210 +struct list_head { + next = 0xffffae586d8afe68, + prev = 0xffffae586d8afe68 +} +crash> struct list_head 0xffff9c77b688237c +struct list_head { + next = 0x0, + prev = 0x0 +} +crash> struct list_head 0xffffae586d8afe68 +struct list_head struct: invalid kernel virtual address: ffffae586d8afe68 type: "gdb_readmem_callback" +Cannot access memory at address 0xffffae586d8afe68 + +[230469.019492] Call Trace: +[230469.032041] prepare_to_wait+0x8a/0xb0 +[230469.044363] ? bch_btree_keys_free+0x6c/0xc0 [escache] +[230469.056533] mca_cannibalize_lock+0x72/0x90 [escache] +[230469.068788] mca_alloc+0x2ae/0x450 [escache] +[230469.080790] bch_btree_node_get+0x136/0x2d0 [escache] +[230469.092681] bch_btree_check_thread+0x1e1/0x260 [escache] +[230469.104382] ? finish_wait+0x80/0x80 +[230469.115884] ? bch_btree_check_recurse+0x1a0/0x1a0 [escache] +[230469.127259] kthread+0x112/0x130 +[230469.138448] ? kthread_flush_work_fn+0x10/0x10 +[230469.149477] ret_from_fork+0x35/0x40 + +bch_btree_check_thread() and bch_dirty_init_thread() may call +mca_cannibalize() to cannibalize other cached btree nodes. Only one thread +can do it at a time, so the op of other threads will be added to the +btree_cache_wait list. + +We must call finish_wait() to remove op from btree_cache_wait before free +it's memory address. Otherwise, the list will be damaged. Also should call +bch_cannibalize_unlock() to release the btree_cache_alloc_lock and wake_up +other waiters. 
+ +Fixes: 8e7102273f59 ("bcache: make bch_btree_check() to be multithreaded") +Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") +Cc: stable@vger.kernel.org +Signed-off-by: Mingzhe Zou +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20230615121223.22502-7-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 11 ++++++++++- + drivers/md/bcache/btree.h | 1 + + drivers/md/bcache/writeback.c | 10 ++++++++++ + 3 files changed, 21 insertions(+), 1 deletion(-) + +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -885,7 +885,7 @@ static struct btree *mca_cannibalize(str + * cannibalize_bucket() will take. This means every time we unlock the root of + * the btree, we need to release this lock if we have it held. + */ +-static void bch_cannibalize_unlock(struct cache_set *c) ++void bch_cannibalize_unlock(struct cache_set *c) + { + spin_lock(&c->btree_cannibalize_lock); + if (c->btree_cache_alloc_lock == current) { +@@ -1970,6 +1970,15 @@ static int bch_btree_check_thread(void * + c->gc_stats.nodes++; + bch_btree_op_init(&op, 0); + ret = bcache_btree(check_recurse, p, c->root, &op); ++ /* ++ * The op may be added to cache_set's btree_cache_wait ++ * in mca_cannibalize(), must ensure it is removed from ++ * the list and release btree_cache_alloc_lock before ++ * free op memory. ++ * Otherwise, the btree_cache_wait will be damaged. ++ */ ++ bch_cannibalize_unlock(c); ++ finish_wait(&c->btree_cache_wait, &(&op)->wait); + if (ret) + goto out; + } +--- a/drivers/md/bcache/btree.h ++++ b/drivers/md/bcache/btree.h +@@ -282,6 +282,7 @@ void bch_initial_gc_finish(struct cache_ + void bch_moving_gc(struct cache_set *c); + int bch_btree_check(struct cache_set *c); + void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); ++void bch_cannibalize_unlock(struct cache_set *c); + + static inline void wake_up_gc(struct cache_set *c) + { +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -890,6 +890,16 @@ static int bch_root_node_dirty_init(stru + if (ret < 0) + pr_warn("sectors dirty init failed, ret=%d!\n", ret); + ++ /* ++ * The op may be added to cache_set's btree_cache_wait ++ * in mca_cannibalize(), must ensure it is removed from ++ * the list and release btree_cache_alloc_lock before ++ * free op memory. ++ * Otherwise, the btree_cache_wait will be damaged. ++ */ ++ bch_cannibalize_unlock(c); ++ finish_wait(&c->btree_cache_wait, &(&op.op)->wait); ++ + return ret; + } + diff --git a/patches.suse/bcache-fixup-init-dirty-data-errors-7cc4.patch b/patches.suse/bcache-fixup-init-dirty-data-errors-7cc4.patch new file mode 100644 index 0000000..9c71079 --- /dev/null +++ b/patches.suse/bcache-fixup-init-dirty-data-errors-7cc4.patch @@ -0,0 +1,45 @@ +From 7cc47e64d3d69786a2711a4767e26b26ba63d7ed Mon Sep 17 00:00:00 2001 +From: Mingzhe Zou +Date: Mon, 20 Nov 2023 13:24:58 +0800 +Subject: [PATCH] bcache: fixup init dirty data errors +Git-commit: 7cc47e64d3d69786a2711a4767e26b26ba63d7ed +Patch-mainline: v6.7-rc3 +References: git-fixes + +We found that after long run, the dirty_data of the bcache device +will have errors. This error cannot be eliminated unless re-register. + +We also found that reattach after detach, this error can accumulate. + +In bch_sectors_dirty_init(), all inode <= d->id keys will be recounted +again. This is wrong, we only need to count the keys of the current +device. 
+ +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") +Signed-off-by: Mingzhe Zou +Cc: +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-6-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/writeback.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -991,8 +991,11 @@ void bch_sectors_dirty_init(struct bcach + op.count = 0; + + for_each_key_filter(&c->root->keys, +- k, &iter, bch_ptr_invalid) ++ k, &iter, bch_ptr_invalid) { ++ if (KEY_INODE(k) != op.inode) ++ continue; + sectors_dirty_init_fn(&op.op, c->root, k); ++ } + + rw_unlock(0, c->root); + return; diff --git a/patches.suse/bcache-fixup-lock-c-root-error-e348.patch b/patches.suse/bcache-fixup-lock-c-root-error-e348.patch new file mode 100644 index 0000000..544e831 --- /dev/null +++ b/patches.suse/bcache-fixup-lock-c-root-error-e348.patch @@ -0,0 +1,180 @@ +From e34820f984512b433ee1fc291417e60c47d56727 Mon Sep 17 00:00:00 2001 +From: Mingzhe Zou +Date: Mon, 20 Nov 2023 13:24:59 +0800 +Subject: [PATCH] bcache: fixup lock c->root error +Git-commit: e34820f984512b433ee1fc291417e60c47d56727 +Patch-mainline: v6.7-rc3 +References: git-fixes + +We had a problem with io hung because it was waiting for c->root to +release the lock. + +crash> cache_set.root -l cache_set.list ffffa03fde4c0050 + root = 0xffff802ef454c800 +crash> btree -o 0xffff802ef454c800 | grep rw_semaphore + [ffff802ef454c858] struct rw_semaphore lock; +crash> struct rw_semaphore ffff802ef454c858 +struct rw_semaphore { + count = { + counter = -4294967297 + }, + wait_list = { + next = 0xffff00006786fc28, + prev = 0xffff00005d0efac8 + }, + wait_lock = { + raw_lock = { + { + val = { + counter = 0 + }, + { + locked = 0 '\000', + pending = 0 '\000' + }, + { + locked_pending = 0, + tail = 0 + } + } + } + }, + osq = { + tail = { + counter = 0 + } + }, + owner = 0xffffa03fdc586603 +} + +The "counter = -4294967297" means that lock count is -1 and a write lock +is being attempted. Then, we found that there is a btree with a counter +of 1 in btree_cache_freeable. + +crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache + [ffffa03fde4c1140] struct list_head btree_cache; + [ffffa03fde4c1150] struct list_head btree_cache_freeable; + [ffffa03fde4c1160] struct list_head btree_cache_freed; + [ffffa03fde4c1170] unsigned int btree_cache_used; + [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait; + [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock; +crash> list -H ffffa03fde4c1140|wc -l +973 +crash> list -H ffffa03fde4c1150|wc -l +1123 +crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050 + btree_cache_used = 2097 +crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt +crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt +[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd +/var/crash/127.0.0.1-2023-08-04-16:40:28 +[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0" +[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0" + counter = 1 + +We found that this is a bug in bch_sectors_dirty_init() when locking c->root: + (1). Thread X has locked c->root(A) write. + (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A). 
+ (3). Thread X bch_btree_set_root() changes c->root from A to B. + (4). Thread X releases the lock(c->root A). + (5). Thread Y successfully locks c->root(A). + (6). Thread Y releases the lock(c->root B). + + down_write locked ---(1)----------------------┐ + | | + | down_read waiting ---(2)----┐ | + | | ┌-------------┐ ┌-------------┐ + bch_btree_set_root ===(3)========>> | c->root A | | c->root B | + | | └-------------┘ └-------------┘ + up_write ---(4)---------------------┘ | | + | | | + down_read locked ---(5)-----------┘ | + | | + up_read ---(6)-----------------------------┘ + +Since c->root may change, the correct steps to lock c->root should be +the same as bch_root_usage(), compare after locking. + +static unsigned int bch_root_usage(struct cache_set *c) +{ + unsigned int bytes = 0; + struct bkey *k; + struct btree *b; + struct btree_iter iter; + + goto lock_root; + + do { + rw_unlock(false, b); +Lock_root: b = c->root; + rw_lock(false, b, b->level); + } while (b != c->root); + + for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) + bytes += bkey_bytes(k); + + rw_unlock(false, b); + + return (bytes * 100) / btree_bytes(c); +} + +Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") +Signed-off-by: Mingzhe Zou +Cc: +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/writeback.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index 77fb72ac6b81..a1d760916246 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void) + void bch_sectors_dirty_init(struct bcache_device *d) + { + int i; ++ struct btree *b = NULL; + struct bkey *k = NULL; + struct btree_iter iter; + struct sectors_dirty_init op; + struct cache_set *c = d->c; + struct bch_dirty_init_state state; + ++retry_lock: ++ b = c->root; ++ rw_lock(0, b, b->level); ++ if (b != c->root) { ++ rw_unlock(0, b); ++ goto retry_lock; ++ } ++ + /* Just count root keys if no leaf node */ +- rw_lock(0, c->root, c->root->level); + if (c->root->level == 0) { + bch_btree_op_init(&op.op, -1); + op.inode = d->id; +@@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) + sectors_dirty_init_fn(&op.op, c->root, k); + } + +- rw_unlock(0, c->root); ++ rw_unlock(0, b); + return; + } + +@@ -1033,7 +1041,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) + out: + /* Must wait for all threads to stop. 
*/ + wait_event(state.wait, atomic_read(&state.started) == 0); +- rw_unlock(0, c->root); ++ rw_unlock(0, b); + } + + void bch_cached_dev_writeback_init(struct cached_dev *dc) +-- +2.35.3 + diff --git a/patches.suse/bcache-fixup-multi-threaded-bch_sectors_dirty_init-w-2faa.patch b/patches.suse/bcache-fixup-multi-threaded-bch_sectors_dirty_init-w-2faa.patch new file mode 100644 index 0000000..3d9ac27 --- /dev/null +++ b/patches.suse/bcache-fixup-multi-threaded-bch_sectors_dirty_init-w-2faa.patch @@ -0,0 +1,129 @@ +From 2faac25d7958c4761bb8cec54adb79f806783ad6 Mon Sep 17 00:00:00 2001 +From: Mingzhe Zou +Date: Mon, 20 Nov 2023 13:25:00 +0800 +Subject: [PATCH] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up + race +Git-commit: 2faac25d7958c4761bb8cec54adb79f806783ad6 +Patch-mainline: v6.7-rc3 +References: git-fixes + +We get a kernel crash about "unable to handle kernel paging request": + +```dmesg +[368033.032005] BUG: unable to handle kernel paging request at ffffffffad9ae4b5 +[368033.032007] PGD fc3a0d067 P4D fc3a0d067 PUD fc3a0e063 PMD 8000000fc38000e1 +[368033.032012] Oops: 0003 [#1] SMP PTI +[368033.032015] CPU: 23 PID: 55090 Comm: bch_dirtcnt[0] Kdump: loaded Tainted: G OE --------- - - 4.18.0-147.5.1.es8_24.x86_64 #1 +[368033.032017] Hardware name: Tsinghua Tongfang THTF Chaoqiang Server/072T6D, BIOS 2.4.3 01/17/2017 +[368033.032027] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 +[368033.032029] Code: 8b 02 48 85 c0 74 f6 48 89 c1 eb d0 c1 e9 12 83 e0 +03 83 e9 01 48 c1 e0 05 48 63 c9 48 05 c0 3d 02 00 48 03 04 cd 60 68 93 +ad <48> 89 10 8b 42 08 85 c0 75 09 f3 90 8b 42 08 85 c0 74 f7 48 8b 02 +[368033.032031] RSP: 0018:ffffbb48852abe00 EFLAGS: 00010082 +[368033.032032] RAX: ffffffffad9ae4b5 RBX: 0000000000000246 RCX: 0000000000003bf3 +[368033.032033] RDX: ffff97b0ff8e3dc0 RSI: 0000000000600000 RDI: ffffbb4884743c68 +[368033.032034] RBP: 0000000000000001 R08: 0000000000000000 R09: 000007ffffffffff +[368033.032035] R10: ffffbb486bb01000 R11: 0000000000000001 R12: ffffffffc068da70 +[368033.032036] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 +[368033.032038] FS: 0000000000000000(0000) GS:ffff97b0ff8c0000(0000) knlGS:0000000000000000 +[368033.032039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[368033.032040] CR2: ffffffffad9ae4b5 CR3: 0000000fc3a0a002 CR4: 00000000003626e0 +[368033.032042] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[368033.032043] bcache: bch_cached_dev_attach() Caching rbd479 as bcache462 on set 8cff3c36-4a76-4242-afaa-7630206bc70b +[368033.032045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[368033.032046] Call Trace: +[368033.032054] _raw_spin_lock_irqsave+0x32/0x40 +[368033.032061] __wake_up_common_lock+0x63/0xc0 +[368033.032073] ? bch_ptr_invalid+0x10/0x10 [bcache] +[368033.033502] bch_dirty_init_thread+0x14c/0x160 [bcache] +[368033.033511] ? read_dirty_submit+0x60/0x60 [bcache] +[368033.033516] kthread+0x112/0x130 +[368033.033520] ? kthread_flush_work_fn+0x10/0x10 +[368033.034505] ret_from_fork+0x35/0x40 +``` + +The crash occurred when call wake_up(&state->wait), and then we want +to look at the value in the state. However, bch_sectors_dirty_init() +is not found in the stack of any task. Since state is allocated on +the stack, we guess that bch_sectors_dirty_init() has exited, causing +bch_dirty_init_thread() to be unable to handle kernel paging request. + +In order to verify this idea, we added some printing information during +wake_up(&state->wait). 
We find that "wake up" is printed twice, however +we only expect the last thread to wake up once. + +```dmesg +[ 994.641004] alcache: bch_dirty_init_thread() wake up +[ 994.641018] alcache: bch_dirty_init_thread() wake up +[ 994.641523] alcache: bch_sectors_dirty_init() init exit +``` + +There is a race. If bch_sectors_dirty_init() exits after the first wake +up, the second wake up will trigger this bug("unable to handle kernel +paging request"). + +Proceed as follows: + +bch_sectors_dirty_init + kthread_run ==============> bch_dirty_init_thread(bch_dirtcnt[0]) + ... ... + atomic_inc(&state.started) ... + ... ... + atomic_read(&state.enough) ... + ... atomic_set(&state->enough, 1) + kthread_run ======================================================> bch_dirty_init_thread(bch_dirtcnt[1]) + ... atomic_dec_and_test(&state->started) ... + atomic_inc(&state.started) ... ... + ... wake_up(&state->wait) ... + atomic_read(&state.enough) atomic_dec_and_test(&state->started) + ... ... + wait_event(state.wait, atomic_read(&state.started) == 0) ... + return ... + wake_up(&state->wait) + +We believe it is very common to wake up twice if there is no dirty, but +crash is an extremely low probability event. It's hard for us to reproduce +this issue. We attached and detached continuously for a week, with a total +of more than one million attaches and only one crash. + +Putting atomic_inc(&state.started) before kthread_run() can avoid waking +up twice. + +Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") +Signed-off-by: Mingzhe Zou +Cc: +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-8-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/writeback.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index a1d760916246..3accfdaee6b1 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -1025,17 +1025,18 @@ void bch_sectors_dirty_init(struct bcache_device *d) + if (atomic_read(&state.enough)) + break; + ++ atomic_inc(&state.started); + state.infos[i].state = &state; + state.infos[i].thread = + kthread_run(bch_dirty_init_thread, &state.infos[i], + "bch_dirtcnt[%d]", i); + if (IS_ERR(state.infos[i].thread)) { + pr_err("fails to run thread bch_dirty_init[%d]\n", i); ++ atomic_dec(&state.started); + for (--i; i >= 0; i--) + kthread_stop(state.infos[i].thread); + goto out; + } +- atomic_inc(&state.started); + } + + out: +-- +2.35.3 + diff --git a/patches.suse/bcache-move-calc_cached_dev_sectors-to-proper-place--0259.patch b/patches.suse/bcache-move-calc_cached_dev_sectors-to-proper-place--0259.patch new file mode 100644 index 0000000..752e8d6 --- /dev/null +++ b/patches.suse/bcache-move-calc_cached_dev_sectors-to-proper-place--0259.patch @@ -0,0 +1,54 @@ +From 0259d4498ba48454749ecfb9c81e892cdb8d1a32 Mon Sep 17 00:00:00 2001 +From: Lin Feng +Date: Wed, 20 Oct 2021 22:38:08 +0800 +Subject: [PATCH] bcache: move calc_cached_dev_sectors to proper place on + backing device detach +Git-commit: 0259d4498ba48454749ecfb9c81e892cdb8d1a32 +Patch-mainline: v5.16-rc1 +References: git-fixes + +Calculation of cache_set's cached sectors is done by travelling +cached_devs list as shown below: + +static void calc_cached_dev_sectors(struct cache_set *c) +{ +... 
+ list_for_each_entry(dc, &c->cached_devs, list) + sectors += bdev_sectors(dc->bdev); + + c->cached_dev_sectors = sectors; +} + +But cached_dev won't be unlinked from c->cached_devs list until we call +following list_move(&dc->list, &uncached_devices), +so previous fix in 'commit 46010141da6677b81cc77f9b47f8ac62bd1cbfd3 +("bcache: recal cached_dev_sectors on detach")' is wrong, now we move +it to its right place. + +Signed-off-by: Lin Feng +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-5-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 62b0140b0a73..dced2ea17431 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1154,9 +1154,9 @@ static void cached_dev_detach_finish(struct work_struct *w) + + mutex_lock(&bch_register_lock); + +- calc_cached_dev_sectors(dc->disk.c); + bcache_device_detach(&dc->disk); + list_move(&dc->list, &uncached_devices); ++ calc_cached_dev_sectors(dc->disk.c); + + clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); + clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); +-- +2.35.3 + diff --git a/patches.suse/bcache-move-uapi-header-bcache.h-to-bcache-code-dire-cf21.patch b/patches.suse/bcache-move-uapi-header-bcache.h-to-bcache-code-dire-cf21.patch new file mode 100644 index 0000000..72f0a1b --- /dev/null +++ b/patches.suse/bcache-move-uapi-header-bcache.h-to-bcache-code-dire-cf21.patch @@ -0,0 +1,988 @@ +From cf2197ca4b8c199d188593ca6800ea1827c42171 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Fri, 29 Oct 2021 14:09:29 +0800 +Subject: [PATCH] bcache: move uapi header bcache.h to bcache code directory +Git-commit: cf2197ca4b8c199d188593ca6800ea1827c42171 +Patch-mainline: v5.16-rc1 +References: git-fixes + +The header file include/uapi/linux/bcache.h is not really a user space +API heaer. This file defines the ondisk format of bcache internal meta +data but no one includes it from user space, bcache-tools has its own +copy of this header with minor modification. + +Therefore, this patch moves include/uapi/linux/bcache.h to bcache code +directory as drivers/md/bcache/bcache_ondisk.h. 
+ +Suggested-by: Arnd Bergmann +Suggested-by: Christoph Hellwig +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211029060930.119923-2-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bcache.h | 2 +- + .../uapi/linux/bcache.h => drivers/md/bcache/bcache_ondisk.h | 0 + drivers/md/bcache/bcache.h | 2 + drivers/md/bcache/bcache_ondisk.h | 445 ++++++++++++++++++++++++++++++++++++++ + drivers/md/bcache/bset.h | 2 + drivers/md/bcache/features.c | 2 + drivers/md/bcache/features.h | 3 + include/uapi/linux/bcache.h | 445 -------------------------------------- + 6 files changed, 450 insertions(+), 449 deletions(-) + rename include/uapi/linux/bcache.h => drivers/md/bcache/bcache_ondisk.h (100%) + +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -178,7 +178,6 @@ + + #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ + +-#include + #include + #include + #include +@@ -190,6 +189,7 @@ + #include + #include + ++#include "bcache_ondisk.h" + #include "bset.h" + #include "util.h" + #include "closure.h" +--- /dev/null ++++ b/drivers/md/bcache/bcache_ondisk.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _LINUX_BCACHE_H ++#define _LINUX_BCACHE_H ++ ++/* ++ * Bcache on disk data structures ++ */ ++ ++#include ++ ++#define BITMASK(name, type, field, offset, size) \ ++static inline __u64 name(const type *k) \ ++{ return (k->field >> offset) & ~(~0ULL << size); } \ ++ \ ++static inline void SET_##name(type *k, __u64 v) \ ++{ \ ++ k->field &= ~(~(~0ULL << size) << offset); \ ++ k->field |= (v & ~(~0ULL << size)) << offset; \ ++} ++ ++/* Btree keys - all units are in sectors */ ++ ++struct bkey { ++ __u64 high; ++ __u64 low; ++ __u64 ptr[]; ++}; ++ ++#define KEY_FIELD(name, field, offset, size) \ ++ BITMASK(name, struct bkey, field, offset, size) ++ ++#define PTR_FIELD(name, offset, size) \ ++static inline __u64 name(const struct bkey *k, unsigned int i) \ ++{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ ++ \ ++static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ ++{ \ ++ k->ptr[i] &= ~(~(~0ULL << size) << offset); \ ++ k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ ++} ++ ++#define KEY_SIZE_BITS 16 ++#define KEY_MAX_U64S 8 ++ ++KEY_FIELD(KEY_PTRS, high, 60, 3) ++KEY_FIELD(HEADER_SIZE, high, 58, 2) ++KEY_FIELD(KEY_CSUM, high, 56, 2) ++KEY_FIELD(KEY_PINNED, high, 55, 1) ++KEY_FIELD(KEY_DIRTY, high, 36, 1) ++ ++KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) ++KEY_FIELD(KEY_INODE, high, 0, 20) ++ ++/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ ++ ++static inline __u64 KEY_OFFSET(const struct bkey *k) ++{ ++ return k->low; ++} ++ ++static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) ++{ ++ k->low = v; ++} ++ ++/* ++ * The high bit being set is a relic from when we used it to do binary ++ * searches - it told you where a key started. It's not used anymore, ++ * and can probably be safely dropped. 
++ */ ++#define KEY(inode, offset, size) \ ++((struct bkey) { \ ++ .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ ++ .low = (offset) \ ++}) ++ ++#define ZERO_KEY KEY(0, 0, 0) ++ ++#define MAX_KEY_INODE (~(~0 << 20)) ++#define MAX_KEY_OFFSET (~0ULL >> 1) ++#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) ++ ++#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) ++#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) ++ ++#define PTR_DEV_BITS 12 ++ ++PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) ++PTR_FIELD(PTR_OFFSET, 8, 43) ++PTR_FIELD(PTR_GEN, 0, 8) ++ ++#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) ++ ++#define MAKE_PTR(gen, offset, dev) \ ++ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) ++ ++/* Bkey utility code */ ++ ++static inline unsigned long bkey_u64s(const struct bkey *k) ++{ ++ return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); ++} ++ ++static inline unsigned long bkey_bytes(const struct bkey *k) ++{ ++ return bkey_u64s(k) * sizeof(__u64); ++} ++ ++#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) ++ ++static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) ++{ ++ SET_KEY_INODE(dest, KEY_INODE(src)); ++ SET_KEY_OFFSET(dest, KEY_OFFSET(src)); ++} ++ ++static inline struct bkey *bkey_next(const struct bkey *k) ++{ ++ __u64 *d = (void *) k; ++ ++ return (struct bkey *) (d + bkey_u64s(k)); ++} ++ ++static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) ++{ ++ __u64 *d = (void *) k; ++ ++ return (struct bkey *) (d + nr_keys); ++} ++/* Enough for a key with 6 pointers */ ++#define BKEY_PAD 8 ++ ++#define BKEY_PADDED(key) \ ++ union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } ++ ++/* Superblock */ ++ ++/* Version 0: Cache device ++ * Version 1: Backing device ++ * Version 2: Seed pointer into btree node checksum ++ * Version 3: Cache device with new UUID format ++ * Version 4: Backing device with data offset ++ */ ++#define BCACHE_SB_VERSION_CDEV 0 ++#define BCACHE_SB_VERSION_BDEV 1 ++#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 ++#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 ++#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 ++#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 ++#define BCACHE_SB_MAX_VERSION 6 ++ ++#define SB_SECTOR 8 ++#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) ++#define SB_SIZE 4096 ++#define SB_LABEL_SIZE 32 ++#define SB_JOURNAL_BUCKETS 256U ++/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ ++#define MAX_CACHES_PER_SET 8 ++ ++#define BDEV_DATA_START_DEFAULT 16 /* sectors */ ++ ++struct cache_sb_disk { ++ __le64 csum; ++ __le64 offset; /* sector where this sb was written */ ++ __le64 version; ++ ++ __u8 magic[16]; ++ ++ __u8 uuid[16]; ++ union { ++ __u8 set_uuid[16]; ++ __le64 set_magic; ++ }; ++ __u8 label[SB_LABEL_SIZE]; ++ ++ __le64 flags; ++ __le64 seq; ++ ++ __le64 feature_compat; ++ __le64 feature_incompat; ++ __le64 feature_ro_compat; ++ ++ __le64 pad[5]; ++ ++ union { ++ struct { ++ /* Cache devices */ ++ __le64 nbuckets; /* device size */ ++ ++ __le16 block_size; /* sectors */ ++ __le16 bucket_size; /* sectors */ ++ ++ __le16 nr_in_set; ++ __le16 nr_this_dev; ++ }; ++ struct { ++ /* Backing devices */ ++ __le64 data_offset; ++ ++ /* ++ * block_size from the cache device section is still used by ++ * backing devices, so don't add anything here until we fix ++ * things to not need it for backing devices anymore ++ */ ++ }; ++ }; ++ ++ __le32 last_mount; /* time overflow in y2106 */ ++ ++ __le16 first_bucket; ++ union { ++ __le16 njournal_buckets; ++ __le16 
keys; ++ }; ++ __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ ++ __le16 obso_bucket_size_hi; /* obsoleted */ ++}; ++ ++/* ++ * This is for in-memory bcache super block. ++ * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member ++ * size, ordering and even whole struct size may be different ++ * from cache_sb_disk. ++ */ ++struct cache_sb { ++ __u64 offset; /* sector where this sb was written */ ++ __u64 version; ++ ++ __u8 magic[16]; ++ ++ __u8 uuid[16]; ++ union { ++ __u8 set_uuid[16]; ++ __u64 set_magic; ++ }; ++ __u8 label[SB_LABEL_SIZE]; ++ ++ __u64 flags; ++ __u64 seq; ++ ++ __u64 feature_compat; ++ __u64 feature_incompat; ++ __u64 feature_ro_compat; ++ ++ union { ++ struct { ++ /* Cache devices */ ++ __u64 nbuckets; /* device size */ ++ ++ __u16 block_size; /* sectors */ ++ __u16 nr_in_set; ++ __u16 nr_this_dev; ++ __u32 bucket_size; /* sectors */ ++ }; ++ struct { ++ /* Backing devices */ ++ __u64 data_offset; ++ ++ /* ++ * block_size from the cache device section is still used by ++ * backing devices, so don't add anything here until we fix ++ * things to not need it for backing devices anymore ++ */ ++ }; ++ }; ++ ++ __u32 last_mount; /* time overflow in y2106 */ ++ ++ __u16 first_bucket; ++ union { ++ __u16 njournal_buckets; ++ __u16 keys; ++ }; ++ __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ ++}; ++ ++static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) ++{ ++ return sb->version == BCACHE_SB_VERSION_BDEV ++ || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET ++ || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; ++} ++ ++BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); ++BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); ++BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); ++#define CACHE_REPLACEMENT_LRU 0U ++#define CACHE_REPLACEMENT_FIFO 1U ++#define CACHE_REPLACEMENT_RANDOM 2U ++ ++BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); ++#define CACHE_MODE_WRITETHROUGH 0U ++#define CACHE_MODE_WRITEBACK 1U ++#define CACHE_MODE_WRITEAROUND 2U ++#define CACHE_MODE_NONE 3U ++BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); ++#define BDEV_STATE_NONE 0U ++#define BDEV_STATE_CLEAN 1U ++#define BDEV_STATE_DIRTY 2U ++#define BDEV_STATE_STALE 3U ++ ++/* ++ * Magic numbers ++ * ++ * The various other data structures have their own magic numbers, which are ++ * xored with the first part of the cache set's UUID ++ */ ++ ++#define JSET_MAGIC 0x245235c1a3625032ULL ++#define PSET_MAGIC 0x6750e15f87337f91ULL ++#define BSET_MAGIC 0x90135c78b99e07f5ULL ++ ++static inline __u64 jset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ JSET_MAGIC; ++} ++ ++static inline __u64 pset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ PSET_MAGIC; ++} ++ ++static inline __u64 bset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ BSET_MAGIC; ++} ++ ++/* ++ * Journal ++ * ++ * On disk format for a journal entry: ++ * seq is monotonically increasing; every journal entry has its own unique ++ * sequence number. ++ * ++ * last_seq is the oldest journal entry that still has keys the btree hasn't ++ * flushed to disk yet. ++ * ++ * version is for on disk format changes. 
++ */ ++ ++#define BCACHE_JSET_VERSION_UUIDv1 1 ++#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ ++#define BCACHE_JSET_VERSION 1 ++ ++struct jset { ++ __u64 csum; ++ __u64 magic; ++ __u64 seq; ++ __u32 version; ++ __u32 keys; ++ ++ __u64 last_seq; ++ ++ BKEY_PADDED(uuid_bucket); ++ BKEY_PADDED(btree_root); ++ __u16 btree_level; ++ __u16 pad[3]; ++ ++ __u64 prio_bucket[MAX_CACHES_PER_SET]; ++ ++ union { ++ struct bkey start[0]; ++ __u64 d[0]; ++ }; ++}; ++ ++/* Bucket prios/gens */ ++ ++struct prio_set { ++ __u64 csum; ++ __u64 magic; ++ __u64 seq; ++ __u32 version; ++ __u32 pad; ++ ++ __u64 next_bucket; ++ ++ struct bucket_disk { ++ __u16 prio; ++ __u8 gen; ++ } __attribute((packed)) data[]; ++}; ++ ++/* UUIDS - per backing device/flash only volume metadata */ ++ ++struct uuid_entry { ++ union { ++ struct { ++ __u8 uuid[16]; ++ __u8 label[32]; ++ __u32 first_reg; /* time overflow in y2106 */ ++ __u32 last_reg; ++ __u32 invalidated; ++ ++ __u32 flags; ++ /* Size of flash only volumes */ ++ __u64 sectors; ++ }; ++ ++ __u8 pad[128]; ++ }; ++}; ++ ++BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); ++ ++/* Btree nodes */ ++ ++/* Version 1: Seed pointer into btree node checksum ++ */ ++#define BCACHE_BSET_CSUM 1 ++#define BCACHE_BSET_VERSION 1 ++ ++/* ++ * Btree nodes ++ * ++ * On disk a btree node is a list/log of these; within each set the keys are ++ * sorted ++ */ ++struct bset { ++ __u64 csum; ++ __u64 magic; ++ __u64 seq; ++ __u32 version; ++ __u32 keys; ++ ++ union { ++ struct bkey start[0]; ++ __u64 d[0]; ++ }; ++}; ++ ++/* OBSOLETE */ ++ ++/* UUIDS - per backing device/flash only volume metadata */ ++ ++struct uuid_entry_v0 { ++ __u8 uuid[16]; ++ __u8 label[32]; ++ __u32 first_reg; ++ __u32 last_reg; ++ __u32 invalidated; ++ __u32 pad; ++}; ++ ++#endif /* _LINUX_BCACHE_H */ +--- a/drivers/md/bcache/bset.h ++++ b/drivers/md/bcache/bset.h +@@ -2,10 +2,10 @@ + #ifndef _BCACHE_BSET_H + #define _BCACHE_BSET_H + +-#include + #include + #include + ++#include "bcache_ondisk.h" + #include "util.h" /* for time_stats */ + + /* +--- a/drivers/md/bcache/features.c ++++ b/drivers/md/bcache/features.c +@@ -6,7 +6,7 @@ + * Copyright 2020 Coly Li + * + */ +-#include ++#include "bcache_ondisk.h" + #include "bcache.h" + #include "features.h" + +--- a/drivers/md/bcache/features.h ++++ b/drivers/md/bcache/features.h +@@ -2,10 +2,11 @@ + #ifndef _BCACHE_FEATURES_H + #define _BCACHE_FEATURES_H + +-#include + #include + #include + ++#include "bcache_ondisk.h" ++ + #define BCH_FEATURE_COMPAT 0 + #define BCH_FEATURE_RO_COMPAT 1 + #define BCH_FEATURE_INCOMPAT 2 +--- a/include/uapi/linux/bcache.h ++++ /dev/null +@@ -1,445 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +-#ifndef _LINUX_BCACHE_H +-#define _LINUX_BCACHE_H +- +-/* +- * Bcache on disk data structures +- */ +- +-#include +- +-#define BITMASK(name, type, field, offset, size) \ +-static inline __u64 name(const type *k) \ +-{ return (k->field >> offset) & ~(~0ULL << size); } \ +- \ +-static inline void SET_##name(type *k, __u64 v) \ +-{ \ +- k->field &= ~(~(~0ULL << size) << offset); \ +- k->field |= (v & ~(~0ULL << size)) << offset; \ +-} +- +-/* Btree keys - all units are in sectors */ +- +-struct bkey { +- __u64 high; +- __u64 low; +- __u64 ptr[]; +-}; +- +-#define KEY_FIELD(name, field, offset, size) \ +- BITMASK(name, struct bkey, field, offset, size) +- +-#define PTR_FIELD(name, offset, size) \ +-static inline __u64 name(const struct bkey *k, unsigned int i) \ +-{ return (k->ptr[i] >> offset) & 
~(~0ULL << size); } \ +- \ +-static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ +-{ \ +- k->ptr[i] &= ~(~(~0ULL << size) << offset); \ +- k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ +-} +- +-#define KEY_SIZE_BITS 16 +-#define KEY_MAX_U64S 8 +- +-KEY_FIELD(KEY_PTRS, high, 60, 3) +-KEY_FIELD(HEADER_SIZE, high, 58, 2) +-KEY_FIELD(KEY_CSUM, high, 56, 2) +-KEY_FIELD(KEY_PINNED, high, 55, 1) +-KEY_FIELD(KEY_DIRTY, high, 36, 1) +- +-KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) +-KEY_FIELD(KEY_INODE, high, 0, 20) +- +-/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ +- +-static inline __u64 KEY_OFFSET(const struct bkey *k) +-{ +- return k->low; +-} +- +-static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) +-{ +- k->low = v; +-} +- +-/* +- * The high bit being set is a relic from when we used it to do binary +- * searches - it told you where a key started. It's not used anymore, +- * and can probably be safely dropped. +- */ +-#define KEY(inode, offset, size) \ +-((struct bkey) { \ +- .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ +- .low = (offset) \ +-}) +- +-#define ZERO_KEY KEY(0, 0, 0) +- +-#define MAX_KEY_INODE (~(~0 << 20)) +-#define MAX_KEY_OFFSET (~0ULL >> 1) +-#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) +- +-#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) +-#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) +- +-#define PTR_DEV_BITS 12 +- +-PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) +-PTR_FIELD(PTR_OFFSET, 8, 43) +-PTR_FIELD(PTR_GEN, 0, 8) +- +-#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) +- +-#define MAKE_PTR(gen, offset, dev) \ +- ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) +- +-/* Bkey utility code */ +- +-static inline unsigned long bkey_u64s(const struct bkey *k) +-{ +- return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); +-} +- +-static inline unsigned long bkey_bytes(const struct bkey *k) +-{ +- return bkey_u64s(k) * sizeof(__u64); +-} +- +-#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) +- +-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) +-{ +- SET_KEY_INODE(dest, KEY_INODE(src)); +- SET_KEY_OFFSET(dest, KEY_OFFSET(src)); +-} +- +-static inline struct bkey *bkey_next(const struct bkey *k) +-{ +- __u64 *d = (void *) k; +- +- return (struct bkey *) (d + bkey_u64s(k)); +-} +- +-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) +-{ +- __u64 *d = (void *) k; +- +- return (struct bkey *) (d + nr_keys); +-} +-/* Enough for a key with 6 pointers */ +-#define BKEY_PAD 8 +- +-#define BKEY_PADDED(key) \ +- union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } +- +-/* Superblock */ +- +-/* Version 0: Cache device +- * Version 1: Backing device +- * Version 2: Seed pointer into btree node checksum +- * Version 3: Cache device with new UUID format +- * Version 4: Backing device with data offset +- */ +-#define BCACHE_SB_VERSION_CDEV 0 +-#define BCACHE_SB_VERSION_BDEV 1 +-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +-#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 +-#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 +-#define BCACHE_SB_MAX_VERSION 6 +- +-#define SB_SECTOR 8 +-#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) +-#define SB_SIZE 4096 +-#define SB_LABEL_SIZE 32 +-#define SB_JOURNAL_BUCKETS 256U +-/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ +-#define MAX_CACHES_PER_SET 8 +- +-#define BDEV_DATA_START_DEFAULT 16 /* sectors */ +- +-struct cache_sb_disk 
{ +- __le64 csum; +- __le64 offset; /* sector where this sb was written */ +- __le64 version; +- +- __u8 magic[16]; +- +- __u8 uuid[16]; +- union { +- __u8 set_uuid[16]; +- __le64 set_magic; +- }; +- __u8 label[SB_LABEL_SIZE]; +- +- __le64 flags; +- __le64 seq; +- +- __le64 feature_compat; +- __le64 feature_incompat; +- __le64 feature_ro_compat; +- +- __le64 pad[5]; +- +- union { +- struct { +- /* Cache devices */ +- __le64 nbuckets; /* device size */ +- +- __le16 block_size; /* sectors */ +- __le16 bucket_size; /* sectors */ +- +- __le16 nr_in_set; +- __le16 nr_this_dev; +- }; +- struct { +- /* Backing devices */ +- __le64 data_offset; +- +- /* +- * block_size from the cache device section is still used by +- * backing devices, so don't add anything here until we fix +- * things to not need it for backing devices anymore +- */ +- }; +- }; +- +- __le32 last_mount; /* time overflow in y2106 */ +- +- __le16 first_bucket; +- union { +- __le16 njournal_buckets; +- __le16 keys; +- }; +- __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +- __le16 obso_bucket_size_hi; /* obsoleted */ +-}; +- +-/* +- * This is for in-memory bcache super block. +- * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member +- * size, ordering and even whole struct size may be different +- * from cache_sb_disk. +- */ +-struct cache_sb { +- __u64 offset; /* sector where this sb was written */ +- __u64 version; +- +- __u8 magic[16]; +- +- __u8 uuid[16]; +- union { +- __u8 set_uuid[16]; +- __u64 set_magic; +- }; +- __u8 label[SB_LABEL_SIZE]; +- +- __u64 flags; +- __u64 seq; +- +- __u64 feature_compat; +- __u64 feature_incompat; +- __u64 feature_ro_compat; +- +- union { +- struct { +- /* Cache devices */ +- __u64 nbuckets; /* device size */ +- +- __u16 block_size; /* sectors */ +- __u16 nr_in_set; +- __u16 nr_this_dev; +- __u32 bucket_size; /* sectors */ +- }; +- struct { +- /* Backing devices */ +- __u64 data_offset; +- +- /* +- * block_size from the cache device section is still used by +- * backing devices, so don't add anything here until we fix +- * things to not need it for backing devices anymore +- */ +- }; +- }; +- +- __u32 last_mount; /* time overflow in y2106 */ +- +- __u16 first_bucket; +- union { +- __u16 njournal_buckets; +- __u16 keys; +- }; +- __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +-}; +- +-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) +-{ +- return sb->version == BCACHE_SB_VERSION_BDEV +- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET +- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; +-} +- +-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +-#define CACHE_REPLACEMENT_LRU 0U +-#define CACHE_REPLACEMENT_FIFO 1U +-#define CACHE_REPLACEMENT_RANDOM 2U +- +-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +-#define CACHE_MODE_WRITETHROUGH 0U +-#define CACHE_MODE_WRITEBACK 1U +-#define CACHE_MODE_WRITEAROUND 2U +-#define CACHE_MODE_NONE 3U +-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +-#define BDEV_STATE_NONE 0U +-#define BDEV_STATE_CLEAN 1U +-#define BDEV_STATE_DIRTY 2U +-#define BDEV_STATE_STALE 3U +- +-/* +- * Magic numbers +- * +- * The various other data structures have their own magic numbers, which are +- * xored with the first part of the cache set's UUID +- */ +- +-#define JSET_MAGIC 0x245235c1a3625032ULL +-#define PSET_MAGIC 0x6750e15f87337f91ULL +-#define BSET_MAGIC 0x90135c78b99e07f5ULL +- +-static inline __u64 
jset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ JSET_MAGIC; +-} +- +-static inline __u64 pset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ PSET_MAGIC; +-} +- +-static inline __u64 bset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ BSET_MAGIC; +-} +- +-/* +- * Journal +- * +- * On disk format for a journal entry: +- * seq is monotonically increasing; every journal entry has its own unique +- * sequence number. +- * +- * last_seq is the oldest journal entry that still has keys the btree hasn't +- * flushed to disk yet. +- * +- * version is for on disk format changes. +- */ +- +-#define BCACHE_JSET_VERSION_UUIDv1 1 +-#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ +-#define BCACHE_JSET_VERSION 1 +- +-struct jset { +- __u64 csum; +- __u64 magic; +- __u64 seq; +- __u32 version; +- __u32 keys; +- +- __u64 last_seq; +- +- BKEY_PADDED(uuid_bucket); +- BKEY_PADDED(btree_root); +- __u16 btree_level; +- __u16 pad[3]; +- +- __u64 prio_bucket[MAX_CACHES_PER_SET]; +- +- union { +- struct bkey start[0]; +- __u64 d[0]; +- }; +-}; +- +-/* Bucket prios/gens */ +- +-struct prio_set { +- __u64 csum; +- __u64 magic; +- __u64 seq; +- __u32 version; +- __u32 pad; +- +- __u64 next_bucket; +- +- struct bucket_disk { +- __u16 prio; +- __u8 gen; +- } __attribute((packed)) data[]; +-}; +- +-/* UUIDS - per backing device/flash only volume metadata */ +- +-struct uuid_entry { +- union { +- struct { +- __u8 uuid[16]; +- __u8 label[32]; +- __u32 first_reg; /* time overflow in y2106 */ +- __u32 last_reg; +- __u32 invalidated; +- +- __u32 flags; +- /* Size of flash only volumes */ +- __u64 sectors; +- }; +- +- __u8 pad[128]; +- }; +-}; +- +-BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); +- +-/* Btree nodes */ +- +-/* Version 1: Seed pointer into btree node checksum +- */ +-#define BCACHE_BSET_CSUM 1 +-#define BCACHE_BSET_VERSION 1 +- +-/* +- * Btree nodes +- * +- * On disk a btree node is a list/log of these; within each set the keys are +- * sorted +- */ +-struct bset { +- __u64 csum; +- __u64 magic; +- __u64 seq; +- __u32 version; +- __u32 keys; +- +- union { +- struct bkey start[0]; +- __u64 d[0]; +- }; +-}; +- +-/* OBSOLETE */ +- +-/* UUIDS - per backing device/flash only volume metadata */ +- +-struct uuid_entry_v0 { +- __u8 uuid[16]; +- __u8 label[32]; +- __u32 first_reg; +- __u32 last_reg; +- __u32 invalidated; +- __u32 pad; +-}; +- +-#endif /* _LINUX_BCACHE_H */ diff --git a/patches.suse/bcache-prevent-potential-division-by-zero-error-2c7f.patch b/patches.suse/bcache-prevent-potential-division-by-zero-error-2c7f.patch new file mode 100644 index 0000000..2bb30f1 --- /dev/null +++ b/patches.suse/bcache-prevent-potential-division-by-zero-error-2c7f.patch @@ -0,0 +1,57 @@ +From 2c7f497ac274a14330208b18f6f734000868ebf9 Mon Sep 17 00:00:00 2001 +From: Rand Deeb +Date: Mon, 20 Nov 2023 13:24:57 +0800 +Subject: [PATCH] bcache: prevent potential division by zero error +Git-commit: 2c7f497ac274a14330208b18f6f734000868ebf9 +Patch-mainline: v6.7-rc3 +References: git-fixes + +In SHOW(), the variable 'n' is of type 'size_t.' While there is a +conditional check to verify that 'n' is not equal to zero before +executing the 'do_div' macro, concerns arise regarding potential +division by zero error in 64-bit environments. + +The concern arises when 'n' is 64 bits in size, greater than zero, and +the lower 32 bits of it are zeros. 
In such cases, the conditional check +passes because 'n' is non-zero, but the 'do_div' macro casts 'n' to +'uint32_t,' effectively truncating it to its lower 32 bits. +Consequently, the 'n' value becomes zero. + +To fix this potential division by zero error and ensure precise +division handling, this commit replaces the 'do_div' macro with +div64_u64(). div64_u64() is designed to work with 64-bit operands, +guaranteeing that division is performed correctly. + +This change enhances the robustness of the code, ensuring that division +operations yield accurate results in all scenarios, eliminating the +possibility of division by zero, and improving compatibility across +different 64-bit environments. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Signed-off-by: Rand Deeb +Cc: +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-5-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/sysfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 45d8af755de6..a438efb66069 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -1104,7 +1104,7 @@ SHOW(__bch_cache) + sum += INITIAL_PRIO - cached[i]; + + if (n) +- do_div(sum, n); ++ sum = div64_u64(sum, n); + + for (i = 0; i < ARRAY_SIZE(q); i++) + q[i] = INITIAL_PRIO - cached[n * (i + 1) / +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-EXPERIMENTAL-for-Kconfig-option-Asynch-640c.patch b/patches.suse/bcache-remove-EXPERIMENTAL-for-Kconfig-option-Asynch-640c.patch new file mode 100644 index 0000000..8e9bd1c --- /dev/null +++ b/patches.suse/bcache-remove-EXPERIMENTAL-for-Kconfig-option-Asynch-640c.patch @@ -0,0 +1,43 @@ +From 640c46a21f89364f04445cdd43b61eb46bd49b5d Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Tue, 19 Jul 2022 12:27:24 +0800 +Subject: [PATCH] bcache: remove EXPERIMENTAL for Kconfig option 'Asynchronous + device registration' +Git-commit: 640c46a21f89364f04445cdd43b61eb46bd49b5d +Patch-mainline: v6.0-rc1 +References: git-fixes + +The "Asynchronous device registration (EXPERIMENTAL)" Kconfig option is +for 2+ years, it is used when registration takes too much time for +massive amount of cached data, to avoid udev task timeout during boot +time. + +Many users and products enable this Kconfig option for quite long time +(e.g. SUSE Linux) and it works as expected and no issue reported. + +It is time to remove the "EXPERIMENTAL" tag from this Kconfig item. + +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20220719042724.8498-2-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig +index cf3e8096942a..529c9d04e9a4 100644 +--- a/drivers/md/bcache/Kconfig ++++ b/drivers/md/bcache/Kconfig +@@ -29,7 +29,7 @@ config BCACHE_CLOSURES_DEBUG + operations that get stuck. + + config BCACHE_ASYNC_REGISTRATION +- bool "Asynchronous device registration (EXPERIMENTAL)" ++ bool "Asynchronous device registration" + depends on BCACHE + help + Add a sysfs file /sys/fs/bcache/register_async. 
Writing registering +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-bch_crc64_update-39fa.patch b/patches.suse/bcache-remove-bch_crc64_update-39fa.patch new file mode 100644 index 0000000..cafb871 --- /dev/null +++ b/patches.suse/bcache-remove-bch_crc64_update-39fa.patch @@ -0,0 +1,69 @@ +From 39fa7a95552cc851029267b97c1317f1dea61cad Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 22:38:12 +0800 +Subject: [PATCH] bcache: remove bch_crc64_update +Git-commit: 39fa7a95552cc851029267b97c1317f1dea61cad +Patch-mainline: v5.16-rc1 +References: git-fixes + +bch_crc64_update is an entirely pointless wrapper around crc64_be. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-9-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 2 +- + drivers/md/bcache/request.c | 2 +- + drivers/md/bcache/util.h | 8 -------- + 3 files changed, 2 insertions(+), 10 deletions(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 0595559de174..93b67b8d31c3 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -141,7 +141,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) + uint64_t crc = b->key.ptr[0]; + void *data = (void *) i + 8, *end = bset_bkey_last(i); + +- crc = bch_crc64_update(crc, data, end - data); ++ crc = crc64_be(crc, data, end - data); + return crc ^ 0xffffffffffffffffULL; + } + +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index f86909a66ac6..d15aae6c51c1 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -46,7 +46,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) + bio_for_each_segment(bv, bio, iter) { + void *d = kmap(bv.bv_page) + bv.bv_offset; + +- csum = bch_crc64_update(csum, d, bv.bv_len); ++ csum = crc64_be(csum, d, bv.bv_len); + kunmap(bv.bv_page); + } + +diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h +index b64460a76267..6274d6a17e5e 100644 +--- a/drivers/md/bcache/util.h ++++ b/drivers/md/bcache/util.h +@@ -548,14 +548,6 @@ static inline uint64_t bch_crc64(const void *p, size_t len) + return crc ^ 0xffffffffffffffffULL; + } + +-static inline uint64_t bch_crc64_update(uint64_t crc, +- const void *p, +- size_t len) +-{ +- crc = crc64_be(crc, p, len); +- return crc; +-} +- + /* + * A stepwise-linear pseudo-exponential. This returns 1 << (x >> + * frac_bits), with the less-significant bits filled in by linear +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-redundant-assignment-to-variable-cur_i-be93.patch b/patches.suse/bcache-remove-redundant-assignment-to-variable-cur_i-be93.patch new file mode 100644 index 0000000..399624b --- /dev/null +++ b/patches.suse/bcache-remove-redundant-assignment-to-variable-cur_i-be93.patch @@ -0,0 +1,41 @@ +From be93825f0e6428c2d3f03a6e4d447dc48d33d7ff Mon Sep 17 00:00:00 2001 +From: Colin Ian King +Date: Mon, 20 Nov 2023 13:24:56 +0800 +Subject: [PATCH] bcache: remove redundant assignment to variable cur_idx +Git-commit: be93825f0e6428c2d3f03a6e4d447dc48d33d7ff +Patch-mainline: v6.7-rc3 +References: git-fixes + +Variable cur_idx is being initialized with a value that is never read, +it is being re-assigned later in a while-loop. Remove the redundant +assignment. 
Cleans up clang scan build warning: + +drivers/md/bcache/writeback.c:916:2: warning: Value stored to 'cur_idx' +is never read [deadcode.DeadStores] + +Signed-off-by: Colin Ian King +Reviewed-by: Coly Li +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-4-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/writeback.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index 24c049067f61..c3e872e0a6f2 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -913,7 +913,7 @@ static int bch_dirty_init_thread(void *arg) + int cur_idx, prev_idx, skip_nr; + + k = p = NULL; +- cur_idx = prev_idx = 0; ++ prev_idx = 0; + + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-the-backing_dev_name-field-from-struct-0f5c.patch b/patches.suse/bcache-remove-the-backing_dev_name-field-from-struct-0f5c.patch new file mode 100644 index 0000000..55286ef --- /dev/null +++ b/patches.suse/bcache-remove-the-backing_dev_name-field-from-struct-0f5c.patch @@ -0,0 +1,262 @@ +From 0f5cd7815f7f4bb1dd340a9aeb9b9d6a7c7eec22 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 22:38:10 +0800 +Subject: [PATCH] bcache: remove the backing_dev_name field from struct + cached_dev +Git-commit: 0f5cd7815f7f4bb1dd340a9aeb9b9d6a7c7eec22 +Patch-mainline: v5.16-rc1 +References: git-fixes + +Just use the %pg format specifier to print the name directly. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-7-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bcache.h | 2 -- + drivers/md/bcache/debug.c | 4 ++-- + drivers/md/bcache/io.c | 8 +++---- + drivers/md/bcache/request.c | 4 ++-- + drivers/md/bcache/super.c | 48 ++++++++++++++++--------------------- + drivers/md/bcache/sysfs.c | 2 +- + 6 files changed, 29 insertions(+), 39 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 47ff9ecea2e2..941685409c68 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -395,8 +395,6 @@ struct cached_dev { + atomic_t io_errors; + unsigned int error_limit; + unsigned int offline_seconds; +- +- char backing_dev_name[BDEVNAME_SIZE]; + }; + + enum alloc_reserve { +diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c +index 116edda845c3..e803cad864be 100644 +--- a/drivers/md/bcache/debug.c ++++ b/drivers/md/bcache/debug.c +@@ -137,8 +137,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) + p2 + bv.bv_offset, + bv.bv_len), + dc->disk.c, +- "verify failed at dev %s sector %llu", +- dc->backing_dev_name, ++ "verify failed at dev %pg sector %llu", ++ dc->bdev, + (uint64_t) bio->bi_iter.bi_sector); + + kunmap_atomic(p1); +diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c +index 564357de7640..9c6f9ec55b72 100644 +--- a/drivers/md/bcache/io.c ++++ b/drivers/md/bcache/io.c +@@ -65,15 +65,15 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) + * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. 
+ */ + if (bio->bi_opf & REQ_RAHEAD) { +- pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", +- dc->backing_dev_name); ++ pr_warn_ratelimited("%pg: Read-ahead I/O failed on backing device, ignore\n", ++ dc->bdev); + return; + } + + errors = atomic_add_return(1, &dc->io_errors); + if (errors < dc->error_limit) +- pr_err("%s: IO error on backing device, unrecoverable\n", +- dc->backing_dev_name); ++ pr_err("%pg: IO error on backing device, unrecoverable\n", ++ dc->bdev); + else + bch_cached_dev_error(dc); + } +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 23b28edae90f..f86909a66ac6 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -651,8 +651,8 @@ static void backing_request_endio(struct bio *bio) + */ + if (unlikely(s->iop.writeback && + bio->bi_opf & REQ_PREFLUSH)) { +- pr_err("Can't flush %s: returned bi_status %i\n", +- dc->backing_dev_name, bio->bi_status); ++ pr_err("Can't flush %pg: returned bi_status %i\n", ++ dc->bdev, bio->bi_status); + } else { + /* set to orig_bio->bi_status in bio_complete() */ + s->iop.status = bio->bi_status; +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 88cdce218f5c..dc35f6e1d8d3 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1026,8 +1026,8 @@ static int cached_dev_status_update(void *arg) + dc->offline_seconds = 0; + + if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { +- pr_err("%s: device offline for %d seconds\n", +- dc->backing_dev_name, ++ pr_err("%pg: device offline for %d seconds\n", ++ dc->bdev, + BACKING_DEV_OFFLINE_TIMEOUT); + pr_err("%s: disable I/O request due to backing device offline\n", + dc->disk.name); +@@ -1058,15 +1058,13 @@ int bch_cached_dev_run(struct cached_dev *dc) + }; + + if (dc->io_disable) { +- pr_err("I/O disabled on cached dev %s\n", +- dc->backing_dev_name); ++ pr_err("I/O disabled on cached dev %pg\n", dc->bdev); + ret = -EIO; + goto out; + } + + if (atomic_xchg(&dc->running, 1)) { +- pr_info("cached dev %s is running already\n", +- dc->backing_dev_name); ++ pr_info("cached dev %pg is running already\n", dc->bdev); + ret = -EBUSY; + goto out; + } +@@ -1163,7 +1161,7 @@ static void cached_dev_detach_finish(struct work_struct *w) + + mutex_unlock(&bch_register_lock); + +- pr_info("Caching disabled for %s\n", dc->backing_dev_name); ++ pr_info("Caching disabled for %pg\n", dc->bdev); + + /* Drop ref we took in cached_dev_detach() */ + closure_put(&dc->disk.cl); +@@ -1203,29 +1201,27 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + return -ENOENT; + + if (dc->disk.c) { +- pr_err("Can't attach %s: already attached\n", +- dc->backing_dev_name); ++ pr_err("Can't attach %pg: already attached\n", dc->bdev); + return -EINVAL; + } + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) { +- pr_err("Can't attach %s: shutting down\n", +- dc->backing_dev_name); ++ pr_err("Can't attach %pg: shutting down\n", dc->bdev); + return -EINVAL; + } + + if (dc->sb.block_size < c->cache->sb.block_size) { + /* Will die */ +- pr_err("Couldn't attach %s: block size less than set's block size\n", +- dc->backing_dev_name); ++ pr_err("Couldn't attach %pg: block size less than set's block size\n", ++ dc->bdev); + return -EINVAL; + } + + /* Check whether already attached */ + list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { + if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { +- pr_err("Tried to attach %s but duplicate UUID already attached\n", +- dc->backing_dev_name); ++ 
pr_err("Tried to attach %pg but duplicate UUID already attached\n", ++ dc->bdev); + + return -EINVAL; + } +@@ -1243,15 +1239,13 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + + if (!u) { + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { +- pr_err("Couldn't find uuid for %s in set\n", +- dc->backing_dev_name); ++ pr_err("Couldn't find uuid for %pg in set\n", dc->bdev); + return -ENOENT; + } + + u = uuid_find_empty(c); + if (!u) { +- pr_err("Not caching %s, no room for UUID\n", +- dc->backing_dev_name); ++ pr_err("Not caching %pg, no room for UUID\n", dc->bdev); + return -EINVAL; + } + } +@@ -1319,8 +1313,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + */ + kthread_stop(dc->writeback_thread); + cancel_writeback_rate_update_dwork(dc); +- pr_err("Couldn't run cached device %s\n", +- dc->backing_dev_name); ++ pr_err("Couldn't run cached device %pg\n", dc->bdev); + return ret; + } + +@@ -1336,8 +1329,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + /* Allow the writeback thread to proceed */ + up_write(&dc->writeback_lock); + +- pr_info("Caching %s as %s on set %pU\n", +- dc->backing_dev_name, ++ pr_info("Caching %pg as %s on set %pU\n", ++ dc->bdev, + dc->disk.disk->disk_name, + dc->disk.c->set_uuid); + return 0; +@@ -1461,7 +1454,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, + struct cache_set *c; + int ret = -ENOMEM; + +- bdevname(bdev, dc->backing_dev_name); + memcpy(&dc->sb, sb, sizeof(struct cache_sb)); + dc->bdev = bdev; + dc->bdev->bd_holder = dc; +@@ -1476,7 +1468,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, + if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) + goto err; + +- pr_info("registered backing device %s\n", dc->backing_dev_name); ++ pr_info("registered backing device %pg\n", dc->bdev); + + list_add(&dc->list, &uncached_devices); + /* attach to a matched cache set if it exists */ +@@ -1493,7 +1485,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, + + return 0; + err: +- pr_notice("error %s: %s\n", dc->backing_dev_name, err); ++ pr_notice("error %pg: %s\n", dc->bdev, err); + bcache_device_stop(&dc->disk); + return ret; + } +@@ -1621,8 +1613,8 @@ bool bch_cached_dev_error(struct cached_dev *dc) + /* make others know io_disable is true earlier */ + smp_mb(); + +- pr_err("stop %s: too many IO errors on backing device %s\n", +- dc->disk.disk->disk_name, dc->backing_dev_name); ++ pr_err("stop %s: too many IO errors on backing device %pg\n", ++ dc->disk.disk->disk_name, dc->bdev); + + bcache_device_stop(&dc->disk); + return true; +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 05ac1d6fbbf3..1f0dce30fa75 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -271,7 +271,7 @@ SHOW(__bch_cached_dev) + } + + if (attr == &sysfs_backing_dev_name) { +- snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name); ++ snprintf(buf, BDEVNAME_SIZE + 1, "%pg", dc->bdev); + strcat(buf, "\n"); + return strlen(buf); + } +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-the-cache_dev_name-field-from-struct-c-7e84.patch b/patches.suse/bcache-remove-the-cache_dev_name-field-from-struct-c-7e84.patch new file mode 100644 index 0000000..26163c9 --- /dev/null +++ b/patches.suse/bcache-remove-the-cache_dev_name-field-from-struct-c-7e84.patch @@ -0,0 +1,97 @@ +From 7e84c2150731faec088ebfe33459f61d118b2497 Mon Sep 17 00:00:00 2001 +From: Christoph 
Hellwig +Date: Wed, 20 Oct 2021 22:38:09 +0800 +Subject: [PATCH] bcache: remove the cache_dev_name field from struct cache +Git-commit: 7e84c2150731faec088ebfe33459f61d118b2497 +Patch-mainline: v5.16-rc1 +References: git-fixes + +Just use the %pg format specifier to print the name directly. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-6-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/bcache.h | 2 -- + drivers/md/bcache/io.c | 8 ++++---- + drivers/md/bcache/super.c | 7 +++---- + 3 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 5fc989a6d452..47ff9ecea2e2 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -470,8 +470,6 @@ struct cache { + atomic_long_t meta_sectors_written; + atomic_long_t btree_sectors_written; + atomic_long_t sectors_written; +- +- char cache_dev_name[BDEVNAME_SIZE]; + }; + + struct gc_stat { +diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c +index e4388fe3ab7e..564357de7640 100644 +--- a/drivers/md/bcache/io.c ++++ b/drivers/md/bcache/io.c +@@ -123,13 +123,13 @@ void bch_count_io_errors(struct cache *ca, + errors >>= IO_ERROR_SHIFT; + + if (errors < ca->set->error_limit) +- pr_err("%s: IO error on %s%s\n", +- ca->cache_dev_name, m, ++ pr_err("%pg: IO error on %s%s\n", ++ ca->bdev, m, + is_read ? ", recovering." : "."); + else + bch_cache_set_error(ca->set, +- "%s: too many IO errors %s\n", +- ca->cache_dev_name, m); ++ "%pg: too many IO errors %s\n", ++ ca->bdev, m); + } + } + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index dced2ea17431..88cdce218f5c 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2338,7 +2338,7 @@ static int cache_alloc(struct cache *ca) + err_free: + module_put(THIS_MODULE); + if (err) +- pr_notice("error %s: %s\n", ca->cache_dev_name, err); ++ pr_notice("error %pg: %s\n", ca->bdev, err); + return ret; + } + +@@ -2348,7 +2348,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, + const char *err = NULL; /* must be set for any error case */ + int ret = 0; + +- bdevname(bdev, ca->cache_dev_name); + memcpy(&ca->sb, sb, sizeof(struct cache_sb)); + ca->bdev = bdev; + ca->bdev->bd_holder = ca; +@@ -2390,14 +2389,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, + goto out; + } + +- pr_info("registered cache device %s\n", ca->cache_dev_name); ++ pr_info("registered cache device %pg\n", ca->bdev); + + out: + kobject_put(&ca->kobj); + + err: + if (err) +- pr_notice("error %s: %s\n", ca->cache_dev_name, err); ++ pr_notice("error %pg: %s\n", ca->bdev, err); + + return ret; + } +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-unnecessary-flush_workqueue-97d2.patch b/patches.suse/bcache-remove-unnecessary-flush_workqueue-97d2.patch new file mode 100644 index 0000000..031dcd3 --- /dev/null +++ b/patches.suse/bcache-remove-unnecessary-flush_workqueue-97d2.patch @@ -0,0 +1,40 @@ +From 97d26ae764a43bfaf870312761a0a0f9b49b6351 Mon Sep 17 00:00:00 2001 +From: Li Lei +Date: Tue, 20 Sep 2022 00:16:43 +0800 +Subject: [PATCH] bcache: remove unnecessary flush_workqueue +Git-commit: 97d26ae764a43bfaf870312761a0a0f9b49b6351 +Patch-mainline: v6.1-rc1 +References: git-fixes + +All pending works will be drained by destroy_workqueue(), no need to call +flush_workqueue() explicitly. 
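+
+A minimal sketch of the pattern this relies on (destroy_workqueue() drains
+whatever is still queued before tearing the workqueue down, so the separate
+flush adds nothing):
+
+  /* before: flush, then destroy */
+  flush_workqueue(wq);
+  destroy_workqueue(wq);
+
+  /* after: destroy_workqueue() drains pending work itself */
+  destroy_workqueue(wq);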
+ +Signed-off-by: Li Lei +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20220919161647.81238-2-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/writeback.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index 3f0ff3aab6f2..647661005176 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -801,10 +801,9 @@ static int bch_writeback_thread(void *arg) + } + } + +- if (dc->writeback_write_wq) { +- flush_workqueue(dc->writeback_write_wq); ++ if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); +- } ++ + cached_dev_put(dc); + wait_for_kthread_stop(); + +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-unused-bch_mark_cache_readahead-functi-d86b.patch b/patches.suse/bcache-remove-unused-bch_mark_cache_readahead-functi-d86b.patch new file mode 100644 index 0000000..f19ad64 --- /dev/null +++ b/patches.suse/bcache-remove-unused-bch_mark_cache_readahead-functi-d86b.patch @@ -0,0 +1,37 @@ +From d86b4e6dc88826f2b5cfa90c4ebbccb19a88bc39 Mon Sep 17 00:00:00 2001 +From: Lin Feng +Date: Tue, 20 Sep 2022 00:16:44 +0800 +Subject: [PATCH] bcache: remove unused bch_mark_cache_readahead function def + in stats.h +Git-commit: d86b4e6dc88826f2b5cfa90c4ebbccb19a88bc39 +Patch-mainline: v6.1-rc1 +References: git-fixes + +This is a cleanup for commit 1616a4c2ab1a ("bcache: remove bcache device +self-defined readahead")', currently no user for +bch_mark_cache_readahead() since that commit. + +Signed-off-by: Lin Feng +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20220919161647.81238-3-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/stats.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h +index ca4f435f7216..bd3afc856d53 100644 +--- a/drivers/md/bcache/stats.h ++++ b/drivers/md/bcache/stats.h +@@ -54,7 +54,6 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc); + + void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, + bool hit, bool bypass); +-void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d); + void bch_mark_cache_miss_collision(struct cache_set *c, + struct bcache_device *d); + void bch_mark_sectors_bypassed(struct cache_set *c, +-- +2.35.3 + diff --git a/patches.suse/bcache-replace-a-mistaken-IS_ERR-by-IS_ERR_OR_NULL-i-f72f.patch b/patches.suse/bcache-replace-a-mistaken-IS_ERR-by-IS_ERR_OR_NULL-i-f72f.patch new file mode 100644 index 0000000..73c2540 --- /dev/null +++ b/patches.suse/bcache-replace-a-mistaken-IS_ERR-by-IS_ERR_OR_NULL-i-f72f.patch @@ -0,0 +1,57 @@ +From f72f4312d4388376fc8a1f6cf37cb21a0d41758b Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 20 Nov 2023 13:25:01 +0800 +Subject: [PATCH] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in + btree_gc_coalesce() +Git-commit: f72f4312d4388376fc8a1f6cf37cb21a0d41758b +Patch-mainline: v6.7-rc3 +References: git-fixes + +Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in +node allocations") do the following change inside btree_gc_coalesce(), + +31 @@ -1340,7 +1340,7 @@ static int btree_gc_coalesce( +32 memset(new_nodes, 0, sizeof(new_nodes)); +33 closure_init_stack(&cl); +34 +35 - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) +36 + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) +37 keys += r[nodes++].keys; +38 +39 blocks = btree_default_blocks(b->c) * 2 / 3; + +At line 35 the 
original r[nodes].b is not always allocatored from +__bch_btree_node_alloc(), and possibly initialized as NULL pointer by +caller of btree_gc_coalesce(). Therefore the change at line 36 is not +correct. + +This patch replaces the mistaken IS_ERR() by IS_ERR_OR_NULL() to avoid +potential issue. + +Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") +Cc: # 6.5+ +Cc: Zheng Wang +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20231120052503.6122-9-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/btree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index de8d552201dc..79f1fa4a0d55 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1368,7 +1368,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + memset(new_nodes, 0, sizeof(new_nodes)); + closure_init_stack(&cl); + +- while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) ++ while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) + keys += r[nodes++].keys; + + blocks = btree_default_blocks(b->c) * 2 / 3; +-- +2.35.3 + diff --git a/patches.suse/bcache-replace-snprintf-in-show-functions-with-sysfs-1b86.patch b/patches.suse/bcache-replace-snprintf-in-show-functions-with-sysfs-1b86.patch new file mode 100644 index 0000000..3c88284 --- /dev/null +++ b/patches.suse/bcache-replace-snprintf-in-show-functions-with-sysfs-1b86.patch @@ -0,0 +1,92 @@ +From 1b86db5f4e025840e0bf7cef2b10e84531954386 Mon Sep 17 00:00:00 2001 +From: Qing Wang +Date: Fri, 29 Oct 2021 14:09:30 +0800 +Subject: [PATCH] bcache: replace snprintf in show functions with sysfs_emit +Git-commit: 1b86db5f4e025840e0bf7cef2b10e84531954386 +Patch-mainline: v5.16-rc1 +References: git-fixes + +coccicheck complains about the use of snprintf() in sysfs show functions. + +Fix the following coccicheck warning: +drivers/md/bcache/sysfs.h:54:12-20: WARNING: use scnprintf or sprintf. + +Implement sysfs_print() by sysfs_emit() and remove snprint() since no one +uses it any more. + +Suggested-by: Coly Li +Signed-off-by: Qing Wang +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211029060930.119923-3-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/sysfs.h | 18 ++++++++++++++++-- + drivers/md/bcache/util.h | 17 ----------------- + 2 files changed, 16 insertions(+), 19 deletions(-) + +diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h +index 215df32f567b..c1752ba2e05b 100644 +--- a/drivers/md/bcache/sysfs.h ++++ b/drivers/md/bcache/sysfs.h +@@ -51,13 +51,27 @@ STORE(fn) \ + #define sysfs_printf(file, fmt, ...) \ + do { \ + if (attr == &sysfs_ ## file) \ +- return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \ ++ return sysfs_emit(buf, fmt "\n", __VA_ARGS__); \ + } while (0) + + #define sysfs_print(file, var) \ + do { \ + if (attr == &sysfs_ ## file) \ +- return snprint(buf, PAGE_SIZE, var); \ ++ return sysfs_emit(buf, \ ++ __builtin_types_compatible_p(typeof(var), int) \ ++ ? "%i\n" : \ ++ __builtin_types_compatible_p(typeof(var), unsigned int) \ ++ ? "%u\n" : \ ++ __builtin_types_compatible_p(typeof(var), long) \ ++ ? "%li\n" : \ ++ __builtin_types_compatible_p(typeof(var), unsigned long)\ ++ ? "%lu\n" : \ ++ __builtin_types_compatible_p(typeof(var), int64_t) \ ++ ? "%lli\n" : \ ++ __builtin_types_compatible_p(typeof(var), uint64_t) \ ++ ? "%llu\n" : \ ++ __builtin_types_compatible_p(typeof(var), const char *) \ ++ ? 
"%s\n" : "%i\n", var); \ + } while (0) + + #define sysfs_hprint(file, val) \ +diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h +index 6274d6a17e5e..cdb165517d0b 100644 +--- a/drivers/md/bcache/util.h ++++ b/drivers/md/bcache/util.h +@@ -340,23 +340,6 @@ static inline int bch_strtoul_h(const char *cp, long *res) + _r; \ + }) + +-#define snprint(buf, size, var) \ +- snprintf(buf, size, \ +- __builtin_types_compatible_p(typeof(var), int) \ +- ? "%i\n" : \ +- __builtin_types_compatible_p(typeof(var), unsigned int) \ +- ? "%u\n" : \ +- __builtin_types_compatible_p(typeof(var), long) \ +- ? "%li\n" : \ +- __builtin_types_compatible_p(typeof(var), unsigned long)\ +- ? "%lu\n" : \ +- __builtin_types_compatible_p(typeof(var), int64_t) \ +- ? "%lli\n" : \ +- __builtin_types_compatible_p(typeof(var), uint64_t) \ +- ? "%llu\n" : \ +- __builtin_types_compatible_p(typeof(var), const char *) \ +- ? "%s\n" : "%i\n", var) +- + ssize_t bch_hprint(char *buf, int64_t v); + + bool bch_is_zero(const char *p, size_t n); +-- +2.35.3 + diff --git a/patches.suse/bcache-revert-replacing-IS_ERR_OR_NULL-with-IS_ERR-bb6c.patch b/patches.suse/bcache-revert-replacing-IS_ERR_OR_NULL-with-IS_ERR-bb6c.patch new file mode 100644 index 0000000..1cecfaf --- /dev/null +++ b/patches.suse/bcache-revert-replacing-IS_ERR_OR_NULL-with-IS_ERR-bb6c.patch @@ -0,0 +1,76 @@ +From bb6cc253861bd5a7cf8439e2118659696df9619f Mon Sep 17 00:00:00 2001 +From: Markus Weippert +Date: Fri, 24 Nov 2023 16:14:37 +0100 +Subject: [PATCH] bcache: revert replacing IS_ERR_OR_NULL with IS_ERR +Git-commit: bb6cc253861bd5a7cf8439e2118659696df9619f +Patch-mainline: v6.7-rc4 +References: git-fixes + +Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in +node allocations") replaced IS_ERR_OR_NULL by IS_ERR. This leads to a +NULL pointer dereference. + +Bug: kernel NULL pointer dereference, address: 0000000000000080 +Call Trace: + ? __die_body.cold+0x1a/0x1f + ? page_fault_oops+0xd2/0x2b0 + ? exc_page_fault+0x70/0x170 + ? asm_exc_page_fault+0x22/0x30 + ? btree_node_free+0xf/0x160 [bcache] + ? up_write+0x32/0x60 + btree_gc_coalesce+0x2aa/0x890 [bcache] + ? bch_extent_bad+0x70/0x170 [bcache] + btree_gc_recurse+0x130/0x390 [bcache] + ? btree_gc_mark_node+0x72/0x230 [bcache] + bch_btree_gc+0x5da/0x600 [bcache] + ? cpuusage_read+0x10/0x10 + ? bch_btree_gc+0x600/0x600 [bcache] + bch_gc_thread+0x135/0x180 [bcache] + +The relevant code starts with: + + new_nodes[0] = NULL; + + for (i = 0; i < nodes; i++) { + if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) + goto out_nocoalesce; + // ... +Out_nocoalesce: // ... + for (i = 0; i < nodes; i++) + if (!IS_ERR(new_nodes[i])) { // IS_ERR_OR_NULL before +028ddcac477b + btree_node_free(new_nodes[i]); // new_nodes[0] is NULL + rw_unlock(true, new_nodes[i]); + } + +This patch replaces IS_ERR() by IS_ERR_OR_NULL() to fix this. 
+ +Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") +Link: https://lore.kernel.org/all/3DF4A87A-2AC1-4893-AE5F-E921478419A9@suse.de/ +Cc: stable@vger.kernel.org +Cc: Zheng Wang +Cc: Coly Li +Signed-off-by: Markus Weippert +Signed-off-by: Jens Axboe +Signed-off-by: Coly Li + +--- + drivers/md/bcache/btree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index de3019972b35..261596791218 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1522,7 +1522,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + bch_keylist_free(&keylist); + + for (i = 0; i < nodes; i++) +- if (!IS_ERR(new_nodes[i])) { ++ if (!IS_ERR_OR_NULL(new_nodes[i])) { + btree_node_free(new_nodes[i]); + rw_unlock(true, new_nodes[i]); + } +-- +2.35.3 + diff --git a/patches.suse/bcache-use-bvec_kmap_local-in-bch_data_verify-0038.patch b/patches.suse/bcache-use-bvec_kmap_local-in-bch_data_verify-0038.patch new file mode 100644 index 0000000..560a27b --- /dev/null +++ b/patches.suse/bcache-use-bvec_kmap_local-in-bch_data_verify-0038.patch @@ -0,0 +1,57 @@ +From 00387bd21dac98f9e793294c895768d9e5441f82 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 22:38:11 +0800 +Subject: [PATCH] bcache: use bvec_kmap_local in bch_data_verify +Git-commit: 00387bd21dac98f9e793294c895768d9e5441f82 +Patch-mainline: v5.16-rc1 +References: git-fixes + +Using local kmaps slightly reduces the chances to stray writes, and +the bvec interface cleans up the code a little bit. + +Also switch from page_address to bvec_kmap_local for cbv to be on the +safe side and to avoid pointlessly poking into bvec internals. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20211020143812.6403-8-colyli@suse.de +Signed-off-by: Jens Axboe + +--- + drivers/md/bcache/debug.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c +index e803cad864be..6230dfdd9286 100644 +--- a/drivers/md/bcache/debug.c ++++ b/drivers/md/bcache/debug.c +@@ -127,21 +127,20 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) + + citer.bi_size = UINT_MAX; + bio_for_each_segment(bv, bio, iter) { +- void *p1 = kmap_atomic(bv.bv_page); ++ void *p1 = bvec_kmap_local(&bv); + void *p2; + + cbv = bio_iter_iovec(check, citer); +- p2 = page_address(cbv.bv_page); ++ p2 = bvec_kmap_local(&cbv); + +- cache_set_err_on(memcmp(p1 + bv.bv_offset, +- p2 + bv.bv_offset, +- bv.bv_len), ++ cache_set_err_on(memcmp(p1, p2, bv.bv_len), + dc->disk.c, + "verify failed at dev %pg sector %llu", + dc->bdev, + (uint64_t) bio->bi_iter.bi_sector); + +- kunmap_atomic(p1); ++ kunmap_local(p2); ++ kunmap_local(p1); + bio_advance_iter(check, &citer, bv.bv_len); + } + +-- +2.35.3 + diff --git a/patches.suse/bcache-use-bvec_kmap_local-in-bio_csum-07fe.patch b/patches.suse/bcache-use-bvec_kmap_local-in-bio_csum-07fe.patch new file mode 100644 index 0000000..0d83619 --- /dev/null +++ b/patches.suse/bcache-use-bvec_kmap_local-in-bio_csum-07fe.patch @@ -0,0 +1,41 @@ +From 07fee7aba5472d0e65345146a68b4bd1a8b656c3 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Thu, 3 Mar 2022 14:19:02 +0300 +Subject: [PATCH] bcache: use bvec_kmap_local in bio_csum +Git-commit: 07fee7aba5472d0e65345146a68b4bd1a8b656c3 +Patch-mainline: v5.18-rc1 +References: git-fixes + +Using local kmaps slightly reduces the chances to stray 
writes, and +the bvec interface cleans up the code a little bit. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Ira Weiny +Link: https://lore.kernel.org/r/20220303111905.321089-8-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Coly Li + +--- + drivers/md/bcache/request.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 6869e010475a..fdd0194f84dd 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k) + uint64_t csum = 0; + + bio_for_each_segment(bv, bio, iter) { +- void *d = kmap(bv.bv_page) + bv.bv_offset; ++ void *d = bvec_kmap_local(&bv); + + csum = crc64_be(csum, d, bv.bv_len); +- kunmap(bv.bv_page); ++ kunmap_local(d); + } + + k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); +-- +2.35.3 + diff --git a/patches.suse/bcache-use-default_groups-in-kobj_type-fa97.patch b/patches.suse/bcache-use-default_groups-in-kobj_type-fa97.patch new file mode 100644 index 0000000..be6de6d --- /dev/null +++ b/patches.suse/bcache-use-default_groups-in-kobj_type-fa97.patch @@ -0,0 +1,147 @@ +From fa97cb843cfb874c50cd1dcc46a2f28187e184e9 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Thu, 6 Jan 2022 11:00:04 +0100 +Subject: [PATCH] bcache: use default_groups in kobj_type +Git-commit: fa97cb843cfb874c50cd1dcc46a2f28187e184e9 +Patch-mainline: v5.18-rc1 +References: git-fixes + +There are currently 2 ways to create a set of sysfs files for a +kobj_type, through the default_attrs field, and the default_groups +field. Move the bcache sysfs code to use default_groups field which has +been the preferred way since aa30f47cf666 ("kobject: Add support for +default attribute groups to kobj_type") so that we can soon get rid of +the obsolete default_attrs field. 
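+
+The conversion pattern, sketched with a hypothetical "foo" kobject (names are
+illustrative; the real hunks follow below):
+
+  static struct attribute *foo_attrs[] = {  /* was foo_files[] */
+          &sysfs_bar,                       /* a struct attribute, as in the hunks below */
+          NULL
+  };
+  ATTRIBUTE_GROUPS(foo);  /* generates foo_group and foo_groups[] from foo_attrs[] */
+
+  /* the kobj_type then sets .default_groups = foo_groups instead of
+   * the obsolete .default_attrs */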
+ +Cc: Kent Overstreet +Cc: linux-bcache@vger.kernel.org +Acked-by: Coly Li +Link: https://lore.kernel.org/r/20220106100004.3277439-1-gregkh@linuxfoundation.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Coly Li + +--- + drivers/md/bcache/stats.c | 3 ++- + drivers/md/bcache/sysfs.c | 15 ++++++++++----- + drivers/md/bcache/sysfs.h | 2 +- + 3 files changed, 13 insertions(+), 7 deletions(-) + +diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c +index 4c7ee5fedb9d..68b02216033d 100644 +--- a/drivers/md/bcache/stats.c ++++ b/drivers/md/bcache/stats.c +@@ -78,7 +78,7 @@ static void bch_stats_release(struct kobject *k) + { + } + +-static struct attribute *bch_stats_files[] = { ++static struct attribute *bch_stats_attrs[] = { + &sysfs_cache_hits, + &sysfs_cache_misses, + &sysfs_cache_bypass_hits, +@@ -88,6 +88,7 @@ static struct attribute *bch_stats_files[] = { + &sysfs_bypassed, + NULL + }; ++ATTRIBUTE_GROUPS(bch_stats); + static KTYPE(bch_stats); + + int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 1f0dce30fa75..d1029d71ff3b 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -500,7 +500,7 @@ STORE(bch_cached_dev) + return size; + } + +-static struct attribute *bch_cached_dev_files[] = { ++static struct attribute *bch_cached_dev_attrs[] = { + &sysfs_attach, + &sysfs_detach, + &sysfs_stop, +@@ -543,6 +543,7 @@ static struct attribute *bch_cached_dev_files[] = { + &sysfs_backing_dev_uuid, + NULL + }; ++ATTRIBUTE_GROUPS(bch_cached_dev); + KTYPE(bch_cached_dev); + + SHOW(bch_flash_dev) +@@ -600,7 +601,7 @@ STORE(__bch_flash_dev) + } + STORE_LOCKED(bch_flash_dev) + +-static struct attribute *bch_flash_dev_files[] = { ++static struct attribute *bch_flash_dev_attrs[] = { + &sysfs_unregister, + #if 0 + &sysfs_data_csum, +@@ -609,6 +610,7 @@ static struct attribute *bch_flash_dev_files[] = { + &sysfs_size, + NULL + }; ++ATTRIBUTE_GROUPS(bch_flash_dev); + KTYPE(bch_flash_dev); + + struct bset_stats_op { +@@ -955,7 +957,7 @@ static void bch_cache_set_internal_release(struct kobject *k) + { + } + +-static struct attribute *bch_cache_set_files[] = { ++static struct attribute *bch_cache_set_attrs[] = { + &sysfs_unregister, + &sysfs_stop, + &sysfs_synchronous, +@@ -980,9 +982,10 @@ static struct attribute *bch_cache_set_files[] = { + &sysfs_clear_stats, + NULL + }; ++ATTRIBUTE_GROUPS(bch_cache_set); + KTYPE(bch_cache_set); + +-static struct attribute *bch_cache_set_internal_files[] = { ++static struct attribute *bch_cache_set_internal_attrs[] = { + &sysfs_active_journal_entries, + + sysfs_time_stats_attribute_list(btree_gc, sec, ms) +@@ -1022,6 +1025,7 @@ static struct attribute *bch_cache_set_internal_files[] = { + &sysfs_feature_incompat, + NULL + }; ++ATTRIBUTE_GROUPS(bch_cache_set_internal); + KTYPE(bch_cache_set_internal); + + static int __bch_cache_cmp(const void *l, const void *r) +@@ -1182,7 +1186,7 @@ STORE(__bch_cache) + } + STORE_LOCKED(bch_cache) + +-static struct attribute *bch_cache_files[] = { ++static struct attribute *bch_cache_attrs[] = { + &sysfs_bucket_size, + &sysfs_block_size, + &sysfs_nbuckets, +@@ -1196,4 +1200,5 @@ static struct attribute *bch_cache_files[] = { + &sysfs_cache_replacement_policy, + NULL + }; ++ATTRIBUTE_GROUPS(bch_cache); + KTYPE(bch_cache); +diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h +index c1752ba2e05b..a2ff6447b699 100644 +--- a/drivers/md/bcache/sysfs.h ++++ b/drivers/md/bcache/sysfs.h +@@ -9,7 +9,7 @@ 
struct kobj_type type ## _ktype = { \ + .show = type ## _show, \ + .store = type ## _store \ + }), \ +- .default_attrs = type ## _files \ ++ .default_groups = type ## _groups \ + } + + #define SHOW(fn) \ +-- +2.35.3 + diff --git a/patches.suse/dm-add-cond_resched-to-dm_wq_work-0ca4.patch b/patches.suse/dm-add-cond_resched-to-dm_wq_work-0ca4.patch new file mode 100644 index 0000000..90e0508 --- /dev/null +++ b/patches.suse/dm-add-cond_resched-to-dm_wq_work-0ca4.patch @@ -0,0 +1,37 @@ +From 0ca44fcef241768fd25ee763b3d203b9852f269b Mon Sep 17 00:00:00 2001 +From: Pingfan Liu +Date: Wed, 15 Feb 2023 19:23:40 +0800 +Subject: [PATCH] dm: add cond_resched() to dm_wq_work() +Git-commit: 0ca44fcef241768fd25ee763b3d203b9852f269b +Patch-mainline: v6.3-rc1 +References: git-fixes + +Otherwise the while() loop in dm_wq_work() can result in a "dead +loop" on systems that have preemption disabled. This is particularly +problematic on single cpu systems. + +Cc: stable@vger.kernel.org +Signed-off-by: Pingfan Liu +Acked-by: Ming Lei +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 90b64bfc63b0..15b91959e433 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2570,6 +2570,7 @@ static void dm_wq_work(struct work_struct *work) + break; + + submit_bio_noacct(bio); ++ cond_resched(); + } + } + +-- +2.35.3 + diff --git a/patches.suse/dm-cache-add-cond_resched-to-various-workqueue-loops-7622.patch b/patches.suse/dm-cache-add-cond_resched-to-various-workqueue-loops-7622.patch new file mode 100644 index 0000000..9aa83e3 --- /dev/null +++ b/patches.suse/dm-cache-add-cond_resched-to-various-workqueue-loops-7622.patch @@ -0,0 +1,50 @@ +From 76227f6dc805e9e960128bcc6276647361e0827c Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Thu, 16 Feb 2023 15:31:08 -0500 +Subject: [PATCH] dm cache: add cond_resched() to various workqueue loops +Git-commit: 76227f6dc805e9e960128bcc6276647361e0827c +Patch-mainline: v6.3-rc1 +References: git-fixes + +Otherwise on resource constrained systems these workqueues may be too +greedy. 
+ +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-cache-target.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c +index 23195701dc15..dbbcfa580078 100644 +--- a/drivers/md/dm-cache-target.c ++++ b/drivers/md/dm-cache-target.c +@@ -1829,6 +1829,7 @@ static void process_deferred_bios(struct work_struct *ws) + + else + commit_needed = process_bio(cache, bio) || commit_needed; ++ cond_resched(); + } + + if (commit_needed) +@@ -1852,6 +1853,7 @@ static void requeue_deferred_bios(struct cache *cache) + while ((bio = bio_list_pop(&bios))) { + bio->bi_status = BLK_STS_DM_REQUEUE; + bio_endio(bio); ++ cond_resched(); + } + } + +@@ -1892,6 +1894,8 @@ static void check_migrations(struct work_struct *ws) + r = mg_start(cache, op, NULL); + if (r) + break; ++ ++ cond_resched(); + } + } + +-- +2.35.3 + diff --git a/patches.suse/dm-cache-policy-smq-ensure-IO-doesn-t-prevent-cleane-1e4a.patch b/patches.suse/dm-cache-policy-smq-ensure-IO-doesn-t-prevent-cleane-1e4a.patch new file mode 100644 index 0000000..306720e --- /dev/null +++ b/patches.suse/dm-cache-policy-smq-ensure-IO-doesn-t-prevent-cleane-1e4a.patch @@ -0,0 +1,107 @@ +From 1e4ab7b4c881cf26c1c72b3f56519e03475486fb Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 25 Jul 2023 11:44:41 -0400 +Subject: [PATCH] dm cache policy smq: ensure IO doesn't prevent cleaner policy + progress +Git-commit: 1e4ab7b4c881cf26c1c72b3f56519e03475486fb +Patch-mainline: v6.5-rc4 +References: git-fixes + +When using the cleaner policy to decommission the cache, there is +never any writeback started from the cache as it is constantly delayed +due to normal I/O keeping the device busy. Meaning @idle=false was +always being passed to clean_target_met() + +Fix this by adding a specific 'cleaner' flag that is set when the +cleaner policy is configured. This flag serves to always allow the +cleaner's writeback work to be queued until the cache is +decommissioned (even if the cache isn't idle). + +Reported-by: David Jeffery +Fixes: b29d4986d0da ("dm cache: significant rework to leverage dm-bio-prison-v2") +Cc: stable@vger.kernel.org +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-cache-policy-smq.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +--- a/drivers/md/dm-cache-policy-smq.c ++++ b/drivers/md/dm-cache-policy-smq.c +@@ -854,7 +854,13 @@ struct smq_policy { + + struct background_tracker *bg_work; + +- bool migrations_allowed; ++ bool migrations_allowed:1; ++ ++ /* ++ * If this is set the policy will try and clean the whole cache ++ * even if the device is not idle. ++ */ ++ bool cleaner:1; + }; + + /*----------------------------------------------------------------*/ +@@ -1135,7 +1141,7 @@ static bool clean_target_met(struct smq_ + * Cache entries may not be populated. So we cannot rely on the + * size of the clean queue. + */ +- if (idle) { ++ if (idle || mq->cleaner) { + /* + * We'd like to clean everything. 
+ */ +@@ -1718,11 +1724,9 @@ static void calc_hotspot_params(sector_t + *hotspot_block_size /= 2u; + } + +-static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, +- sector_t origin_size, +- sector_t cache_block_size, +- bool mimic_mq, +- bool migrations_allowed) ++static struct dm_cache_policy * ++__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size, ++ bool mimic_mq, bool migrations_allowed, bool cleaner) + { + unsigned i; + unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; +@@ -1809,6 +1813,7 @@ static struct dm_cache_policy *__smq_cre + goto bad_btracker; + + mq->migrations_allowed = migrations_allowed; ++ mq->cleaner = cleaner; + + return &mq->policy; + +@@ -1832,21 +1837,24 @@ static struct dm_cache_policy *smq_creat + sector_t origin_size, + sector_t cache_block_size) + { +- return __smq_create(cache_size, origin_size, cache_block_size, false, true); ++ return __smq_create(cache_size, origin_size, cache_block_size, ++ false, true, false); + } + + static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) + { +- return __smq_create(cache_size, origin_size, cache_block_size, true, true); ++ return __smq_create(cache_size, origin_size, cache_block_size, ++ true, true, false); + } + + static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) + { +- return __smq_create(cache_size, origin_size, cache_block_size, false, false); ++ return __smq_create(cache_size, origin_size, cache_block_size, ++ false, false, true); + } + + /*----------------------------------------------------------------*/ diff --git a/patches.suse/dm-call-the-resume-method-on-internal-suspend-65e8.patch b/patches.suse/dm-call-the-resume-method-on-internal-suspend-65e8.patch new file mode 100644 index 0000000..ac36ed5 --- /dev/null +++ b/patches.suse/dm-call-the-resume-method-on-internal-suspend-65e8.patch @@ -0,0 +1,123 @@ +From 65e8fbde64520001abf1c8d0e573561b4746ef38 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 11 Mar 2024 15:06:39 +0100 +Subject: [PATCH] dm: call the resume method on internal suspend +Git-commit: 65e8fbde64520001abf1c8d0e573561b4746ef38 +Patch-mainline: v6.9-rc1 +References: git-fixes + +There is this reported crash when experimenting with the lvm2 testsuite. +The list corruption is caused by the fact that the postsuspend and resume +methods were not paired correctly; there were two consecutive calls to the +origin_postsuspend function. The second call attempts to remove the +"hash_list" entry from a list, while it was already removed by the first +call. + +Fix __dm_internal_resume so that it calls the preresume and resume +methods of the table's targets. + +If a preresume method of some target fails, we are in a tricky situation. +We can't return an error because dm_internal_resume isn't supposed to +return errors. We can't return success, because then the "resume" and +"postsuspend" methods would not be paired correctly. So, we set the +DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace +tools, but it won't cause a kernel crash. + +Signed-off-by: Coly Li + +------------[ cut here ]------------ +kernel BUG at lib/list_debug.c:56! 
+invalid opcode: 0000 [#1] PREEMPT SMP +CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 +RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 + +RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 +RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 +RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff +RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 +R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 +R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 +FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 +CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 +CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 +Call Trace: + + ? die+0x2d/0x80 + ? do_trap+0xeb/0xf0 + ? __list_del_entry_valid_or_report+0x77/0xc0 + ? do_error_trap+0x60/0x80 + ? __list_del_entry_valid_or_report+0x77/0xc0 + ? exc_invalid_op+0x49/0x60 + ? __list_del_entry_valid_or_report+0x77/0xc0 + ? asm_exc_invalid_op+0x16/0x20 + ? table_deps+0x1b0/0x1b0 [dm_mod] + ? __list_del_entry_valid_or_report+0x77/0xc0 + origin_postsuspend+0x1a/0x50 [dm_snapshot] + dm_table_postsuspend_targets+0x34/0x50 [dm_mod] + dm_suspend+0xd8/0xf0 [dm_mod] + dev_suspend+0x1f2/0x2f0 [dm_mod] + ? table_deps+0x1b0/0x1b0 [dm_mod] + ctl_ioctl+0x300/0x5f0 [dm_mod] + dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] + __x64_compat_sys_ioctl+0x104/0x170 + do_syscall_64+0x184/0x1b0 + entry_SYSCALL_64_after_hwframe+0x46/0x4e +RIP: 0033:0xf7e6aead + +---[ end trace 0000000000000000 ]--- + +Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +--- + drivers/md/dm.c | 26 ++++++++++++++++++++------ + 1 file changed, 20 insertions(+), 6 deletions(-) + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 1bed115a7e8e..f1fe8828e1de 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2945,6 +2945,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend + + static void __dm_internal_resume(struct mapped_device *md) + { ++ int r; ++ struct dm_table *map; ++ + BUG_ON(!md->internal_suspend_count); + + if (--md->internal_suspend_count) +@@ -2953,12 +2956,23 @@ static void __dm_internal_resume(struct mapped_device *md) + if (dm_suspended_md(md)) + goto done; /* resume from nested suspend */ + +- /* +- * NOTE: existing callers don't need to call dm_table_resume_targets +- * (which may fail -- so best to avoid it for now by passing NULL map) +- */ +- (void) __dm_resume(md, NULL); +- ++ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); ++ r = __dm_resume(md, map); ++ if (r) { ++ /* ++ * If a preresume method of some target failed, we are in a ++ * tricky situation. We can't return an error to the caller. We ++ * can't fake success because then the "resume" and ++ * "postsuspend" methods would not be paired correctly, and it ++ * would break various targets, for example it would cause list ++ * corruption in the "origin" target. ++ * ++ * So, we fake normal suspend here, to make sure that the ++ * "resume" and "postsuspend" methods will be paired correctly. 
++ */ ++ DMERR("Preresume method failed: %d", r); ++ set_bit(DMF_SUSPENDED, &md->flags); ++ } + done: + clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + smp_mb__after_atomic(); +-- +2.35.3 + diff --git a/patches.suse/dm-clone-call-kmem_cache_destroy-in-dm_clone_init-er-6827.patch b/patches.suse/dm-clone-call-kmem_cache_destroy-in-dm_clone_init-er-6827.patch new file mode 100644 index 0000000..bdb4efc --- /dev/null +++ b/patches.suse/dm-clone-call-kmem_cache_destroy-in-dm_clone_init-er-6827.patch @@ -0,0 +1,34 @@ +From 6827af4a9a9f5bb664c42abf7c11af4978d72201 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 4 Apr 2023 11:59:00 -0400 +Subject: [PATCH] dm clone: call kmem_cache_destroy() in dm_clone_init() error + path +Git-commit: 6827af4a9a9f5bb664c42abf7c11af4978d72201 +Patch-mainline: v6.4-rc1 +References: git-fixes + +Otherwise the _hydration_cache will leak if dm_register_target() fails. + +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-clone-target.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c +index f38a27604c7a..fc30ebd67622 100644 +--- a/drivers/md/dm-clone-target.c ++++ b/drivers/md/dm-clone-target.c +@@ -2205,6 +2205,7 @@ static int __init dm_clone_init(void) + r = dm_register_target(&clone_target); + if (r < 0) { + DMERR("Failed to register clone target"); ++ kmem_cache_destroy(_hydration_cache); + return r; + } + +-- +2.35.3 + diff --git a/patches.suse/dm-crypt-add-cond_resched-to-dmcrypt_write-fb29.patch b/patches.suse/dm-crypt-add-cond_resched-to-dmcrypt_write-fb29.patch new file mode 100644 index 0000000..469c35c --- /dev/null +++ b/patches.suse/dm-crypt-add-cond_resched-to-dmcrypt_write-fb29.patch @@ -0,0 +1,50 @@ +From fb294b1c0ba982144ca467a75e7d01ff26304e2b Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 6 Mar 2023 11:17:58 -0500 +Subject: [PATCH] dm crypt: add cond_resched() to dmcrypt_write() +Git-commit: fb294b1c0ba982144ca467a75e7d01ff26304e2b +Patch-mainline: v6.3-rc4 +References: git-fixes + +The loop in dmcrypt_write may be running for unbounded amount of time, +thus we need cond_resched() in it. + +This commit fixes the following warning: + +[ 3391.153255][ C12] watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [dmcrypt_write/2:2897] +... 
+[ 3391.387210][ C12] Call trace: +[ 3391.390338][ C12] blk_attempt_bio_merge.part.6+0x38/0x158 +[ 3391.395970][ C12] blk_attempt_plug_merge+0xc0/0x1b0 +[ 3391.401085][ C12] blk_mq_submit_bio+0x398/0x550 +[ 3391.405856][ C12] submit_bio_noacct+0x308/0x380 +[ 3391.410630][ C12] dmcrypt_write+0x1e4/0x208 [dm_crypt] +[ 3391.416005][ C12] kthread+0x130/0x138 +[ 3391.419911][ C12] ret_from_fork+0x10/0x18 + +Reported-by: yangerkun +Fixes: dc2676210c42 ("dm crypt: offload writes to thread") +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-crypt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c +index 87c5706131f2..faba1be572f9 100644 +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -1937,6 +1937,7 @@ static int dmcrypt_write(void *data) + io = crypt_io_from_node(rb_first(&write_tree)); + rb_erase(&io->rb_node, &write_tree); + kcryptd_io_write(io); ++ cond_resched(); + } while (!RB_EMPTY_ROOT(&write_tree)); + blk_finish_plug(&plug); + } +-- +2.35.3 + diff --git a/patches.suse/dm-crypt-avoid-accessing-uninitialized-tasklet-d9a0.patch b/patches.suse/dm-crypt-avoid-accessing-uninitialized-tasklet-d9a0.patch new file mode 100644 index 0000000..ebc2929 --- /dev/null +++ b/patches.suse/dm-crypt-avoid-accessing-uninitialized-tasklet-d9a0.patch @@ -0,0 +1,86 @@ +From d9a02e016aaf5a57fb44e9a5e6da8ccd3b9e2e70 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Wed, 8 Mar 2023 14:39:54 -0500 +Subject: [PATCH] dm crypt: avoid accessing uninitialized tasklet +Git-commit: d9a02e016aaf5a57fb44e9a5e6da8ccd3b9e2e70 +Patch-mainline: v6.3-rc4 +References: git-fixes + +When neither "no_read_workqueue" nor "no_write_workqueue" are enabled, +tasklet_trylock() in crypt_dec_pending() may still return false due to +an uninitialized state, and dm-crypt will unnecessarily do io completion +in io_queue workqueue instead of current context. + +Fix this by adding an 'in_tasklet' flag to dm_crypt_io struct and +initialize it to false in crypt_io_init(). Set this flag to true in +kcryptd_queue_crypt() before calling tasklet_schedule(). If set +crypt_dec_pending() will punt io completion to a workqueue. + +This also nicely avoids the tasklet_trylock/unlock hack when tasklets +aren't in use. + +Fixes: 8e14f610159d ("dm crypt: do not call bio_endio() from the dm-crypt tasklet") +Cc: stable@vger.kernel.org +Reported-by: Hou Tao +Suggested-by: Ignat Korchagin +Reviewed-by: Ignat Korchagin +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-crypt.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c +index faba1be572f9..2764b4ea18a3 100644 +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -72,7 +72,9 @@ struct dm_crypt_io { + struct crypt_config *cc; + struct bio *base_bio; + u8 *integrity_metadata; +- bool integrity_metadata_from_pool; ++ bool integrity_metadata_from_pool:1; ++ bool in_tasklet:1; ++ + struct work_struct work; + struct tasklet_struct tasklet; + +@@ -1731,6 +1733,7 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, + io->ctx.r.req = NULL; + io->integrity_metadata = NULL; + io->integrity_metadata_from_pool = false; ++ io->in_tasklet = false; + atomic_set(&io->io_pending, 0); + } + +@@ -1777,14 +1780,13 @@ static void crypt_dec_pending(struct dm_crypt_io *io) + * our tasklet. 
In this case we need to delay bio_endio() + * execution to after the tasklet is done and dequeued. + */ +- if (tasklet_trylock(&io->tasklet)) { +- tasklet_unlock(&io->tasklet); +- bio_endio(base_bio); ++ if (io->in_tasklet) { ++ INIT_WORK(&io->work, kcryptd_io_bio_endio); ++ queue_work(cc->io_queue, &io->work); + return; + } + +- INIT_WORK(&io->work, kcryptd_io_bio_endio); +- queue_work(cc->io_queue, &io->work); ++ bio_endio(base_bio); + } + + /* +@@ -2233,6 +2235,7 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) + * it is being executed with irqs disabled. + */ + if (in_hardirq() || irqs_disabled()) { ++ io->in_tasklet = true; + tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); + tasklet_schedule(&io->tasklet); + return; +-- +2.35.3 + diff --git a/patches.suse/dm-crypt-dm-verity-disable-tasklets-0a9b.patch b/patches.suse/dm-crypt-dm-verity-disable-tasklets-0a9b.patch new file mode 100644 index 0000000..d2fe3af --- /dev/null +++ b/patches.suse/dm-crypt-dm-verity-disable-tasklets-0a9b.patch @@ -0,0 +1,184 @@ +From 0a9bab391e336489169b95cb0d4553d921302189 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Wed, 31 Jan 2024 21:57:27 +0100 +Subject: [PATCH] dm-crypt, dm-verity: disable tasklets +Git-commit: 0a9bab391e336489169b95cb0d4553d921302189 +Patch-mainline: v6.8-rc3 +References: bsc#1222416, CVE-2024-26718 + +Tasklets have an inherent problem with memory corruption. The function +tasklet_action_common calls tasklet_trylock, then it calls the tasklet +callback and then it calls tasklet_unlock. If the tasklet callback frees +the structure that contains the tasklet or if it calls some code that may +free it, tasklet_unlock will write into free memory. + +The commits 8e14f610159d and d9a02e016aaf try to fix it for dm-crypt, but +it is not a sufficient fix and the data corruption can still happen [1]. +There is no fix for dm-verity and dm-verity will write into free memory +with every tasklet-processed bio. + +There will be atomic workqueues implemented in the kernel 6.9 [2]. They +will have better interface and they will not suffer from the memory +corruption problem. + +But we need something that stops the memory corruption now and that can be +backported to the stable kernels. So, I'm proposing this commit that +disables tasklets in both dm-crypt and dm-verity. This commit doesn't +remove the tasklet support, because the tasklet code will be reused when +atomic workqueues will be implemented. 
+ +[1] https://lore.kernel.org/all/d390d7ee-f142-44d3-822a-87949e14608b@suse.de/T/ +[2] https://lore.kernel.org/lkml/20240130091300.2968534-1-tj@kernel.org/ + +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Fixes: 39d42fa96ba1b ("dm crypt: add flags to optionally bypass kcryptd workqueues") +Fixes: 5721d4e5a9cdb ("dm verity: Add optional "try_verify_in_tasklet" feature") +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-crypt.c | 37 ++----------------------------------- + drivers/md/dm-verity-target.c | 26 ++------------------------ + drivers/md/dm-verity.h | 1 - + 3 files changed, 4 insertions(+), 60 deletions(-) + +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -72,10 +72,8 @@ struct dm_crypt_io { + struct bio *base_bio; + u8 *integrity_metadata; + bool integrity_metadata_from_pool:1; +- bool in_tasklet:1; + + struct work_struct work; +- struct tasklet_struct tasklet; + + struct convert_context ctx; + +@@ -1730,7 +1728,6 @@ static void crypt_io_init(struct dm_cryp + io->ctx.r.req = NULL; + io->integrity_metadata = NULL; + io->integrity_metadata_from_pool = false; +- io->in_tasklet = false; + atomic_set(&io->io_pending, 0); + } + +@@ -1739,12 +1736,6 @@ static void crypt_inc_pending(struct dm_ + atomic_inc(&io->io_pending); + } + +-static void kcryptd_io_bio_endio(struct work_struct *work) +-{ +- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); +- bio_endio(io->base_bio); +-} +- + /* + * One of the bios was finished. Check for completion of + * the whole request and correctly clean up the buffer. +@@ -1768,20 +1759,6 @@ static void crypt_dec_pending(struct dm_ + + base_bio->bi_status = error; + +- /* +- * If we are running this function from our tasklet, +- * we can't call bio_endio() here, because it will call +- * clone_endio() from dm.c, which in turn will +- * free the current struct dm_crypt_io structure with +- * our tasklet. In this case we need to delay bio_endio() +- * execution to after the tasklet is done and dequeued. +- */ +- if (io->in_tasklet) { +- INIT_WORK(&io->work, kcryptd_io_bio_endio); +- queue_work(cc->io_queue, &io->work); +- return; +- } +- + bio_endio(base_bio); + } + +@@ -2214,11 +2191,6 @@ static void kcryptd_crypt(struct work_st + kcryptd_crypt_write_convert(io); + } + +-static void kcryptd_crypt_tasklet(unsigned long work) +-{ +- kcryptd_crypt((struct work_struct *)work); +-} +- + static void kcryptd_queue_crypt(struct dm_crypt_io *io) + { + struct crypt_config *cc = io->cc; +@@ -2230,15 +2202,10 @@ static void kcryptd_queue_crypt(struct d + * irqs_disabled(): the kernel may run some IO completion from the idle thread, but + * it is being executed with irqs disabled. 
+ */ +- if (in_hardirq() || irqs_disabled()) { +- io->in_tasklet = true; +- tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); +- tasklet_schedule(&io->tasklet); ++ if (!(in_hardirq() || irqs_disabled())) { ++ kcryptd_crypt(&io->work); + return; + } +- +- kcryptd_crypt(&io->work); +- return; + } + + INIT_WORK(&io->work, kcryptd_crypt); +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -634,23 +634,6 @@ static void verity_work(struct work_stru + verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); + } + +-static void verity_tasklet(unsigned long data) +-{ +- struct dm_verity_io *io = (struct dm_verity_io *)data; +- int err; +- +- io->in_tasklet = true; +- err = verity_verify_io(io); +- if (err == -EAGAIN || err == -ENOMEM) { +- /* fallback to retrying with work-queue */ +- INIT_WORK(&io->work, verity_work); +- queue_work(io->v->verify_wq, &io->work); +- return; +- } +- +- verity_finish_io(io, errno_to_blk_status(err)); +-} +- + static void verity_end_io(struct bio *bio) + { + struct dm_verity_io *io = bio->bi_private; +@@ -663,13 +646,8 @@ static void verity_end_io(struct bio *bi + return; + } + +- if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { +- tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); +- tasklet_schedule(&io->tasklet); +- } else { +- INIT_WORK(&io->work, verity_work); +- queue_work(io->v->verify_wq, &io->work); +- } ++ INIT_WORK(&io->work, verity_work); ++ queue_work(io->v->verify_wq, &io->work); + } + + /* +--- a/drivers/md/dm-verity.h ++++ b/drivers/md/dm-verity.h +@@ -83,7 +83,6 @@ struct dm_verity_io { + struct bvec_iter iter; + + struct work_struct work; +- struct tasklet_struct tasklet; + + /* + * Three variably-size fields follow this struct: diff --git a/patches.suse/dm-crypt-don-t-modify-the-data-when-using-authentica-50c7.patch b/patches.suse/dm-crypt-don-t-modify-the-data-when-using-authentica-50c7.patch new file mode 100644 index 0000000..77792e9 --- /dev/null +++ b/patches.suse/dm-crypt-don-t-modify-the-data-when-using-authentica-50c7.patch @@ -0,0 +1,49 @@ +From 50c70240097ce41fe6bce6478b80478281e4d0f7 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 19 Feb 2024 21:30:10 +0100 +Subject: [PATCH] dm-crypt: don't modify the data when using authenticated + encryption +Git-commit: 50c70240097ce41fe6bce6478b80478281e4d0f7 +Patch-mainline: v6.8-rc6 +References: bsc#1222720, CVE-2024-26763 + +It was said that authenticated encryption could produce invalid tag when +the data that is being encrypted is modified [1]. So, fix this problem by +copying the data into the clone bio first and then encrypt them inside the +clone bio. + +This may reduce performance, but it is needed to prevent the user from +corrupting the device by writing data with O_DIRECT and modifying them at +the same time. 
+ +[1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-crypt.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c +index f745f8508243..14c5be6eda3b 100644 +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -2071,6 +2071,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) + io->ctx.bio_out = clone; + io->ctx.iter_out = clone->bi_iter; + ++ if (crypt_integrity_aead(cc)) { ++ bio_copy_data(clone, io->base_bio); ++ io->ctx.bio_in = clone; ++ io->ctx.iter_in = clone->bi_iter; ++ } ++ + sector += bio_sectors(clone); + + crypt_inc_pending(io); +-- +2.35.3 + diff --git a/patches.suse/dm-delay-fix-a-race-between-delay_presuspend-and-del-6fc4.patch b/patches.suse/dm-delay-fix-a-race-between-delay_presuspend-and-del-6fc4.patch new file mode 100644 index 0000000..052858c --- /dev/null +++ b/patches.suse/dm-delay-fix-a-race-between-delay_presuspend-and-del-6fc4.patch @@ -0,0 +1,94 @@ +From 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 17 Nov 2023 18:21:14 +0100 +Subject: [PATCH] dm-delay: fix a race between delay_presuspend and delay_bio +Git-commit: 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 +Patch-mainline: v6.7-rc2 +References: git-fixes + +In delay_presuspend, we set the atomic variable may_delay and then stop +the timer and flush pending bios. The intention here is to prevent the +delay target from re-arming the timer again. + +However, this test is racy. Suppose that one thread goes to delay_bio, +sees that dc->may_delay is one and proceeds; now, another thread executes +delay_presuspend, it sets dc->may_delay to zero, deletes the timer and +flushes pending bios. Then, the first thread continues and adds the bio to +delayed->list despite the fact that dc->may_delay is false. + +Fix this bug by changing may_delay's type from atomic_t to bool and +only access it while holding the delayed_bios_lock mutex. Note that we +don't have to grab the mutex in delay_resume because there are no bios +in flight at this point. 
+ +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-delay.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/drivers/md/dm-delay.c ++++ b/drivers/md/dm-delay.c +@@ -30,7 +30,7 @@ struct delay_c { + struct workqueue_struct *kdelayd_wq; + struct work_struct flush_expired_bios; + struct list_head delayed_bios; +- atomic_t may_delay; ++ bool may_delay; + + struct delay_class read; + struct delay_class write; +@@ -191,7 +191,7 @@ static int delay_ctr(struct dm_target *t + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); + INIT_LIST_HEAD(&dc->delayed_bios); + mutex_init(&dc->timer_lock); +- atomic_set(&dc->may_delay, 1); ++ dc->may_delay = true; + dc->argc = argc; + + ret = delay_class_ctr(ti, &dc->read, argv); +@@ -246,7 +246,7 @@ static int delay_bio(struct delay_c *dc, + struct dm_delay_info *delayed; + unsigned long expires = 0; + +- if (!c->delay || !atomic_read(&dc->may_delay)) ++ if (!c->delay) + return DM_MAPIO_REMAPPED; + + delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); +@@ -255,6 +255,10 @@ static int delay_bio(struct delay_c *dc, + delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); + + mutex_lock(&delayed_bios_lock); ++ if (unlikely(!dc->may_delay)) { ++ mutex_unlock(&delayed_bios_lock); ++ return DM_MAPIO_REMAPPED; ++ } + c->ops++; + list_add_tail(&delayed->list, &dc->delayed_bios); + mutex_unlock(&delayed_bios_lock); +@@ -268,7 +272,9 @@ static void delay_presuspend(struct dm_t + { + struct delay_c *dc = ti->private; + +- atomic_set(&dc->may_delay, 0); ++ mutex_lock(&delayed_bios_lock); ++ dc->may_delay = false; ++ mutex_unlock(&delayed_bios_lock); + del_timer_sync(&dc->delay_timer); + flush_bios(flush_delayed_bios(dc, 1)); + } +@@ -277,7 +283,7 @@ static void delay_resume(struct dm_targe + { + struct delay_c *dc = ti->private; + +- atomic_set(&dc->may_delay, 1); ++ dc->may_delay = true; + } + + static int delay_map(struct dm_target *ti, struct bio *bio) diff --git a/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-during-suspend-2760.patch b/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-during-suspend-2760.patch new file mode 100644 index 0000000..27dd0d9 --- /dev/null +++ b/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-during-suspend-2760.patch @@ -0,0 +1,65 @@ +From 2760904d895279f87196f0fa9ec570c79fe6a2e4 Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Thu, 1 Jun 2023 14:14:23 +0800 +Subject: [PATCH] dm: don't lock fs when the map is NULL during suspend or + resume +Git-commit: 2760904d895279f87196f0fa9ec570c79fe6a2e4 +Patch-mainline: v6.4-rc7 +References: git-fixes + +As described in commit 38d11da522aa ("dm: don't lock fs when the map is +NULL in process of resume"), a deadlock may be triggered between +do_resume() and do_mount(). + +This commit preserves the fix from commit 38d11da522aa but moves it to +where it also serves to fix a similar deadlock between do_suspend() +and do_mount(). It does so, if the active map is NULL, by clearing +DM_SUSPEND_LOCKFS_FLAG in dm_suspend() which is called by both +do_suspend() and do_resume(). 
+ +Fixes: 38d11da522aa ("dm: don't lock fs when the map is NULL in process of resume") +Signed-off-by: Li Lingfeng +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li +--- + drivers/md/dm-ioctl.c | 5 +---- + drivers/md/dm.c | 4 ++++ + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c +index cc77cf3d4109..7d5c9c582ed2 100644 +--- a/drivers/md/dm-ioctl.c ++++ b/drivers/md/dm-ioctl.c +@@ -1168,13 +1168,10 @@ static int do_resume(struct dm_ioctl *param) + /* Do we need to load a new map ? */ + if (new_map) { + sector_t old_size, new_size; +- int srcu_idx; + + /* Suspend if it isn't already suspended */ +- old_map = dm_get_live_table(md, &srcu_idx); +- if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map) ++ if (param->flags & DM_SKIP_LOCKFS_FLAG) + suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; +- dm_put_live_table(md, srcu_idx); + if (param->flags & DM_NOFLUSH_FLAG) + suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; + if (!dm_suspended_md(md)) +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 3b694ba3a106..8488547fc00d 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2808,6 +2808,10 @@ int dm_suspend(struct mapped_device *md, unsigned int suspend_flags) + } + + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); ++ if (!map) { ++ /* avoid deadlock with fs/namespace.c:do_mount() */ ++ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; ++ } + + r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); + if (r) +-- +2.35.3 + diff --git a/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-in-process-of--38d1.patch b/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-in-process-of--38d1.patch new file mode 100644 index 0000000..2f45511 --- /dev/null +++ b/patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-in-process-of--38d1.patch @@ -0,0 +1,86 @@ +From 38d11da522aacaa05898c734a1cec86f1e611129 Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Tue, 18 Apr 2023 16:38:04 +0800 +Subject: [PATCH] dm: don't lock fs when the map is NULL in process of resume +Git-commit: 38d11da522aacaa05898c734a1cec86f1e611129 +Patch-mainline: v6.4-rc1 +References: git-fixes + +Commit fa247089de99 ("dm: requeue IO if mapping table not yet available") +added a detection of whether the mapping table is available in the IO +submission process. If the mapping table is unavailable, it returns +BLK_STS_RESOURCE and requeues the IO. +This can lead to the following deadlock problem: + +dm create mount +ioctl(DM_DEV_CREATE_CMD) +ioctl(DM_TABLE_LOAD_CMD) + do_mount + vfs_get_tree + ext4_get_tree + get_tree_bdev + sget_fc + alloc_super + // got &s->s_umount + down_write_nested(&s->s_umount, ...); + ext4_fill_super + ext4_load_super + ext4_read_bh + submit_bio + // submit and wait io end +ioctl(DM_DEV_SUSPEND_CMD) +dev_suspend + do_resume + dm_suspend + __dm_suspend + lock_fs + freeze_bdev + get_active_super + grab_super + // wait for &s->s_umount + down_write(&s->s_umount); + dm_swap_table + __bind + // set md->map(can't get here) + +IO will be continuously requeued while holding the lock since mapping +table is NULL. At the same time, mapping table won't be set since the +lock is not available. +Like request-based DM, bio-based DM also has the same problem. + +It's not proper to just abort IO if the mapping table not available. +So clear DM_SKIP_LOCKFS_FLAG when the mapping table is NULL, this +allows the DM table to be loaded and the IO submitted upon resume. 
+ +Fixes: fa247089de99 ("dm: requeue IO if mapping table not yet available") +Cc: stable@vger.kernel.org +Signed-off-by: Li Lingfeng +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-ioctl.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c +index 7d5c9c582ed2..cc77cf3d4109 100644 +--- a/drivers/md/dm-ioctl.c ++++ b/drivers/md/dm-ioctl.c +@@ -1168,10 +1168,13 @@ static int do_resume(struct dm_ioctl *param) + /* Do we need to load a new map ? */ + if (new_map) { + sector_t old_size, new_size; ++ int srcu_idx; + + /* Suspend if it isn't already suspended */ +- if (param->flags & DM_SKIP_LOCKFS_FLAG) ++ old_map = dm_get_live_table(md, &srcu_idx); ++ if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map) + suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; ++ dm_put_live_table(md, srcu_idx); + if (param->flags & DM_NOFLUSH_FLAG) + suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; + if (!dm_suspended_md(md)) +-- +2.35.3 + diff --git a/patches.suse/dm-flakey-don-t-corrupt-the-zero-page-f507.patch b/patches.suse/dm-flakey-don-t-corrupt-the-zero-page-f507.patch new file mode 100644 index 0000000..4201e25 --- /dev/null +++ b/patches.suse/dm-flakey-don-t-corrupt-the-zero-page-f507.patch @@ -0,0 +1,51 @@ +From f50714b57aecb6b3dc81d578e295f86d9c73f078 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sun, 22 Jan 2023 14:02:57 -0500 +Subject: [PATCH] dm flakey: don't corrupt the zero page +Git-commit: f50714b57aecb6b3dc81d578e295f86d9c73f078 +Patch-mainline: v6.3-rc1 +References: git-fixes + +When we need to zero some range on a block device, the function +__blkdev_issue_zero_pages submits a write bio with the bio vector pointing +to the zero page. If we use dm-flakey with corrupt bio writes option, it +will corrupt the content of the zero page which results in crashes of +various userspace programs. Glibc assumes that memory returned by mmap is +zeroed and it uses it for calloc implementation; if the newly mapped +memory is not zeroed, calloc will return non-zeroed memory. + +Fix this bug by testing if the page is equal to ZERO_PAGE(0) and +avoiding the corruption in this case. 
+ +Cc: stable@vger.kernel.org +Fixes: a00f5276e266 ("dm flakey: Properly corrupt multi-page bios.") +Signed-off-by: Mikulas Patocka +Reviewed-by: Sweet Tea Dorminy +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-flakey.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c +index 89fa7a68c6c4..ff9ca5b2a47e 100644 +--- a/drivers/md/dm-flakey.c ++++ b/drivers/md/dm-flakey.c +@@ -303,8 +303,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) + */ + bio_for_each_segment(bvec, bio, iter) { + if (bio_iter_len(bio, iter) > corrupt_bio_byte) { +- char *segment = (page_address(bio_iter_page(bio, iter)) +- + bio_iter_offset(bio, iter)); ++ char *segment; ++ struct page *page = bio_iter_page(bio, iter); ++ if (unlikely(page == ZERO_PAGE(0))) ++ break; ++ segment = (page_address(page) + bio_iter_offset(bio, iter)); + segment[corrupt_bio_byte] = fc->corrupt_bio_value; + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", +-- +2.35.3 + diff --git a/patches.suse/dm-flakey-fix-a-bug-with-32-bit-highmem-systems-8eb2.patch b/patches.suse/dm-flakey-fix-a-bug-with-32-bit-highmem-systems-8eb2.patch new file mode 100644 index 0000000..3711903 --- /dev/null +++ b/patches.suse/dm-flakey-fix-a-bug-with-32-bit-highmem-systems-8eb2.patch @@ -0,0 +1,39 @@ +From 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sun, 22 Jan 2023 14:03:31 -0500 +Subject: [PATCH] dm flakey: fix a bug with 32-bit highmem systems +Git-commit: 8eb29c4fbf9661e6bd4dd86197a37ffe0ecc9d50 +Patch-mainline: v6.3-rc1 +References: git-fixes + +The function page_address does not work with 32-bit systems with high +memory. Use bvec_kmap_local/kunmap_local instead. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Reviewed-by: Sweet Tea Dorminy +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-flakey.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c +index ff9ca5b2a47e..33608d436cec 100644 +--- a/drivers/md/dm-flakey.c ++++ b/drivers/md/dm-flakey.c +@@ -307,8 +307,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) + struct page *page = bio_iter_page(bio, iter); + if (unlikely(page == ZERO_PAGE(0))) + break; +- segment = (page_address(page) + bio_iter_offset(bio, iter)); ++ segment = bvec_kmap_local(&bvec); + segment[corrupt_bio_byte] = fc->corrupt_bio_value; ++ kunmap_local(segment); + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, +-- +2.35.3 + diff --git a/patches.suse/dm-flakey-fix-a-crash-with-invalid-table-line-98db.patch b/patches.suse/dm-flakey-fix-a-crash-with-invalid-table-line-98db.patch new file mode 100644 index 0000000..95e0720 --- /dev/null +++ b/patches.suse/dm-flakey-fix-a-crash-with-invalid-table-line-98db.patch @@ -0,0 +1,42 @@ +From 98dba02d9a93eec11bffbb93c7c51624290702d2 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 18 Apr 2023 15:57:47 -0400 +Subject: [PATCH] dm flakey: fix a crash with invalid table line +Git-commit: 98dba02d9a93eec11bffbb93c7c51624290702d2 +Patch-mainline: v6.4-rc1 +References: git-fixes + +This command will crash with NULL pointer dereference: + dmsetup create flakey --table \ + "0 `blockdev --getsize /dev/ram0` flakey /dev/ram0 0 0 1 2 corrupt_bio_byte 512" + +Fix the crash by checking if arg_name is non-NULL before comparing it. + +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-flakey.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c +index ebcfb99b186b..ef07b294e550 100644 +--- a/drivers/md/dm-flakey.c ++++ b/drivers/md/dm-flakey.c +@@ -125,9 +125,9 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, + * Direction r or w? + */ + arg_name = dm_shift_arg(as); +- if (!strcasecmp(arg_name, "w")) ++ if (arg_name && !strcasecmp(arg_name, "w")) + fc->corrupt_bio_rw = WRITE; +- else if (!strcasecmp(arg_name, "r")) ++ else if (arg_name && !strcasecmp(arg_name, "r")) + fc->corrupt_bio_rw = READ; + else { + ti->error = "Invalid corrupt bio direction (r or w)"; +-- +2.35.3 + diff --git a/patches.suse/dm-flakey-fix-logic-when-corrupting-a-bio-aa56.patch b/patches.suse/dm-flakey-fix-logic-when-corrupting-a-bio-aa56.patch new file mode 100644 index 0000000..199335d --- /dev/null +++ b/patches.suse/dm-flakey-fix-logic-when-corrupting-a-bio-aa56.patch @@ -0,0 +1,70 @@ +From aa56b9b75996ff4c76a0a4181c2fa0206c3d91cc Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sun, 22 Jan 2023 14:03:56 -0500 +Subject: [PATCH] dm flakey: fix logic when corrupting a bio +Git-commit: aa56b9b75996ff4c76a0a4181c2fa0206c3d91cc +Patch-mainline: v6.3-rc1 +References: git-fixes + +If "corrupt_bio_byte" is set to corrupt reads and corrupt_bio_flags is +used, dm-flakey would erroneously return all writes as errors. Likewise, +if "corrupt_bio_byte" is set to corrupt writes, dm-flakey would return +errors for all reads. 
+ +Fix the logic so that if fc->corrupt_bio_byte is non-zero, dm-flakey +will not abort reads on writes with an error. + +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Reviewed-by: Sweet Tea Dorminy +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-flakey.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c +index 33608d436cec..335684a1aeaa 100644 +--- a/drivers/md/dm-flakey.c ++++ b/drivers/md/dm-flakey.c +@@ -365,9 +365,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) + /* + * Corrupt matching writes. + */ +- if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { +- if (all_corrupt_bio_flags_match(bio, fc)) +- corrupt_bio_data(bio, fc); ++ if (fc->corrupt_bio_byte) { ++ if (fc->corrupt_bio_rw == WRITE) { ++ if (all_corrupt_bio_flags_match(bio, fc)) ++ corrupt_bio_data(bio, fc); ++ } + goto map_bio; + } + +@@ -393,13 +395,14 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, + return DM_ENDIO_DONE; + + if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { +- if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && +- all_corrupt_bio_flags_match(bio, fc)) { +- /* +- * Corrupt successful matching READs while in down state. +- */ +- corrupt_bio_data(bio, fc); +- ++ if (fc->corrupt_bio_byte) { ++ if ((fc->corrupt_bio_rw == READ) && ++ all_corrupt_bio_flags_match(bio, fc)) { ++ /* ++ * Corrupt successful matching READs while in down state. ++ */ ++ corrupt_bio_data(bio, fc); ++ } + } else if (!test_bit(DROP_WRITES, &fc->flags) && + !test_bit(ERROR_WRITES, &fc->flags)) { + /* +-- +2.35.3 + diff --git a/patches.suse/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronousl-0356.patch b/patches.suse/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronousl-0356.patch new file mode 100644 index 0000000..e9b3096 --- /dev/null +++ b/patches.suse/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronousl-0356.patch @@ -0,0 +1,126 @@ +From 035641b01e72af4f6c6cf22a4bdb5d7dfc4e8e8e Mon Sep 17 00:00:00 2001 +From: Peter Korsgaard +Date: Wed, 16 Nov 2022 07:16:56 +0100 +Subject: [PATCH] dm init: add dm-mod.waitfor to wait for asynchronously probed + block devices +Git-commit: 035641b01e72af4f6c6cf22a4bdb5d7dfc4e8e8e +Patch-mainline: v6.2-rc1 +References: git-fixes + +Just calling wait_for_device_probe() is not enough to ensure that +asynchronously probed block devices are available (E.G. mmc, usb), so +add a "dm-mod.waitfor=[,..,]" parameter to get +dm-init to explicitly wait for specific block devices before +initializing the tables with logic similar to the rootwait logic that +was introduced with commit cc1ed7542c8c ("init: wait for +asynchronously scanned block devices"). + +E.G. with dm-verity on mmc using: +dm-mod.waitfor="PARTLABEL=hash-a,PARTLABEL=root-a" + +[ 0.671671] device-mapper: init: waiting for all devices to be available before creating mapped devices +[ 0.671679] device-mapper: init: waiting for device PARTLABEL=hash-a ... +[ 0.710695] mmc0: new HS200 MMC card at address 0001 +[ 0.711158] mmcblk0: mmc0:0001 004GA0 3.69 GiB +[ 0.715954] mmcblk0boot0: mmc0:0001 004GA0 partition 1 2.00 MiB +[ 0.722085] mmcblk0boot1: mmc0:0001 004GA0 partition 2 2.00 MiB +[ 0.728093] mmcblk0rpmb: mmc0:0001 004GA0 partition 3 512 KiB, chardev (249:0) +[ 0.738274] mmcblk0: p1 p2 p3 p4 p5 p6 p7 +[ 0.751282] device-mapper: init: waiting for device PARTLABEL=root-a ... 
+[ 0.751306] device-mapper: init: all devices available +[ 0.751683] device-mapper: verity: sha256 using implementation "sha256-generic" +[ 0.759344] device-mapper: ioctl: dm-0 (vroot) is ready +[ 0.766540] VFS: Mounted root (squashfs filesystem) readonly on device 254:0. + +Signed-off-by: Peter Korsgaard +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + .../admin-guide/device-mapper/dm-init.rst | 8 +++++++ + drivers/md/dm-init.c | 22 ++++++++++++++++++- + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst +index e5242ff17e9b..981d6a907699 100644 +--- a/Documentation/admin-guide/device-mapper/dm-init.rst ++++ b/Documentation/admin-guide/device-mapper/dm-init.rst +@@ -123,3 +123,11 @@ Other examples (per target): + 0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256 + fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd + 51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584 ++ ++For setups using device-mapper on top of asynchronously probed block ++devices (MMC, USB, ..), it may be necessary to tell dm-init to ++explicitly wait for them to become available before setting up the ++device-mapper tables. This can be done with the "dm-mod.waitfor=" ++module parameter, which takes a list of devices to wait for:: ++ ++ dm-mod.waitfor=[,..,] +diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c +index b0c45c6ebe0b..dc4381d68313 100644 +--- a/drivers/md/dm-init.c ++++ b/drivers/md/dm-init.c +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -18,12 +19,17 @@ + #define DM_MAX_DEVICES 256 + #define DM_MAX_TARGETS 256 + #define DM_MAX_STR_SIZE 4096 ++#define DM_MAX_WAITFOR 256 + + static char *create; + ++static char *waitfor[DM_MAX_WAITFOR]; ++ + /* + * Format: dm-mod.create=,,,,[,
+][;,,,,
[,
+]+] + * Table format: ++ * Block devices to wait for to become available before setting up tables: ++ * dm-mod.waitfor=[,..,] + * + * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format + * details. +@@ -266,7 +272,7 @@ static int __init dm_init_init(void) + struct dm_device *dev; + LIST_HEAD(devices); + char *str; +- int r; ++ int i, r; + + if (!create) + return 0; +@@ -286,6 +292,17 @@ static int __init dm_init_init(void) + DMINFO("waiting for all devices to be available before creating mapped devices"); + wait_for_device_probe(); + ++ for (i = 0; i < ARRAY_SIZE(waitfor); i++) { ++ if (waitfor[i]) { ++ DMINFO("waiting for device %s ...", waitfor[i]); ++ while (!dm_get_dev_t(waitfor[i])) ++ msleep(5); ++ } ++ } ++ ++ if (waitfor[0]) ++ DMINFO("all devices available"); ++ + list_for_each_entry(dev, &devices, list) { + if (dm_early_create(&dev->dmi, dev->table, + dev->target_args_array)) +@@ -301,3 +318,6 @@ late_initcall(dm_init_init); + + module_param(create, charp, 0); + MODULE_PARM_DESC(create, "Create a mapped device in early boot"); ++ ++module_param_array(waitfor, charp, NULL, 0); ++MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables"); +-- +2.35.3 + diff --git a/patches.suse/dm-integrity-call-kmem_cache_destroy-in-dm_integrity-6b79.patch b/patches.suse/dm-integrity-call-kmem_cache_destroy-in-dm_integrity-6b79.patch new file mode 100644 index 0000000..b509c5f --- /dev/null +++ b/patches.suse/dm-integrity-call-kmem_cache_destroy-in-dm_integrity-6b79.patch @@ -0,0 +1,43 @@ +From 6b79a428c02769f2a11f8ae76bf866226d134887 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 4 Apr 2023 13:34:28 -0400 +Subject: [PATCH] dm integrity: call kmem_cache_destroy() in + dm_integrity_init() error path +Git-commit: 6b79a428c02769f2a11f8ae76bf866226d134887 +Patch-mainline: v6.4-rc1 +References: git-fixes + +Otherwise the journal_io_cache will leak if dm_register_target() fails. + +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-integrity.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index b0d5057fbdd9..54830b07b829 100644 +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -4703,11 +4703,13 @@ static int __init dm_integrity_init(void) + } + + r = dm_register_target(&integrity_target); +- +- if (r < 0) ++ if (r < 0) { + DMERR("register failed %d", r); ++ kmem_cache_destroy(journal_io_cache); ++ return r; ++ } + +- return r; ++ return 0; + } + + static void __exit dm_integrity_exit(void) +-- +2.35.3 + diff --git a/patches.suse/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-b86f.patch b/patches.suse/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-b86f.patch new file mode 100644 index 0000000..f98b74d --- /dev/null +++ b/patches.suse/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-b86f.patch @@ -0,0 +1,69 @@ +From b86f4b790c998afdbc88fe1aa55cfe89c4068726 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 5 Dec 2023 16:39:16 +0100 +Subject: [PATCH] dm-integrity: don't modify bio's immutable bio_vec in + integrity_metadata() +Git-commit: b86f4b790c998afdbc88fe1aa55cfe89c4068726 +Patch-mainline: v6.7-rc7 +References: git-fixes + +__bio_for_each_segment assumes that the first struct bio_vec argument +doesn't change - it calls "bio_advance_iter_single((bio), &(iter), +(bvl).bv_len)" to advance the iterator. 
Unfortunately, the dm-integrity +code changes the bio_vec with "bv.bv_len -= pos". When this code path +is taken, the iterator would be out of sync and dm-integrity would +report errors. This happens if the machine is out of memory and +"kmalloc" fails. + +Fix this bug by making a copy of "bv" and changing the copy instead. + +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Fixes: 7eada909bfd7 ("dm: add integrity target") +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-integrity.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -1769,11 +1769,12 @@ static void integrity_metadata(struct wo + sectors_to_process = dio->range.n_sectors; + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { ++ struct bio_vec bv_copy = bv; + unsigned pos; + char *mem, *checksums_ptr; + + again: +- mem = bvec_kmap_local(&bv); ++ mem = bvec_kmap_local(&bv_copy); + pos = 0; + checksums_ptr = checksums; + do { +@@ -1782,7 +1783,7 @@ again: + sectors_to_process -= ic->sectors_per_block; + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; +- } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); ++ } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); + kunmap_local(mem); + + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, +@@ -1807,9 +1808,9 @@ again: + if (!sectors_to_process) + break; + +- if (unlikely(pos < bv.bv_len)) { +- bv.bv_offset += pos; +- bv.bv_len -= pos; ++ if (unlikely(pos < bv_copy.bv_len)) { ++ bv_copy.bv_offset += pos; ++ bv_copy.bv_len -= pos; + goto again; + } + } diff --git a/patches.suse/dm-integrity-fix-out-of-range-warning-8e91.patch b/patches.suse/dm-integrity-fix-out-of-range-warning-8e91.patch new file mode 100644 index 0000000..c19a65e --- /dev/null +++ b/patches.suse/dm-integrity-fix-out-of-range-warning-8e91.patch @@ -0,0 +1,47 @@ +From 8e91c2342351e0f5ef6c0a704384a7f6fc70c3b2 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Thu, 28 Mar 2024 15:30:39 +0100 +Subject: [PATCH] dm integrity: fix out-of-range warning +Git-commit: 8e91c2342351e0f5ef6c0a704384a7f6fc70c3b2 +Patch-mainline: v6.9-rc2 +References: git-fixes + +Depending on the value of CONFIG_HZ, clang complains about a pointless +Comparison: + +drivers/md/dm-integrity.c:4085:12: error: result of comparison of + constant 42949672950 with expression of type + 'unsigned int' is always false + [-Werror,-Wtautological-constant-out-of-range-compare] + if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + +As the check remains useful for other configurations, shut up the +warning by adding a second type cast to uint64_t. + +Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") +Signed-off-by: Arnd Bergmann +Reviewed-by: Mikulas Patocka +Reviewed-by: Justin Stitt +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-integrity.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index 37b9f8f1ae1a..7f3dc8ee6ab8 100644 +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -4221,7 +4221,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv + } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { + log2_sectors_per_bitmap_bit = !llval ? 
0 : __ilog2_u64(llval); + } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { +- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { ++ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { + r = -EINVAL; + ti->error = "Invalid bitmap_flush_interval argument"; + goto bad; +-- +2.35.3 + diff --git a/patches.suse/dm-integrity-reduce-vmalloc-space-footprint-on-32-bi-6d50.patch b/patches.suse/dm-integrity-reduce-vmalloc-space-footprint-on-32-bi-6d50.patch new file mode 100644 index 0000000..45d745b --- /dev/null +++ b/patches.suse/dm-integrity-reduce-vmalloc-space-footprint-on-32-bi-6d50.patch @@ -0,0 +1,48 @@ +From 6d50eb4725934fd22f5eeccb401000687c790fd0 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 26 Jun 2023 16:44:34 +0200 +Subject: [PATCH] dm integrity: reduce vmalloc space footprint on 32-bit + architectures +Git-commit: 6d50eb4725934fd22f5eeccb401000687c790fd0 +Patch-mainline: v6.5-rc1 +References: git-fixes + +It was reported that dm-integrity runs out of vmalloc space on 32-bit +architectures. On x86, there is only 128MiB vmalloc space and dm-integrity +consumes it quickly because it has a 64MiB journal and 8MiB recalculate +buffer. + +Fix this by reducing the size of the journal to 4MiB and the size of +the recalculate buffer to 1MiB, so that multiple dm-integrity devices +can be created and activated on 32-bit architectures. + +Cc: stable@vger.kernel.org +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-integrity.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index 5e5f1c029b75..0a910bb8db17 100644 +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -34,11 +34,11 @@ + #define DEFAULT_BUFFER_SECTORS 128 + #define DEFAULT_JOURNAL_WATERMARK 50 + #define DEFAULT_SYNC_MSEC 10000 +-#define DEFAULT_MAX_JOURNAL_SECTORS 131072 ++#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) + #define MIN_LOG2_INTERLEAVE_SECTORS 3 + #define MAX_LOG2_INTERLEAVE_SECTORS 31 + #define METADATA_WORKQUEUE_MAX_ACTIVE 16 +-#define RECALC_SECTORS 32768 ++#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) + #define RECALC_WRITE_SUPER 16 + #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ + #define BITMAP_FLUSH_INTERVAL (10 * HZ) +-- +2.35.3 + diff --git a/patches.suse/dm-raid-clean-up-four-equivalent-goto-tags-in-raid_c-e74c.patch b/patches.suse/dm-raid-clean-up-four-equivalent-goto-tags-in-raid_c-e74c.patch new file mode 100644 index 0000000..dfca21f --- /dev/null +++ b/patches.suse/dm-raid-clean-up-four-equivalent-goto-tags-in-raid_c-e74c.patch @@ -0,0 +1,96 @@ +From e74c874eabe2e9173a8fbdad616cd89c70eb8ffd Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Sat, 8 Jul 2023 17:21:52 +0800 +Subject: [PATCH] dm raid: clean up four equivalent goto tags in raid_ctr() +Git-commit: e74c874eabe2e9173a8fbdad616cd89c70eb8ffd +Patch-mainline: v6.5-rc4 +References: git-fixes + +There are four equivalent goto tags in raid_ctr(), clean them up to +use just one. + +There is no functional change and this is preparation to fix +raid_ctr()'s unprotected md_stop(). 
+ +Signed-off-by: Yu Kuai +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-raid.c | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index 3d7366b912bf..845b68c3fd5f 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -3251,8 +3251,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + r = md_start(&rs->md); + if (r) { + ti->error = "Failed to start raid array"; +- mddev_unlock(&rs->md); +- goto bad_md_start; ++ goto bad_unlock; + } + + /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */ +@@ -3260,8 +3259,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); + if (r) { + ti->error = "Failed to set raid4/5/6 journal mode"; +- mddev_unlock(&rs->md); +- goto bad_journal_mode_set; ++ goto bad_unlock; + } + } + +@@ -3271,19 +3269,15 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ + if (rs_is_raid456(rs)) { + r = rs_set_raid456_stripe_cache(rs); +- if (r) { +- mddev_unlock(&rs->md); +- goto bad_stripe_cache; +- } ++ if (r) ++ goto bad_unlock; + } + + /* Now do an early reshape check */ + if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { + r = rs_check_reshape(rs); +- if (r) { +- mddev_unlock(&rs->md); +- goto bad_check_reshape; +- } ++ if (r) ++ goto bad_unlock; + + /* Restore new, ctr requested layout to perform check */ + rs_config_restore(rs, &rs_layout); +@@ -3292,8 +3286,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + r = rs->md.pers->check_reshape(&rs->md); + if (r) { + ti->error = "Reshape check failed"; +- mddev_unlock(&rs->md); +- goto bad_check_reshape; ++ goto bad_unlock; + } + } + } +@@ -3304,10 +3297,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + mddev_unlock(&rs->md); + return 0; + +-bad_md_start: +-bad_journal_mode_set: +-bad_stripe_cache: +-bad_check_reshape: ++bad_unlock: ++ mddev_unlock(&rs->md); + md_stop(&rs->md); + bad: + raid_set_free(rs); +-- +2.35.3 + diff --git a/patches.suse/dm-raid-fix-false-positive-for-requeue-needed-during-b25b.patch b/patches.suse/dm-raid-fix-false-positive-for-requeue-needed-during-b25b.patch new file mode 100644 index 0000000..cc29801 --- /dev/null +++ b/patches.suse/dm-raid-fix-false-positive-for-requeue-needed-during-b25b.patch @@ -0,0 +1,46 @@ +From b25b8f4b8ecef0f48c05f0c3572daeabefe16526 Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Mon, 11 Mar 2024 13:42:55 -0400 +Subject: [PATCH] dm raid: fix false positive for requeue needed during reshape +Git-commit: b25b8f4b8ecef0f48c05f0c3572daeabefe16526 +Patch-mainline: v6.9-rc1 +References: git-fixes + +An empty flush doesn't have a payload, so it should never be looked at +when considering to possibly requeue a bio for the case when a reshape +is in progress. 
+ +Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") +Reported-by: Patrick Plenefisch +Signed-off-by: Ming Lei +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-raid.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index 6bb1765be1e5..b3c8920fe723 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -3329,14 +3329,14 @@ static int raid_map(struct dm_target *ti, struct bio *bio) + struct mddev *mddev = &rs->md; + + /* +- * If we're reshaping to add disk(s)), ti->len and ++ * If we're reshaping to add disk(s), ti->len and + * mddev->array_sectors will differ during the process + * (ti->len > mddev->array_sectors), so we have to requeue + * bios with addresses > mddev->array_sectors here or + * there will occur accesses past EOD of the component + * data images thus erroring the raid set. + */ +- if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) ++ if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) + return DM_MAPIO_REQUEUE; + + md_handle_request(mddev, bio); +-- +2.35.3 + diff --git a/patches.suse/dm-raid-fix-lockdep-waring-in-pers-hot_add_disk-9500.patch b/patches.suse/dm-raid-fix-lockdep-waring-in-pers-hot_add_disk-9500.patch new file mode 100644 index 0000000..a4efc06 --- /dev/null +++ b/patches.suse/dm-raid-fix-lockdep-waring-in-pers-hot_add_disk-9500.patch @@ -0,0 +1,49 @@ +From 95009ae904b1e9dca8db6f649f2d7c18a6e42c75 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Tue, 5 Mar 2024 15:23:06 +0800 +Subject: [PATCH] dm-raid: fix lockdep waring in "pers->hot_add_disk" +Git-commit: 95009ae904b1e9dca8db6f649f2d7c18a6e42c75 +Patch-mainline: v6.9-rc1 +References: git-fixes + +The lockdep assert is added by commit a448af25becf ("md/raid10: remove +rcu protection to access rdev from conf") in print_conf(). And I didn't +notice that dm-raid is calling "pers->hot_add_disk" without holding +'reconfig_mutex'. + +"pers->hot_add_disk" read and write many fields that is protected by +'reconfig_mutex', and raid_resume() already grab the lock in other +contex. Hence fix this problem by protecting "pers->host_add_disk" +with the lock. + +Fixes: 9092c02d9435 ("DM RAID: Add ability to restore transiently failed devices on resume") +Fixes: a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") +Cc: stable@vger.kernel.org # v6.7+ +Signed-off-by: Yu Kuai +Signed-off-by: Xiao Ni +Acked-by: Mike Snitzer +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20240305072306.2562024-10-yukuai1@huaweicloud.com +Signed-off-by: Coly Li + +--- + drivers/md/dm-raid.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index ea45f777691c..17e9af60bbf7 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -4091,7 +4091,9 @@ static void raid_resume(struct dm_target *ti) + * Take this opportunity to check whether any failed + * devices are reachable again. 
+ */ ++ mddev_lock_nointr(mddev); + attempt_restore_of_faulty_devices(rs); ++ mddev_unlock(mddev); + } + + if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { +-- +2.35.3 + diff --git a/patches.suse/dm-raid-fix-missing-reconfig_mutex-unlock-in-raid_ct-bae3.patch b/patches.suse/dm-raid-fix-missing-reconfig_mutex-unlock-in-raid_ct-bae3.patch new file mode 100644 index 0000000..e17c57c --- /dev/null +++ b/patches.suse/dm-raid-fix-missing-reconfig_mutex-unlock-in-raid_ct-bae3.patch @@ -0,0 +1,58 @@ +From bae3028799dc4f1109acc4df37c8ff06f2d8f1a0 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Sat, 8 Jul 2023 17:21:51 +0800 +Subject: [PATCH] dm raid: fix missing reconfig_mutex unlock in raid_ctr() + error paths +Git-commit: bae3028799dc4f1109acc4df37c8ff06f2d8f1a0 +Patch-mainline: v6.5-rc4 +References: git-fixes + +In the error paths 'bad_stripe_cache' and 'bad_check_reshape', +'reconfig_mutex' is still held after raid_ctr() returns. + +Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") +Signed-off-by: Yu Kuai +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-raid.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index 8846bf510a35..3d7366b912bf 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -3271,15 +3271,19 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ + if (rs_is_raid456(rs)) { + r = rs_set_raid456_stripe_cache(rs); +- if (r) ++ if (r) { ++ mddev_unlock(&rs->md); + goto bad_stripe_cache; ++ } + } + + /* Now do an early reshape check */ + if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { + r = rs_check_reshape(rs); +- if (r) ++ if (r) { ++ mddev_unlock(&rs->md); + goto bad_check_reshape; ++ } + + /* Restore new, ctr requested layout to perform check */ + rs_config_restore(rs, &rs_layout); +@@ -3288,6 +3292,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) + r = rs->md.pers->check_reshape(&rs->md); + if (r) { + ti->error = "Reshape check failed"; ++ mddev_unlock(&rs->md); + goto bad_check_reshape; + } + } +-- +2.35.3 + diff --git a/patches.suse/dm-remove-flush_scheduled_work-during-local_exit-0b22.patch b/patches.suse/dm-remove-flush_scheduled_work-during-local_exit-0b22.patch new file mode 100644 index 0000000..638fe1b --- /dev/null +++ b/patches.suse/dm-remove-flush_scheduled_work-during-local_exit-0b22.patch @@ -0,0 +1,43 @@ +From 0b22ff5360f5c4e11050b89206370fdf7dc0a226 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 14 Feb 2023 13:06:05 -0500 +Subject: [PATCH] dm: remove flush_scheduled_work() during local_exit() +Git-commit: 0b22ff5360f5c4e11050b89206370fdf7dc0a226 +Patch-mainline: v6.3-rc1 +References: git-fixes + +Commit acfe0ad74d2e1 ("dm: allocate a special workqueue for deferred +device removal") switched from using system workqueue to a single +workqueue local to DM. But it didn't eliminate the call to +flush_scheduled_work() that was introduced purely for the benefit of +deferred device removal with commit 2c140a246dc ("dm: allow remove to +be deferred"). + +Since DM core uses its own workqueue (and queue_work) there is no need +to call flush_scheduled_work() from local_exit(). local_exit()'s +destroy_workqueue(deferred_remove_workqueue) handles flushing work +started with queue_work(). 
+ +Fixes: acfe0ad74d2e1 ("dm: allocate a special workqueue for deferred device removal") +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 50dc1f4e4615..90b64bfc63b0 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -233,7 +233,6 @@ static int __init local_init(void) + + static void local_exit(void) + { +- flush_scheduled_work(); + destroy_workqueue(deferred_remove_workqueue); + + unregister_blkdev(_major, _name); +-- +2.35.3 + diff --git a/patches.suse/dm-send-just-one-event-on-resize-not-two-7533.patch b/patches.suse/dm-send-just-one-event-on-resize-not-two-7533.patch new file mode 100644 index 0000000..ace12e1 --- /dev/null +++ b/patches.suse/dm-send-just-one-event-on-resize-not-two-7533.patch @@ -0,0 +1,177 @@ +From 7533afa1d27ba1234146d31d2402c195cf195962 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 7 Feb 2023 08:33:06 -0500 +Subject: [PATCH] dm: send just one event on resize, not two +Git-commit: 7533afa1d27ba1234146d31d2402c195cf195962 +Patch-mainline: v6.3-rc1 +References: git-fixes + +Device mapper sends an uevent when the device is suspended, using the +function set_capacity_and_notify. However, this causes a race condition +with udev. + +Udev skips scanning dm devices that are suspended. If we send an uevent +while we are suspended, udev will be racing with device mapper resume +code. If the device mapper resume code wins the race, udev will process +the uevent after the device is resumed and it will properly scan the +device. + +However, if udev wins the race, it will receive the uevent, find out that +the dm device is suspended and skip scanning the device. This causes bugs +such as systemd unmounting the device - see +https://bugzilla.redhat.com/show_bug.cgi?id=2158628 + +This commit fixes this race. + +We replace the function set_capacity_and_notify with set_capacity, so that +the uevent is not sent at this point. In do_resume, we detect if the +capacity has changed and we pass a boolean variable need_resize_uevent to +dm_kobject_uevent. dm_kobject_uevent adds "RESIZE=1" to the uevent if +need_resize_uevent is set. 
+ +Signed-off-by: Mikulas Patocka +Tested-by: Peter Rajnoha +Cc: stable@vger.kernel.org +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-ioctl.c | 13 ++++++++++--- + drivers/md/dm.c | 27 +++++++++++++-------------- + drivers/md/dm.h | 2 +- + 3 files changed, 24 insertions(+), 18 deletions(-) + +diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c +index 9160159ef881..a3b86716f606 100644 +--- a/drivers/md/dm-ioctl.c ++++ b/drivers/md/dm-ioctl.c +@@ -482,7 +482,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, + dm_table_event(table); + dm_put_live_table(hc->md, srcu_idx); + +- if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr)) ++ if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr, false)) + param->flags |= DM_UEVENT_GENERATED_FLAG; + + md = hc->md; +@@ -995,7 +995,7 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si + + dm_ima_measure_on_device_remove(md, false); + +- if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) ++ if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr, false)) + param->flags |= DM_UEVENT_GENERATED_FLAG; + + dm_put(md); +@@ -1128,6 +1128,7 @@ static int do_resume(struct dm_ioctl *param) + struct hash_cell *hc; + struct mapped_device *md; + struct dm_table *new_map, *old_map = NULL; ++ bool need_resize_uevent = false; + + down_write(&_hash_lock); + +@@ -1148,6 +1149,8 @@ static int do_resume(struct dm_ioctl *param) + + /* Do we need to load a new map ? */ + if (new_map) { ++ sector_t old_size, new_size; ++ + /* Suspend if it isn't already suspended */ + if (param->flags & DM_SKIP_LOCKFS_FLAG) + suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; +@@ -1156,6 +1159,7 @@ static int do_resume(struct dm_ioctl *param) + if (!dm_suspended_md(md)) + dm_suspend(md, suspend_flags); + ++ old_size = dm_get_size(md); + old_map = dm_swap_table(md, new_map); + if (IS_ERR(old_map)) { + dm_sync_table(md); +@@ -1163,6 +1167,9 @@ static int do_resume(struct dm_ioctl *param) + dm_put(md); + return PTR_ERR(old_map); + } ++ new_size = dm_get_size(md); ++ if (old_size && new_size && old_size != new_size) ++ need_resize_uevent = true; + + if (dm_table_get_mode(new_map) & FMODE_WRITE) + set_disk_ro(dm_disk(md), 0); +@@ -1175,7 +1182,7 @@ static int do_resume(struct dm_ioctl *param) + if (!r) { + dm_ima_measure_on_device_resume(md, new_map ? true : false); + +- if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) ++ if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr, need_resize_uevent)) + param->flags |= DM_UEVENT_GENERATED_FLAG; + } + } +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index b424a6ee27ba..c391a618fb71 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2172,10 +2172,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, + if (size != dm_get_size(md)) + memset(&md->geometry, 0, sizeof(md->geometry)); + +- if (!get_capacity(md->disk)) +- set_capacity(md->disk, size); +- else +- set_capacity_and_notify(md->disk, size); ++ set_capacity(md->disk, size); + + dm_table_event_callback(t, event_callback, md); + +@@ -2968,23 +2965,25 @@ EXPORT_SYMBOL_GPL(dm_internal_resume_fast); + * Event notification. 
+ *---------------------------------------------------------------*/ + int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, +- unsigned cookie) ++ unsigned cookie, bool need_resize_uevent) + { + int r; + unsigned noio_flag; + char udev_cookie[DM_COOKIE_LENGTH]; +- char *envp[] = { udev_cookie, NULL }; +- +- noio_flag = memalloc_noio_save(); +- +- if (!cookie) +- r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action); +- else { ++ char *envp[3] = { NULL, NULL, NULL }; ++ char **envpp = envp; ++ if (cookie) { + snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", + DM_COOKIE_ENV_VAR_NAME, cookie); +- r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, +- action, envp); ++ *envpp++ = udev_cookie; + } ++ if (need_resize_uevent) { ++ *envpp++ = "RESIZE=1"; ++ } ++ ++ noio_flag = memalloc_noio_save(); ++ ++ r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); + + memalloc_noio_restore(noio_flag); + +diff --git a/drivers/md/dm.h b/drivers/md/dm.h +index 5201df03ce40..a9a3ffcad084 100644 +--- a/drivers/md/dm.h ++++ b/drivers/md/dm.h +@@ -203,7 +203,7 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, + void dm_put_table_device(struct mapped_device *md, struct dm_dev *d); + + int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, +- unsigned cookie); ++ unsigned cookie, bool need_resize_uevent); + + void dm_internal_suspend(struct mapped_device *md); + void dm_internal_resume(struct mapped_device *md); +-- +2.35.3 + diff --git a/patches.suse/dm-stats-check-for-and-propagate-alloc_percpu-failur-d3aa.patch b/patches.suse/dm-stats-check-for-and-propagate-alloc_percpu-failur-d3aa.patch new file mode 100644 index 0000000..453aaf3 --- /dev/null +++ b/patches.suse/dm-stats-check-for-and-propagate-alloc_percpu-failur-d3aa.patch @@ -0,0 +1,88 @@ +From d3aa3e060c4a80827eb801fc448debc9daa7c46b Mon Sep 17 00:00:00 2001 +From: Jiasheng Jiang +Date: Thu, 16 Mar 2023 14:55:06 +0800 +Subject: [PATCH] dm stats: check for and propagate alloc_percpu failure +Git-commit: d3aa3e060c4a80827eb801fc448debc9daa7c46b +Patch-mainline: v6.3-rc4 +References: git-fixes + +Check alloc_precpu()'s return value and return an error from +dm_stats_init() if it fails. Update alloc_dev() to fail if +dm_stats_init() does. + +Otherwise, a NULL pointer dereference will occur in dm_stats_cleanup() +even if dm-stats isn't being actively used. 
+ +Fixes: fd2ed4d25270 ("dm: add statistics support") +Cc: stable@vger.kernel.org +Signed-off-by: Jiasheng Jiang +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-stats.c | 7 ++++++- + drivers/md/dm-stats.h | 2 +- + drivers/md/dm.c | 4 +++- + 3 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c +index c21a19ab73f7..db2d997a6c18 100644 +--- a/drivers/md/dm-stats.c ++++ b/drivers/md/dm-stats.c +@@ -188,7 +188,7 @@ static int dm_stat_in_flight(struct dm_stat_shared *shared) + atomic_read(&shared->in_flight[WRITE]); + } + +-void dm_stats_init(struct dm_stats *stats) ++int dm_stats_init(struct dm_stats *stats) + { + int cpu; + struct dm_stats_last_position *last; +@@ -197,11 +197,16 @@ void dm_stats_init(struct dm_stats *stats) + INIT_LIST_HEAD(&stats->list); + stats->precise_timestamps = false; + stats->last = alloc_percpu(struct dm_stats_last_position); ++ if (!stats->last) ++ return -ENOMEM; ++ + for_each_possible_cpu(cpu) { + last = per_cpu_ptr(stats->last, cpu); + last->last_sector = (sector_t)ULLONG_MAX; + last->last_rw = UINT_MAX; + } ++ ++ return 0; + } + + void dm_stats_cleanup(struct dm_stats *stats) +diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h +index 0bc152c8e4f3..c6728c8b4159 100644 +--- a/drivers/md/dm-stats.h ++++ b/drivers/md/dm-stats.h +@@ -21,7 +21,7 @@ struct dm_stats_aux { + unsigned long long duration_ns; + }; + +-void dm_stats_init(struct dm_stats *st); ++int dm_stats_init(struct dm_stats *st); + void dm_stats_cleanup(struct dm_stats *st); + + struct mapped_device; +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index eace45a18d45..b6ace995b9ca 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2097,7 +2097,9 @@ static struct mapped_device *alloc_dev(int minor) + if (!md->pending_io) + goto bad; + +- dm_stats_init(&md->stats); ++ r = dm_stats_init(&md->stats); ++ if (r < 0) ++ goto bad; + + /* Populate the mapping, nobody knows we exist yet */ + spin_lock(&_minor_lock); +-- +2.35.3 + diff --git a/patches.suse/dm-thin-add-cond_resched-to-various-workqueue-loops-e4f8.patch b/patches.suse/dm-thin-add-cond_resched-to-various-workqueue-loops-e4f8.patch new file mode 100644 index 0000000..47eaefb --- /dev/null +++ b/patches.suse/dm-thin-add-cond_resched-to-various-workqueue-loops-e4f8.patch @@ -0,0 +1,41 @@ +From e4f80303c2353952e6e980b23914e4214487f2a6 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Thu, 16 Feb 2023 15:29:44 -0500 +Subject: [PATCH] dm thin: add cond_resched() to various workqueue loops +Git-commit: e4f80303c2353952e6e980b23914e4214487f2a6 +Patch-mainline: v6.3-rc1 +References: git-fixes + +Otherwise on resource constrained systems these workqueues may be too +greedy. 
+ +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-thin.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index affd91a53042..6cd105c1cef3 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -2209,6 +2209,7 @@ static void process_thin_deferred_bios(struct thin_c *tc) + throttle_work_update(&pool->throttle); + dm_pool_issue_prefetches(pool->pmd); + } ++ cond_resched(); + } + blk_finish_plug(&plug); + } +@@ -2291,6 +2292,7 @@ static void process_thin_deferred_cells(struct thin_c *tc) + else + pool->process_cell(tc, cell); + } ++ cond_resched(); + } while (!list_empty(&cells)); + } + +-- +2.35.3 + diff --git a/patches.suse/dm-thin-fix-deadlock-when-swapping-to-thin-device-9bbf.patch b/patches.suse/dm-thin-fix-deadlock-when-swapping-to-thin-device-9bbf.patch new file mode 100644 index 0000000..0ce43ab --- /dev/null +++ b/patches.suse/dm-thin-fix-deadlock-when-swapping-to-thin-device-9bbf.patch @@ -0,0 +1,74 @@ +From 9bbf5feecc7eab2c370496c1c161bbfe62084028 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 27 Feb 2023 23:23:17 +0800 +Subject: [PATCH] dm thin: fix deadlock when swapping to thin device +Git-commit: 9bbf5feecc7eab2c370496c1c161bbfe62084028 +Patch-mainline: v6.3-rc4 +References: bsc#1177529 + +This is an already known issue that dm-thin volume cannot be used as +swap, otherwise a deadlock may happen when dm-thin internal memory +demand triggers swap I/O on the dm-thin volume itself. + +But thanks to commit a666e5c05e7c ("dm: fix deadlock when swapping to +encrypted device"), the limit_swap_bios target flag can also be used +for dm-thin to avoid the recursive I/O when it is used as swap. + +Fix is to simply set ti->limit_swap_bios to true in both pool_ctr() +and thin_ctr(). + +In my test, I create a dm-thin volume /dev/vg/swap and use it as swap +device. Then I run fio on another dm-thin volume /dev/vg/main and use +large --blocksize to trigger swap I/O onto /dev/vg/swap. + +The following fio command line is used in my test, + fio --name recursive-swap-io --lockmem 1 --iodepth 128 \ + --ioengine libaio --filename /dev/vg/main --rw randrw \ + --blocksize 1M --numjobs 32 --time_based --runtime=12h + +Without this fix, the whole system can be locked up within 15 seconds. + +With this fix, there is no any deadlock or hung task observed after +2 hours of running fio. + +Furthermore, if blocksize is changed from 1M to 128M, after around 30 +seconds fio has no visible I/O, and the out-of-memory killer message +shows up in kernel message. After around 20 minutes all fio processes +are killed and the whole system is back to being alive. + +This is exactly what is expected when recursive I/O happens on dm-thin +volume when it is used as swap. 
+ +Depends-on: a666e5c05e7c ("dm: fix deadlock when swapping to encrypted device") +Cc: stable@vger.kernel.org +Signed-off-by: Coly Li +Acked-by: Mikulas Patocka +Signed-off-by: Mike Snitzer + +--- + drivers/md/dm-thin.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index 6cd105c1cef3..13d4677baafd 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -3369,6 +3369,7 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv) + pt->low_water_blocks = low_water_blocks; + pt->adjusted_pf = pt->requested_pf = pf; + ti->num_flush_bios = 1; ++ ti->limit_swap_bios = true; + + /* + * Only need to enable discards if the pool should pass +@@ -4249,6 +4250,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv) + goto bad; + + ti->num_flush_bios = 1; ++ ti->limit_swap_bios = true; + ti->flush_supported = true; + ti->accounts_remapped_io = true; + ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); +-- +2.35.3 + diff --git a/patches.suse/dm-thin-metadata-Fix-ABBA-deadlock-by-resetting-dm_b-d483.patch b/patches.suse/dm-thin-metadata-Fix-ABBA-deadlock-by-resetting-dm_b-d483.patch new file mode 100644 index 0000000..c411a27 --- /dev/null +++ b/patches.suse/dm-thin-metadata-Fix-ABBA-deadlock-by-resetting-dm_b-d483.patch @@ -0,0 +1,340 @@ +From d48300120627a1cb98914738fff38b424625b8ad Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Mon, 5 Jun 2023 15:03:16 +0800 +Subject: [PATCH] dm thin metadata: Fix ABBA deadlock by resetting + dm_bufio_client +Git-commit: d48300120627a1cb98914738fff38b424625b8ad +Patch-mainline: v6.5-rc1 +References: git-fixes + +As described in commit 8111964f1b85 ("dm thin: Fix ABBA deadlock between +shrink_slab and dm_pool_abort_metadata"), ABBA deadlocks will be +triggered because shrinker_rwsem currently needs to held by +dm_pool_abort_metadata() as a side-effect of thin-pool metadata +operation failure. + +The following three problem scenarios have been noticed: + +1) Described by commit 8111964f1b85 ("dm thin: Fix ABBA deadlock between + shrink_slab and dm_pool_abort_metadata") + +2) shrinker_rwsem and throttle->lock + P1(drop cache) P2(kworker) +drop_caches_sysctl_handler + drop_slab + shrink_slab + down_read(&shrinker_rwsem) - LOCK A + do_shrink_slab + super_cache_scan + prune_icache_sb + dispose_list + evict + ext4_evict_inode + ext4_clear_inode + ext4_discard_preallocations + ext4_mb_load_buddy_gfp + ext4_mb_init_cache + ext4_wait_block_bitmap + __ext4_error + ext4_handle_error + ext4_commit_super + ... + dm_submit_bio + do_worker + throttle_work_update + down_write(&t->lock) -- LOCK B + process_deferred_bios + commit + metadata_operation_failed + dm_pool_abort_metadata + dm_block_manager_create + dm_bufio_client_create + register_shrinker + down_write(&shrinker_rwsem) + -- LOCK A + thin_map + thin_bio_map + thin_defer_bio_with_throttle + throttle_lock + down_read(&t->lock) - LOCK B + +3) shrinker_rwsem and wait_on_buffer + P1(drop cache) P2(kworker) +drop_caches_sysctl_handler + drop_slab + shrink_slab + down_read(&shrinker_rwsem) - LOCK A + do_shrink_slab + ... 
+ ext4_wait_block_bitmap + __ext4_error + ext4_handle_error + jbd2_journal_abort + jbd2_journal_update_sb_errno + jbd2_write_superblock + submit_bh + // LOCK B + // RELEASE B + do_worker + throttle_work_update + down_write(&t->lock) - LOCK B + process_deferred_bios + process_bio + commit + metadata_operation_failed + dm_pool_abort_metadata + dm_block_manager_create + dm_bufio_client_create + register_shrinker + register_shrinker_prepared + down_write(&shrinker_rwsem) - LOCK A + bio_endio + wait_on_buffer + __wait_on_buffer + +Fix these by resetting dm_bufio_client without holding shrinker_rwsem. + +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Fixes: 8111964f1b85 ("dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata") +Cc: stable@vger.kernel.org +Signed-off-by: Li Lingfeng +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-bufio.c | 7 ++ + drivers/md/dm-thin-metadata.c | 58 ++++++++------------ + drivers/md/persistent-data/dm-block-manager.c | 6 ++ + drivers/md/persistent-data/dm-block-manager.h | 1 + drivers/md/persistent-data/dm-space-map.h | 3 - + drivers/md/persistent-data/dm-transaction-manager.c | 3 + + include/linux/dm-bufio.h | 2 + 7 files changed, 46 insertions(+), 34 deletions(-) + +--- a/drivers/md/dm-bufio.c ++++ b/drivers/md/dm-bufio.c +@@ -1914,6 +1914,13 @@ void dm_bufio_client_destroy(struct dm_b + } + EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); + ++void dm_bufio_client_reset(struct dm_bufio_client *c) ++{ ++ drop_buffers(c); ++ flush_work(&c->shrink_work); ++} ++EXPORT_SYMBOL_GPL(dm_bufio_client_reset); ++ + void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start) + { + c->start = start; +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -597,6 +597,8 @@ static int __format_metadata(struct dm_p + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { ++ pmd->tm = NULL; ++ pmd->metadata_sm = NULL; + DMERR("tm_create_with_sm failed"); + return r; + } +@@ -605,6 +607,7 @@ static int __format_metadata(struct dm_p + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_create failed"); + r = PTR_ERR(pmd->data_sm); ++ pmd->data_sm = NULL; + goto bad_cleanup_tm; + } + +@@ -635,11 +638,15 @@ static int __format_metadata(struct dm_p + + bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); ++ pmd->nb_tm = NULL; + bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); ++ pmd->data_sm = NULL; + bad_cleanup_tm: + dm_tm_destroy(pmd->tm); ++ pmd->tm = NULL; + dm_sm_destroy(pmd->metadata_sm); ++ pmd->metadata_sm = NULL; + + return r; + } +@@ -705,6 +712,8 @@ static int __open_metadata(struct dm_poo + sizeof(disk_super->metadata_space_map_root), + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { ++ pmd->tm = NULL; ++ pmd->metadata_sm = NULL; + DMERR("tm_open_with_sm failed"); + goto bad_unlock_sblock; + } +@@ -714,6 +723,7 @@ static int __open_metadata(struct dm_poo + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_open failed"); + r = PTR_ERR(pmd->data_sm); ++ pmd->data_sm = NULL; + goto bad_cleanup_tm; + } + +@@ -740,9 +750,12 @@ static int __open_metadata(struct dm_poo + + bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); ++ pmd->data_sm = NULL; + bad_cleanup_tm: + dm_tm_destroy(pmd->tm); ++ pmd->tm = NULL; + dm_sm_destroy(pmd->metadata_sm); ++ pmd->metadata_sm = NULL; + bad_unlock_sblock: + dm_bm_unlock(sblock); + +@@ -789,9 +802,13 @@ static void __destroy_persistent_data_ob + bool destroy_bm) + { + dm_sm_destroy(pmd->data_sm); ++ pmd->data_sm = NULL; + 
dm_sm_destroy(pmd->metadata_sm); ++ pmd->metadata_sm = NULL; + dm_tm_destroy(pmd->nb_tm); ++ pmd->nb_tm = NULL; + dm_tm_destroy(pmd->tm); ++ pmd->tm = NULL; + if (destroy_bm) + dm_block_manager_destroy(pmd->bm); + } +@@ -999,8 +1016,7 @@ int dm_pool_metadata_close(struct dm_poo + __func__, r); + } + pmd_write_unlock(pmd); +- if (!pmd->fail_io) +- __destroy_persistent_data_objects(pmd, true); ++ __destroy_persistent_data_objects(pmd, true); + + kfree(pmd); + return 0; +@@ -1875,53 +1891,29 @@ static void __set_abort_with_changes_fla + int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) + { + int r = -EINVAL; +- struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with pmd->root_lock held below */ + if (unlikely(pmd->fail_io)) + return r; + +- /* +- * Replacement block manager (new_bm) is created and old_bm destroyed outside of +- * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of +- * shrinker associated with the block manager's bufio client vs pmd root_lock). +- * - must take shrinker_rwsem without holding pmd->root_lock +- */ +- new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, +- THIN_MAX_CONCURRENT_LOCKS); +- + pmd_write_lock(pmd); + if (pmd->fail_io) { + pmd_write_unlock(pmd); +- goto out; ++ return r; + } +- + __set_abort_with_changes_flags(pmd); ++ ++ /* destroy data_sm/metadata_sm/nb_tm/tm */ + __destroy_persistent_data_objects(pmd, false); +- old_bm = pmd->bm; +- if (IS_ERR(new_bm)) { +- DMERR("could not create block manager during abort"); +- pmd->bm = NULL; +- r = PTR_ERR(new_bm); +- goto out_unlock; +- } + +- pmd->bm = new_bm; ++ /* reset bm */ ++ dm_block_manager_reset(pmd->bm); ++ ++ /* rebuild data_sm/metadata_sm/nb_tm/tm */ + r = __open_or_format_metadata(pmd, false); +- if (r) { +- pmd->bm = NULL; +- goto out_unlock; +- } +- new_bm = NULL; +-out_unlock: + if (r) + pmd->fail_io = true; + pmd_write_unlock(pmd); +- dm_block_manager_destroy(old_bm); +-out: +- if (new_bm && !IS_ERR(new_bm)) +- dm_block_manager_destroy(new_bm); +- + return r; + } + +--- a/drivers/md/persistent-data/dm-block-manager.c ++++ b/drivers/md/persistent-data/dm-block-manager.c +@@ -415,6 +415,12 @@ void dm_block_manager_destroy(struct dm_ + } + EXPORT_SYMBOL_GPL(dm_block_manager_destroy); + ++void dm_block_manager_reset(struct dm_block_manager *bm) ++{ ++ dm_bufio_client_reset(bm->bufio); ++} ++EXPORT_SYMBOL_GPL(dm_block_manager_reset); ++ + unsigned dm_bm_block_size(struct dm_block_manager *bm) + { + return dm_bufio_get_block_size(bm->bufio); +--- a/drivers/md/persistent-data/dm-block-manager.h ++++ b/drivers/md/persistent-data/dm-block-manager.h +@@ -35,6 +35,7 @@ struct dm_block_manager *dm_block_manage + struct block_device *bdev, unsigned block_size, + unsigned max_held_per_thread); + void dm_block_manager_destroy(struct dm_block_manager *bm); ++void dm_block_manager_reset(struct dm_block_manager *bm); + + unsigned dm_bm_block_size(struct dm_block_manager *bm); + dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm); +--- a/drivers/md/persistent-data/dm-space-map.h ++++ b/drivers/md/persistent-data/dm-space-map.h +@@ -76,7 +76,8 @@ struct dm_space_map { + + static inline void dm_sm_destroy(struct dm_space_map *sm) + { +- sm->destroy(sm); ++ if (sm) ++ sm->destroy(sm); + } + + static inline int dm_sm_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +--- a/drivers/md/persistent-data/dm-transaction-manager.c ++++ b/drivers/md/persistent-data/dm-transaction-manager.c +@@ -197,6 +197,9 @@ 
EXPORT_SYMBOL_GPL(dm_tm_create_non_block + + void dm_tm_destroy(struct dm_transaction_manager *tm) + { ++ if (!tm) ++ return; ++ + if (!tm->is_clone) + wipe_shadow_table(tm); + +--- a/include/linux/dm-bufio.h ++++ b/include/linux/dm-bufio.h +@@ -37,6 +37,8 @@ dm_bufio_client_create(struct block_devi + */ + void dm_bufio_client_destroy(struct dm_bufio_client *c); + ++void dm_bufio_client_reset(struct dm_bufio_client *c); ++ + /* + * Set the sector range. + * When this function is called, there must be no I/O in progress on the bufio diff --git a/patches.suse/dm-thin-metadata-check-fail_io-before-using-data_sm-cb65.patch b/patches.suse/dm-thin-metadata-check-fail_io-before-using-data_sm-cb65.patch new file mode 100644 index 0000000..24f8d80 --- /dev/null +++ b/patches.suse/dm-thin-metadata-check-fail_io-before-using-data_sm-cb65.patch @@ -0,0 +1,103 @@ +From cb65b282c9640c27d3129e2e04b711ce1b352838 Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Tue, 6 Jun 2023 20:20:24 +0800 +Subject: [PATCH] dm thin metadata: check fail_io before using data_sm +Git-commit: cb65b282c9640c27d3129e2e04b711ce1b352838 +Patch-mainline: v6.4-rc7 +References: git-fixes + +Must check pmd->fail_io before using pmd->data_sm since +pmd->data_sm may be destroyed by other processes. + + P1(kworker) P2(message) +do_worker + process_prepared + process_prepared_discard_passdown_pt2 + dm_pool_dec_data_range + pool_message + commit + dm_pool_commit_metadata + ↓ + // commit failed + metadata_operation_failed + abort_transaction + dm_pool_abort_metadata + __open_or_format_metadata + ↓ + dm_sm_disk_open + ↓ + // open failed + // pmd->data_sm is NULL + dm_sm_dec_blocks + ↓ + // try to access pmd->data_sm --> UAF + +As shown above, if dm_pool_commit_metadata() and +dm_pool_abort_metadata() fail in pool_message process, kworker may +trigger UAF. 
+ +Fixes: be500ed721a6 ("dm space maps: improve performance with inc/dec on ranges of blocks") +Cc: stable@vger.kernel.org +Signed-off-by: Li Lingfeng +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-thin-metadata.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c +index 9f5cb52c5763..b9461faa9f0d 100644 +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -1756,13 +1756,15 @@ int dm_thin_remove_range(struct dm_thin_device *td, + + int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) + { +- int r; ++ int r = -EINVAL; + uint32_t ref_count; + + down_read(&pmd->root_lock); +- r = dm_sm_get_count(pmd->data_sm, b, &ref_count); +- if (!r) +- *result = (ref_count > 1); ++ if (!pmd->fail_io) { ++ r = dm_sm_get_count(pmd->data_sm, b, &ref_count); ++ if (!r) ++ *result = (ref_count > 1); ++ } + up_read(&pmd->root_lock); + + return r; +@@ -1770,10 +1772,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re + + int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) + { +- int r = 0; ++ int r = -EINVAL; + + pmd_write_lock(pmd); +- r = dm_sm_inc_blocks(pmd->data_sm, b, e); ++ if (!pmd->fail_io) ++ r = dm_sm_inc_blocks(pmd->data_sm, b, e); + pmd_write_unlock(pmd); + + return r; +@@ -1781,10 +1784,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_ + + int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) + { +- int r = 0; ++ int r = -EINVAL; + + pmd_write_lock(pmd); +- r = dm_sm_dec_blocks(pmd->data_sm, b, e); ++ if (!pmd->fail_io) ++ r = dm_sm_dec_blocks(pmd->data_sm, b, e); + pmd_write_unlock(pmd); + + return r; +-- +2.35.3 + diff --git a/patches.suse/dm-verity-align-struct-dm_verity_fec_io-properly-38bc.patch b/patches.suse/dm-verity-align-struct-dm_verity_fec_io-properly-38bc.patch new file mode 100644 index 0000000..4639f5a --- /dev/null +++ b/patches.suse/dm-verity-align-struct-dm_verity_fec_io-properly-38bc.patch @@ -0,0 +1,59 @@ +From 38bc1ab135db87577695816b190e7d6d8ec75879 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 28 Nov 2023 14:50:23 +0100 +Subject: [PATCH] dm-verity: align struct dm_verity_fec_io properly +Git-commit: 38bc1ab135db87577695816b190e7d6d8ec75879 +Patch-mainline: v6.7-rc4 +References: git-fixes + +dm_verity_fec_io is placed after the end of two hash digests. If the hash +digest has unaligned length, struct dm_verity_fec_io could be unaligned. + +This commit fixes the placement of struct dm_verity_fec_io, so that it's +aligned. 
+ +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Fixes: a739ff3f543a ("dm verity: add support for forward error correction") +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-verity-fec.c | 3 ++- + drivers/md/dm-verity.h | 6 ------ + 2 files changed, 2 insertions(+), 7 deletions(-) + +diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c +index 2099c755119e..b475200d8586 100644 +--- a/drivers/md/dm-verity-fec.c ++++ b/drivers/md/dm-verity-fec.c +@@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v) + */ + static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) + { +- return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); ++ return (struct dm_verity_fec_io *) ++ ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io)); + } + + /* +diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h +index f96f4e281ee4..f9d522c870e6 100644 +--- a/drivers/md/dm-verity.h ++++ b/drivers/md/dm-verity.h +@@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, + return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; + } + +-static inline u8 *verity_io_digest_end(struct dm_verity *v, +- struct dm_verity_io *io) +-{ +- return verity_io_want_digest(v, io) + v->digest_size; +-} +- + extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, + int (*process)(struct dm_verity *v, +-- +2.35.3 + diff --git a/patches.suse/dm-verity-dm-crypt-align-struct-bvec_iter-correctly-787f.patch b/patches.suse/dm-verity-dm-crypt-align-struct-bvec_iter-correctly-787f.patch new file mode 100644 index 0000000..fd70c68 --- /dev/null +++ b/patches.suse/dm-verity-dm-crypt-align-struct-bvec_iter-correctly-787f.patch @@ -0,0 +1,66 @@ +From 787f1b2800464aa277236a66eb3c279535edd460 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 20 Feb 2024 19:11:51 +0100 +Subject: [PATCH] dm-verity, dm-crypt: align "struct bvec_iter" correctly +Git-commit: 787f1b2800464aa277236a66eb3c279535edd460 +Patch-mainline: v6.8-rc6 +References: git-fixes + +"struct bvec_iter" is defined with the __packed attribute, so it is +aligned on a single byte. On X86 (and on other architectures that support +unaligned addresses in hardware), "struct bvec_iter" is accessed using the +8-byte and 4-byte memory instructions, however these instructions are less +efficient if they operate on unaligned addresses. + +(on RISC machines that don't have unaligned access in hardware, GCC +generates byte-by-byte accesses that are very inefficient - see [1]) + +This commit reorders the entries in "struct dm_verity_io" and "struct +convert_context", so that "struct bvec_iter" is aligned on 8 bytes. 
+ +[1] https://lore.kernel.org/all/ZcLuWUNRZadJr0tQ@fedora/T/ + +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-crypt.c | 4 ++-- + drivers/md/dm-verity.h | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -52,11 +52,11 @@ + struct convert_context { + struct completion restart; + struct bio *bio_in; +- struct bio *bio_out; + struct bvec_iter iter_in; ++ struct bio *bio_out; + struct bvec_iter iter_out; +- u64 cc_sector; + atomic_t cc_pending; ++ u64 cc_sector; + union { + struct skcipher_request *req; + struct aead_request *req_aead; +--- a/drivers/md/dm-verity.h ++++ b/drivers/md/dm-verity.h +@@ -76,12 +76,12 @@ struct dm_verity_io { + /* original value of bio->bi_end_io */ + bio_end_io_t *orig_bi_end_io; + ++ struct bvec_iter iter; ++ + sector_t block; + unsigned n_blocks; + bool in_tasklet; + +- struct bvec_iter iter; +- + struct work_struct work; + + /* diff --git a/patches.suse/dm-verity-don-t-perform-FEC-for-failed-readahead-IO-0193.patch b/patches.suse/dm-verity-don-t-perform-FEC-for-failed-readahead-IO-0193.patch new file mode 100644 index 0000000..a0293b7 --- /dev/null +++ b/patches.suse/dm-verity-don-t-perform-FEC-for-failed-readahead-IO-0193.patch @@ -0,0 +1,88 @@ +From 0193e3966ceeeef69e235975918b287ab093082b Mon Sep 17 00:00:00 2001 +From: Wu Bo +Date: Tue, 21 Nov 2023 20:51:50 -0700 +Subject: [PATCH] dm verity: don't perform FEC for failed readahead IO +Git-commit: 0193e3966ceeeef69e235975918b287ab093082b +Patch-mainline: v6.7-rc4 +References: git-fixes + +We found an issue under Android OTA scenario that many BIOs have to do +FEC where the data under dm-verity is 100% complete and no corruption. + +Android OTA has many dm-block layers, from upper to lower: +dm-verity +dm-snapshot +dm-origin & dm-cow +dm-linear +ufs + +DM tables have to change 2 times during Android OTA merging process. +When doing table change, the dm-snapshot will be suspended for a while. +During this interval, many readahead IOs are submitted to dm_verity +from filesystem. Then the kverity works are busy doing FEC process +which cost too much time to finish dm-verity IO. This causes needless +delay which feels like system is hung. + +After adding debugging it was found that each readahead IO needed +around 10s to finish when this situation occurred. This is due to IO +Amplification: + +dm-snapshot suspend +erofs_readahead // 300+ io is submitted + dm_submit_bio (dm_verity) + dm_submit_bio (dm_snapshot) + bio return EIO + bio got nothing, it's empty + verity_end_io + verity_verify_io + forloop range(0, io->n_blocks) // each io->nblocks ~= 20 + verity_fec_decode + fec_decode_rsb + fec_read_bufs + forloop range(0, v->fec->rsn) // v->fec->rsn = 253 + new_read + submit_bio (dm_snapshot) + end loop + end loop +dm-snapshot resume + +Readahead BIOs get nothing while dm-snapshot is suspended, so all of +them will cause verity's FEC. +Each readahead BIO needs to verify ~20 (io->nblocks) blocks. +Each block needs to do FEC, and every block needs to do 253 +(v->fec->rsn) reads. +So during the suspend interval(~200ms), 300 readahead BIOs trigger +~1518000 (300*20*253) IOs to dm-snapshot. + +As readahead IO is not required by userspace, and to fix this issue, +it is best to pass readahead errors to upper layer to handle it. 
+ +Cc: stable@vger.kernel.org +Fixes: a739ff3f543a ("dm verity: add support for forward error correction") +Signed-off-by: Wu Bo +Reviewed-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-verity-target.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c +index beec14b6b044..14e58ae70521 100644 +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -667,7 +667,9 @@ static void verity_end_io(struct bio *bio) + struct dm_verity_io *io = bio->bi_private; + + if (bio->bi_status && +- (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { ++ (!verity_fec_is_enabled(io->v) || ++ verity_is_system_shutting_down() || ++ (bio->bi_opf & REQ_RAHEAD))) { + verity_finish_io(io, bio->bi_status); + return; + } +-- +2.35.3 + diff --git a/patches.suse/dm-verity-fix-error-handling-for-check_at_most_once--e8c5.patch b/patches.suse/dm-verity-fix-error-handling-for-check_at_most_once--e8c5.patch new file mode 100644 index 0000000..ed8baf0 --- /dev/null +++ b/patches.suse/dm-verity-fix-error-handling-for-check_at_most_once--e8c5.patch @@ -0,0 +1,49 @@ +From e8c5d45f82ce0c238a4817739892fe8897a3dcc3 Mon Sep 17 00:00:00 2001 +From: Yeongjin Gil +Date: Mon, 20 Mar 2023 15:59:32 +0900 +Subject: [PATCH] dm verity: fix error handling for check_at_most_once on FEC +Git-commit: e8c5d45f82ce0c238a4817739892fe8897a3dcc3 +Patch-mainline: v6.4-rc1 +References: git-fixes + +In verity_end_io(), if bi_status is not BLK_STS_OK, it can be return +directly. But if FEC configured, it is desired to correct the data page +through verity_verify_io. And the return value will be converted to +blk_status and passed to verity_finish_io(). + +BTW, when a bit is set in v->validated_blocks, verity_verify_io() skips +verification regardless of I/O error for the corresponding bio. In this +case, the I/O error could not be returned properly, and as a result, +there is a problem that abnormal data could be read for the +corresponding block. + +To fix this problem, when an I/O error occurs, do not skip verification +even if the bit related is set in v->validated_blocks. 
+ +Fixes: 843f38d382b1 ("dm verity: add 'check_at_most_once' option to only validate hashes once") +Cc: stable@vger.kernel.org +Reviewed-by: Sungjong Seo +Signed-off-by: Yeongjin Gil +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-verity-target.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c +index ade83ef3b439..9316399b920e 100644 +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -523,7 +523,7 @@ static int verity_verify_io(struct dm_verity_io *io) + sector_t cur_block = io->block + b; + struct ahash_request *req = verity_io_hash_req(v, io); + +- if (v->validated_blocks && ++ if (v->validated_blocks && bio->bi_status == BLK_STS_OK && + likely(test_bit(cur_block, v->validated_blocks))) { + verity_bv_skip_block(v, io, iter); + continue; +-- +2.35.3 + diff --git a/patches.suse/dm-zoned-free-dmz-ddev-array-in-dmz_put_zoned_device-9850.patch b/patches.suse/dm-zoned-free-dmz-ddev-array-in-dmz_put_zoned_device-9850.patch new file mode 100644 index 0000000..9ae00f8 --- /dev/null +++ b/patches.suse/dm-zoned-free-dmz-ddev-array-in-dmz_put_zoned_device-9850.patch @@ -0,0 +1,77 @@ +From 9850ccd5dd88075b2b7fd28d96299d5535f58cc5 Mon Sep 17 00:00:00 2001 +From: Fedor Pchelkin +Date: Wed, 20 Sep 2023 13:51:16 +0300 +Subject: [PATCH] dm zoned: free dmz->ddev array in dmz_put_zoned_devices +Git-commit: 9850ccd5dd88075b2b7fd28d96299d5535f58cc5 +Patch-mainline: v6.6-rc5 +References: git-fixes + +Commit 4dba12881f88 ("dm zoned: support arbitrary number of devices") +made the pointers to additional zoned devices to be stored in a +dynamically allocated dmz->ddev array. However, this array is not freed. + +Rename dmz_put_zoned_device to dmz_put_zoned_devices and fix it to +free the dmz->ddev array when cleaning up zoned device information. +Remove NULL assignment for all dmz->ddev elements and just free the +dmz->ddev array instead. + +Found by Linux Verification Center (linuxtesting.org). + +Fixes: 4dba12881f88 ("dm zoned: support arbitrary number of devices") +Cc: stable@vger.kernel.org +Signed-off-by: Fedor Pchelkin +Signed-off-by: Mike Snitzer +Signed-off-by: Coly Li + +--- + drivers/md/dm-zoned-target.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c +index ad8e670a2f9b..b487f7acc860 100644 +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -748,17 +748,16 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path, + /* + * Cleanup zoned device information. 
+ */ +-static void dmz_put_zoned_device(struct dm_target *ti) ++static void dmz_put_zoned_devices(struct dm_target *ti) + { + struct dmz_target *dmz = ti->private; + int i; + +- for (i = 0; i < dmz->nr_ddevs; i++) { +- if (dmz->ddev[i]) { ++ for (i = 0; i < dmz->nr_ddevs; i++) ++ if (dmz->ddev[i]) + dm_put_device(ti, dmz->ddev[i]); +- dmz->ddev[i] = NULL; +- } +- } ++ ++ kfree(dmz->ddev); + } + + static int dmz_fixup_devices(struct dm_target *ti) +@@ -948,7 +947,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) + err_meta: + dmz_dtr_metadata(dmz->metadata); + err_dev: +- dmz_put_zoned_device(ti); ++ dmz_put_zoned_devices(ti); + err: + kfree(dmz->dev); + kfree(dmz); +@@ -978,7 +977,7 @@ static void dmz_dtr(struct dm_target *ti) + + bioset_exit(&dmz->bio_set); + +- dmz_put_zoned_device(ti); ++ dmz_put_zoned_devices(ti); + + mutex_destroy(&dmz->chunk_lock); + +-- +2.35.3 + diff --git a/patches.suse/libnvdimm-of_pmem-Use-devm_kstrdup-instead-of-kstrdu-6fd4.patch b/patches.suse/libnvdimm-of_pmem-Use-devm_kstrdup-instead-of-kstrdu-6fd4.patch new file mode 100644 index 0000000..86bfceb --- /dev/null +++ b/patches.suse/libnvdimm-of_pmem-Use-devm_kstrdup-instead-of-kstrdu-6fd4.patch @@ -0,0 +1,45 @@ +From 6fd4ebfc4d61e3097b595ab2725d513e3bbd6739 Mon Sep 17 00:00:00 2001 +From: Chen Ni +Date: Thu, 14 Sep 2023 07:03:27 +0000 +Subject: [PATCH] libnvdimm/of_pmem: Use devm_kstrdup instead of kstrdup and + check its return value +Git-commit: 6fd4ebfc4d61e3097b595ab2725d513e3bbd6739 +Patch-mainline: v6.7-rc1 +References: git-fixes + +Use devm_kstrdup() instead of kstrdup() and check its return value to +avoid memory leak. + +Fixes: 49bddc73d15c ("libnvdimm/of_pmem: Provide a unique name for bus provider") +Signed-off-by: Chen Ni +Reviewed-by: Ira Weiny +Reviewed-by: Dave Jiang +Signed-off-by: Ira Weiny +Signed-off-by: Coly Li + +--- + drivers/nvdimm/of_pmem.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c +index 1b9f5b8a6167..d3fca0ab6290 100644 +--- a/drivers/nvdimm/of_pmem.c ++++ b/drivers/nvdimm/of_pmem.c +@@ -30,7 +30,13 @@ static int of_pmem_region_probe(struct platform_device *pdev) + if (!priv) + return -ENOMEM; + +- priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL); ++ priv->bus_desc.provider_name = devm_kstrdup(&pdev->dev, pdev->name, ++ GFP_KERNEL); ++ if (!priv->bus_desc.provider_name) { ++ kfree(priv); ++ return -ENOMEM; ++ } ++ + priv->bus_desc.module = THIS_MODULE; + priv->bus_desc.of_node = np; + +-- +2.35.3 + diff --git a/patches.suse/libnvdimm-region-Allow-setting-align-attribute-on-re-2e50.patch b/patches.suse/libnvdimm-region-Allow-setting-align-attribute-on-re-2e50.patch new file mode 100644 index 0000000..a82cc09 --- /dev/null +++ b/patches.suse/libnvdimm-region-Allow-setting-align-attribute-on-re-2e50.patch @@ -0,0 +1,79 @@ +From 2e5021cc42ba26c98fe83b973d774a999fa4f219 Mon Sep 17 00:00:00 2001 +From: Tyler Hicks +Date: Tue, 30 Aug 2022 00:45:05 -0500 +Subject: [PATCH] libnvdimm/region: Allow setting align attribute on regions + without mappings +Git-commit: 2e5021cc42ba26c98fe83b973d774a999fa4f219 +Patch-mainline: v6.1-rc1 +References: git-fixes + +The alignment constraint for namespace creation in a region was +increased, from 2M to 16M, for non-PowerPC architectures in v5.7 with +commit 2522afb86a8c ("libnvdimm/region: Introduce an 'align' +attribute"). 
The thought behind the change was that region alignment +should be uniform across all architectures and, since PowerPC had the +largest alignment constraint of 16M, all architectures should conform to +that alignment. + +The change regressed namespace creation in pre-defined regions that +relied on 2M alignment but a workaround was provided in the form of a +sysfs attribute, named 'align', that could be adjusted to a non-default +alignment value. + +However, the sysfs attribute's store function returned an error (-ENXIO) +when userspace attempted to change the alignment of a region that had no +mappings. This affected 2M aligned regions of volatile memory that were +defined in a device tree using "pmem-region" and created by the +of_pmem_region_driver, since those regions do not contain mappings +(ndr_mappings is 0). + +Allow userspace to set the align attribute on pre-existing regions that +do not have mappings so that namespaces can still be within those +regions, despite not being aligned to 16M. + +Link: https://lore.kernel.org/lkml/CA+CK2bDJ3hrWoE91L2wpAk+Yu0_=GtYw=4gLDDD7mxs321b_aA@mail.gmail.com +Fixes: 2522afb86a8c ("libnvdimm/region: Introduce an 'align' attribute") +Signed-off-by: Tyler Hicks +Link: https://lore.kernel.org/r/20220830054505.1159488-1-tyhicks@linux.microsoft.com +Signed-off-by: Dan Williams +Signed-off-by: Coly Li + +--- + drivers/nvdimm/region_devs.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c +index 70f1a23cbe31..e0875d369762 100644 +--- a/drivers/nvdimm/region_devs.c ++++ b/drivers/nvdimm/region_devs.c +@@ -509,16 +509,13 @@ static ssize_t align_store(struct device *dev, + { + struct nd_region *nd_region = to_nd_region(dev); + unsigned long val, dpa; +- u32 remainder; ++ u32 mappings, remainder; + int rc; + + rc = kstrtoul(buf, 0, &val); + if (rc) + return rc; + +- if (!nd_region->ndr_mappings) +- return -ENXIO; +- + /* + * Ensure space-align is evenly divisible by the region + * interleave-width because the kernel typically has no facility +@@ -526,7 +523,8 @@ static ssize_t align_store(struct device *dev, + * contribute to the tail capacity in system-physical-address + * space for the namespace. + */ +- dpa = div_u64_rem(val, nd_region->ndr_mappings, &remainder); ++ mappings = max_t(u32, 1, nd_region->ndr_mappings); ++ dpa = div_u64_rem(val, mappings, &remainder); + if (!is_power_of_2(dpa) || dpa < PAGE_SIZE + || val > region_size(nd_region) || remainder) + return -EINVAL; +-- +2.35.3 + diff --git a/patches.suse/md-Don-t-clear-MD_CLOSING-when-the-raid-is-about-to--9674.patch b/patches.suse/md-Don-t-clear-MD_CLOSING-when-the-raid-is-about-to--9674.patch new file mode 100644 index 0000000..8a57768 --- /dev/null +++ b/patches.suse/md-Don-t-clear-MD_CLOSING-when-the-raid-is-about-to--9674.patch @@ -0,0 +1,68 @@ +From 9674f54e41fffaf06f6a60202e1fa4cc13de3cf5 Mon Sep 17 00:00:00 2001 +From: Li Nan +Date: Mon, 26 Feb 2024 11:14:40 +0800 +Subject: [PATCH] md: Don't clear MD_CLOSING when the raid is about to stop +Git-commit: 9674f54e41fffaf06f6a60202e1fa4cc13de3cf5 +Patch-mainline: v6.9-rc1 +References: git-fixes + +The raid should not be opened anymore when it is about to be stopped. +However, other processes can open it again if the flag MD_CLOSING is +cleared before exiting. From now on, this flag will not be cleared when +the raid will be stopped. 
+ +Fixes: 065e519e71b2 ("md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop") +Signed-off-by: Li Nan +Reviewed-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20240226031444.3606764-6-linan666@huaweicloud.com +Signed-off-by: Coly Li + +--- + drivers/md/md.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -6206,7 +6206,15 @@ static void md_clean(struct mddev *mddev + mddev->persistent = 0; + mddev->level = LEVEL_NONE; + mddev->clevel[0] = 0; +- mddev->flags = 0; ++ /* ++ * Don't clear MD_CLOSING, or mddev can be opened again. ++ * 'hold_active != 0' means mddev is still in the creation ++ * process and will be used later. ++ */ ++ if (mddev->hold_active) ++ mddev->flags = 0; ++ else ++ mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + mddev->sb_flags = 0; + mddev->ro = MD_RDWR; + mddev->metadata_type[0] = 0; +@@ -7528,7 +7536,6 @@ static int md_ioctl(struct block_device + int err = 0; + void __user *argp = (void __user *)arg; + struct mddev *mddev = NULL; +- bool did_set_md_closing = false; + + if (!md_ioctl_valid(cmd)) + return -ENOTTY; +@@ -7615,7 +7622,6 @@ static int md_ioctl(struct block_device + err = -EBUSY; + goto out; + } +- did_set_md_closing = true; + mutex_unlock(&mddev->open_mutex); + sync_blockdev(bdev); + } +@@ -7778,7 +7784,7 @@ unlock: + mddev->hold_active = 0; + mddev_unlock(mddev); + out: +- if(did_set_md_closing) ++ if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) + clear_bit(MD_CLOSING, &mddev->flags); + return err; + } diff --git a/patches.suse/md-don-t-clear-MD_RECOVERY_FROZEN-for-new-dm-raid-un-2f03.patch b/patches.suse/md-don-t-clear-MD_RECOVERY_FROZEN-for-new-dm-raid-un-2f03.patch new file mode 100644 index 0000000..e16f583 --- /dev/null +++ b/patches.suse/md-don-t-clear-MD_RECOVERY_FROZEN-for-new-dm-raid-un-2f03.patch @@ -0,0 +1,54 @@ +From 2f03d0c2cd451c7ac2f317079d4ec518f0986b55 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Tue, 5 Mar 2024 15:22:58 +0800 +Subject: [PATCH] md: don't clear MD_RECOVERY_FROZEN for new dm-raid until + resume +Git-commit: 2f03d0c2cd451c7ac2f317079d4ec518f0986b55 +Patch-mainline: v6.9-rc1 +References: git-fixes + +After commit 9dbd1aa3a81c ("dm raid: add reshaping support to the +target") raid_ctr() will set MD_RECOVERY_FROZEN before md_run() and +expect to keep array frozen until resume. However, md_run() will clear +the flag by setting mddev->recovery to 0. + +Before commit 1baae052cccd ("md: Don't ignore suspended array in +md_check_recovery()"), dm-raid actually relied on suspending to prevent +starting new sync_thread. + +Fix this problem by keeping 'MD_RECOVERY_FROZEN' for dm-raid in +md_run(). 
+ +Fixes: 1baae052cccd ("md: Don't ignore suspended array in md_check_recovery()") +Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") +Cc: stable@vger.kernel.org # v6.7+ +Signed-off-by: Yu Kuai +Signed-off-by: Xiao Ni +Acked-by: Mike Snitzer +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20240305072306.2562024-2-yukuai1@huaweicloud.com +Signed-off-by: Coly Li + +--- + drivers/md/md.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 48ae2b1cb57a..0c4e00e8d485 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -6062,7 +6062,10 @@ int md_run(struct mddev *mddev) + pr_warn("True protection against single-disk failure might be compromised.\n"); + } + +- mddev->recovery = 0; ++ /* dm-raid expect sync_thread to be frozen until resume */ ++ if (mddev->gendisk) ++ mddev->recovery = 0; ++ + /* may be over-ridden by personality */ + mddev->resync_max_sectors = mddev->dev_sectors; + +-- +2.35.3 + diff --git a/patches.suse/md-raid1-fix-choose-next-idle-in-read_balance-257a.patch b/patches.suse/md-raid1-fix-choose-next-idle-in-read_balance-257a.patch new file mode 100644 index 0000000..1d9742a --- /dev/null +++ b/patches.suse/md-raid1-fix-choose-next-idle-in-read_balance-257a.patch @@ -0,0 +1,146 @@ +From 257ac239ffcfd097a9a0732bf5095fb00164f334 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Thu, 29 Feb 2024 17:57:07 +0800 +Subject: [PATCH] md/raid1: fix choose next idle in read_balance() +Git-commit: 257ac239ffcfd097a9a0732bf5095fb00164f334 +Patch-mainline: v6.9-rc1 +References: git-fixes + +Commit 12cee5a8a29e ("md/raid1: prevent merging too large request") add +the case choose next idle in read_balance(): + +Read_balance: for_each_rdev + if(next_seq_sect == this_sector || dist == 0) + -> sequential reads + best_disk = disk; + if (...) 
+ choose_next_idle = 1 + continue; + + for_each_rdev + -> iterate next rdev + if (pending == 0) + best_disk = disk; + -> choose the next idle disk + break; + + if (choose_next_idle) + -> keep using this rdev if there are no other idle disk + contine + +However, commit 2e52d449bcec ("md/raid1: add failfast handling for reads.") +remove the code: + +- /* If device is idle, use it */ +- if (pending == 0) { +- best_disk = disk; +- break; +- } + +Hence choose next idle will never work now, fix this problem by +Following: + +1) don't set best_disk in this case, read_balance() will choose the best + disk after iterating all the disks; +2) add 'pending' so that other idle disk will be chosen; +3) add a new local variable 'sequential_disk' to record the disk, and if + there is no other idle disk, 'sequential_disk' will be chosen; + +(Coly Li: rebased for Linux 5.14 based SUSE kernel) + +Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.") +Co-developed-by: Paul Luse +Signed-off-by: Paul Luse +Signed-off-by: Yu Kuai +Reviewed-by: Xiao Ni +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20240229095714.926789-5-yukuai1@huaweicloud.com +Signed-off-by: Coly Li + +--- + drivers/md/raid1.c | 32 ++++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 10 deletions(-) + +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -601,14 +601,13 @@ static int read_balance(struct r1conf *c + const sector_t this_sector = r1_bio->sector; + int sectors; + int best_good_sectors; +- int best_disk, best_dist_disk, best_pending_disk; ++ int best_disk, best_dist_disk, best_pending_disk, sequential_disk; + int has_nonrot_disk; + int disk; + sector_t best_dist; + unsigned int min_pending; + struct md_rdev *rdev; + int choose_first; +- int choose_next_idle; + + rcu_read_lock(); + /* +@@ -620,12 +619,12 @@ static int read_balance(struct r1conf *c + sectors = r1_bio->sectors; + best_disk = -1; + best_dist_disk = -1; ++ sequential_disk = -1; + best_dist = MaxSector; + best_pending_disk = -1; + min_pending = UINT_MAX; + best_good_sectors = 0; + has_nonrot_disk = 0; +- choose_next_idle = 0; + clear_bit(R1BIO_FailFast, &r1_bio->state); + + if ((conf->mddev->recovery_cp < this_sector + sectors) || +@@ -721,7 +720,6 @@ static int read_balance(struct r1conf *c + int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; + struct raid1_info *mirror = &conf->mirrors[disk]; + +- best_disk = disk; + /* + * If buffered sequential IO size exceeds optimal + * iosize, check if there is idle disk. If yes, choose +@@ -740,15 +738,22 @@ static int read_balance(struct r1conf *c + mirror->next_seq_sect > opt_iosize && + mirror->next_seq_sect - opt_iosize >= + mirror->seq_start) { +- choose_next_idle = 1; +- continue; ++ /* ++ * Add 'pending' to avoid choosing this disk if ++ * there is other idle disk. ++ */ ++ pending++; ++ /* ++ * If there is no other idle disk, this disk ++ * will be chosen. ++ */ ++ sequential_disk = disk; ++ } else { ++ best_disk = disk; ++ break; + } +- break; + } + +- if (choose_next_idle) +- continue; +- + if (min_pending > pending) { + min_pending = pending; + best_pending_disk = disk; +@@ -761,6 +766,13 @@ static int read_balance(struct r1conf *c + } + + /* ++ * sequential IO size exceeds optimal iosize, however, there is no other ++ * idle disk, so choose the sequential disk. ++ */ ++ if (best_disk == -1 && min_pending != 0) ++ best_disk = sequential_disk; ++ ++ /* + * If all disks are rotational, choose the closest disk. 
If any disk is + * non-rotational, choose the disk with less pending request even the + * disk is rotational, which might/might not be optimal for raids with diff --git a/patches.suse/nd_btt-Make-BTT-lanes-preemptible-36c7.patch b/patches.suse/nd_btt-Make-BTT-lanes-preemptible-36c7.patch new file mode 100644 index 0000000..65a70a5 --- /dev/null +++ b/patches.suse/nd_btt-Make-BTT-lanes-preemptible-36c7.patch @@ -0,0 +1,94 @@ +From 36c75ce3bd299878fd9b238e9803d3817ddafbf3 Mon Sep 17 00:00:00 2001 +From: Tomas Glozar +Date: Wed, 20 Sep 2023 07:37:12 +0200 +Subject: [PATCH] nd_btt: Make BTT lanes preemptible +Git-commit: 36c75ce3bd299878fd9b238e9803d3817ddafbf3 +Patch-mainline: v6.7-rc1 +References: git-fixes + +nd_region_acquire_lane uses get_cpu, which disables preemption. This is +an issue on PREEMPT_RT kernels, since btt_write_pg and also +nd_region_acquire_lane itself take a spin lock, resulting in BUG: +sleeping function called from invalid context. + +Fix the issue by replacing get_cpu with smp_process_id and +migrate_disable when needed. This makes BTT operations preemptible, thus +permitting the use of spin_lock. + +BUG example occurring when running ndctl tests on PREEMPT_RT kernel: + +Bug: sleeping function called from invalid context at +kernel/locking/spinlock_rt.c:48 +In_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 4903, name: +libndctl +Preempt_count: 1, expected: 0 +RCU nest depth: 0, expected: 0 +Preemption disabled at: +[] nd_region_acquire_lane+0x15/0x90 [libnvdimm] +Call Trace: + + dump_stack_lvl+0x8e/0xb0 + __might_resched+0x19b/0x250 + rt_spin_lock+0x4c/0x100 + ? btt_write_pg+0x2d7/0x500 [nd_btt] + btt_write_pg+0x2d7/0x500 [nd_btt] + ? local_clock_noinstr+0x9/0xc0 + btt_submit_bio+0x16d/0x270 [nd_btt] + __submit_bio+0x48/0x80 + __submit_bio_noacct+0x7e/0x1e0 + submit_bio_wait+0x58/0xb0 + __blkdev_direct_IO_simple+0x107/0x240 + ? inode_set_ctime_current+0x51/0x110 + ? __pfx_submit_bio_wait_endio+0x10/0x10 + blkdev_write_iter+0x1d8/0x290 + vfs_write+0x237/0x330 + ... 
+ + +Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") +Signed-off-by: Tomas Glozar +Reviewed-by: Ira Weiny +Reviewed-by: Vishal Verma +Signed-off-by: Ira Weiny +Signed-off-by: Coly Li + +--- + drivers/nvdimm/region_devs.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c +index 0a81f87f6f6c..e2f1fb99707f 100644 +--- a/drivers/nvdimm/region_devs.c ++++ b/drivers/nvdimm/region_devs.c +@@ -939,7 +939,8 @@ unsigned int nd_region_acquire_lane(struct nd_region *nd_region) + { + unsigned int cpu, lane; + +- cpu = get_cpu(); ++ migrate_disable(); ++ cpu = smp_processor_id(); + if (nd_region->num_lanes < nr_cpu_ids) { + struct nd_percpu_lane *ndl_lock, *ndl_count; + +@@ -958,16 +959,15 @@ EXPORT_SYMBOL(nd_region_acquire_lane); + void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane) + { + if (nd_region->num_lanes < nr_cpu_ids) { +- unsigned int cpu = get_cpu(); ++ unsigned int cpu = smp_processor_id(); + struct nd_percpu_lane *ndl_lock, *ndl_count; + + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (--ndl_count->count == 0) + spin_unlock(&ndl_lock->lock); +- put_cpu(); + } +- put_cpu(); ++ migrate_enable(); + } + EXPORT_SYMBOL(nd_region_release_lane); + +-- +2.35.3 + diff --git a/patches.suse/nvdimm-Allow-overwrite-in-the-presence-of-disabled-d-bb7b.patch b/patches.suse/nvdimm-Allow-overwrite-in-the-presence-of-disabled-d-bb7b.patch new file mode 100644 index 0000000..1dea63a --- /dev/null +++ b/patches.suse/nvdimm-Allow-overwrite-in-the-presence-of-disabled-d-bb7b.patch @@ -0,0 +1,50 @@ +From bb7bf697fed58eae9d3445944e457ab0de4da54f Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 28 Apr 2022 15:47:46 -0700 +Subject: [PATCH] nvdimm: Allow overwrite in the presence of disabled dimms +Git-commit: bb7bf697fed58eae9d3445944e457ab0de4da54f +Patch-mainline: v5.19-rc1 +References: git-fixes + +It is not clear why the original implementation of overwrite support +required the dimm driver to be active before overwrite could proceed. In +fact that can lead to cases where the kernel retains an invalid cached +copy of the labels from before the overwrite. Unfortunately the kernel +has not only allowed that case, but enforced it. + +Going forward, allow for overwrite to happen while the label area is +offline, and follow-on with updates to 'ndctl sanitize-dimm --overwrite' +to trigger the label area invalidation by default. 
+ +Cc: Vishal Verma +Cc: Dave Jiang +Cc: Ira Weiny +Cc: Jeff Moyer +Reported-by: Krzysztof Kensicki +Fixes: 7d988097c546 ("acpi/nfit, libnvdimm/security: Add security DSM overwrite support") +Signed-off-by: Dan Williams +Signed-off-by: Coly Li + +--- + drivers/nvdimm/security.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c +index 4b80150e4afa..b5aa55c61461 100644 +--- a/drivers/nvdimm/security.c ++++ b/drivers/nvdimm/security.c +@@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) + || !nvdimm->sec.flags) + return -EOPNOTSUPP; + +- if (dev->driver == NULL) { +- dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); +- return -EINVAL; +- } +- + rc = check_security_state(nvdimm); + if (rc) + return rc; +-- +2.35.3 + diff --git a/patches.suse/nvdimm-Fix-badblocks-clear-off-by-one-error-ef91.patch b/patches.suse/nvdimm-Fix-badblocks-clear-off-by-one-error-ef91.patch new file mode 100644 index 0000000..3cd804c --- /dev/null +++ b/patches.suse/nvdimm-Fix-badblocks-clear-off-by-one-error-ef91.patch @@ -0,0 +1,43 @@ +From ef9102004a87cb3f8b26e000a095a261fc0467d3 Mon Sep 17 00:00:00 2001 +From: Chris Ye +Date: Tue, 31 May 2022 17:09:54 -0700 +Subject: [PATCH] nvdimm: Fix badblocks clear off-by-one error +Git-commit: ef9102004a87cb3f8b26e000a095a261fc0467d3 +Patch-mainline: v5.19-rc5 +References: git-fixes + +nvdimm_clear_badblocks_region() validates badblock clearing requests +against the span of the region, however it compares the inclusive +badblock request range to the exclusive region range. Fix up the +off-by-one error. + +Fixes: 23f498448362 ("libnvdimm: rework region badblocks clearing") +Cc: +Signed-off-by: Chris Ye +Reviewed-by: Vishal Verma +Link: https://lore.kernel.org/r/165404219489.2445897.9792886413715690399.stgit@dwillia2-xfh +Signed-off-by: Dan Williams +Signed-off-by: Coly Li + +--- + drivers/nvdimm/bus.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c +index a4fc17db707c..b38d0355b0ac 100644 +--- a/drivers/nvdimm/bus.c ++++ b/drivers/nvdimm/bus.c +@@ -176,8 +176,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) + ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1; + + /* make sure we are in the region */ +- if (ctx->phys < nd_region->ndr_start +- || (ctx->phys + ctx->cleared) > ndr_end) ++ if (ctx->phys < nd_region->ndr_start || ++ (ctx->phys + ctx->cleared - 1) > ndr_end) + return 0; + + sector = (ctx->phys - nd_region->ndr_start) / 512; +-- +2.35.3 + diff --git a/patches.suse/nvdimm-Fix-dereference-after-free-in-register_nvdimm-08ca.patch b/patches.suse/nvdimm-Fix-dereference-after-free-in-register_nvdimm-08ca.patch new file mode 100644 index 0000000..d5675ec --- /dev/null +++ b/patches.suse/nvdimm-Fix-dereference-after-free-in-register_nvdimm-08ca.patch @@ -0,0 +1,43 @@ +From 08ca6906a4b7e48f8e93b7c1f49a742a415be6d5 Mon Sep 17 00:00:00 2001 +From: Konstantin Meskhidze +Date: Thu, 17 Aug 2023 19:41:03 +0800 +Subject: [PATCH] nvdimm: Fix dereference after free in register_nvdimm_pmu() +Git-commit: 08ca6906a4b7e48f8e93b7c1f49a742a415be6d5 +Patch-mainline: v6.6-rc1 +References: git-fixes + +'nd_pmu->pmu.attr_groups' is dereferenced in function +'nvdimm_pmu_free_hotplug_memory' call after it has been freed. 
Because in +function 'nvdimm_pmu_free_hotplug_memory' memory pointed by the fields of +'nd_pmu->pmu.attr_groups' is deallocated it is necessary to call 'kfree' +after 'nvdimm_pmu_free_hotplug_memory'. + +Fixes: 0fab1ba6ad6b ("drivers/nvdimm: Add perf interface to expose nvdimm performance stats") +Co-developed-by: Ivanov Mikhail +Signed-off-by: Konstantin Meskhidze +Reviewed-by: Jeff Moyer +Link: https://lore.kernel.org/r/20230817114103.754977-1-konstantin.meskhidze@huawei.com +Signed-off-by: Dave Jiang +Signed-off-by: Coly Li + +--- + drivers/nvdimm/nd_perf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c +index 14881c4e03e6..2b6dc80d8fb5 100644 +--- a/drivers/nvdimm/nd_perf.c ++++ b/drivers/nvdimm/nd_perf.c +@@ -308,8 +308,8 @@ int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev) + + rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1); + if (rc) { +- kfree(nd_pmu->pmu.attr_groups); + nvdimm_pmu_free_hotplug_memory(nd_pmu); ++ kfree(nd_pmu->pmu.attr_groups); + return rc; + } + +-- +2.35.3 + diff --git a/patches.suse/nvdimm-Fix-firmware-activation-deadlock-scenarios-e682.patch b/patches.suse/nvdimm-Fix-firmware-activation-deadlock-scenarios-e682.patch new file mode 100644 index 0000000..1aabb41 --- /dev/null +++ b/patches.suse/nvdimm-Fix-firmware-activation-deadlock-scenarios-e682.patch @@ -0,0 +1,112 @@ +From e6829d1bd3c4b58296ee9e412f7ed4d6cb390192 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Tue, 26 Apr 2022 13:23:05 -0700 +Subject: [PATCH] nvdimm: Fix firmware activation deadlock scenarios +Git-commit: e6829d1bd3c4b58296ee9e412f7ed4d6cb390192 +Patch-mainline: v5.19-rc1 +References: git-fixes + +Lockdep reports the following deadlock scenarios for CXL root device +power-management, device_prepare(), operations, and device_shutdown() +operations for 'nd_region' devices: + + Chain exists of: + &nvdimm_region_key --> &nvdimm_bus->reconfig_mutex --> system_transition_mutex + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(system_transition_mutex); + lock(&nvdimm_bus->reconfig_mutex); + lock(system_transition_mutex); + lock(&nvdimm_region_key); + + Chain exists of: + &cxl_nvdimm_bridge_key --> acpi_scan_lock --> &cxl_root_key + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&cxl_root_key); + lock(acpi_scan_lock); + lock(&cxl_root_key); + lock(&cxl_nvdimm_bridge_key); + +These stem from holding nvdimm_bus_lock() over hibernate_quiet_exec() +which walks the entire system device topology taking device_lock() along +the way. The nvdimm_bus_lock() is protecting against unregistration, +multiple simultaneous ops callers, and preventing activate_show() from +racing activate_store(). For the first 2, the lock is redundant. +Unregistration already flushes all ops users, and sysfs already prevents +multiple threads to be active in an ops handler at the same time. For +the last userspace should already be waiting for its last +activate_store() to complete, and does not need activate_show() to flush +the write side, so this lock usage can be deleted in these attributes. 
+ +Fixes: 48001ea50d17 ("PM, libnvdimm: Add runtime firmware activation support") +Reviewed-by: Ira Weiny +Link: https://lore.kernel.org/r/165074883800.4116052.10737040861825806582.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Coly Li + +--- + drivers/nvdimm/core.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c +index 144926b7451c..d91799b71d23 100644 +--- a/drivers/nvdimm/core.c ++++ b/drivers/nvdimm/core.c +@@ -368,9 +368,7 @@ static ssize_t capability_show(struct device *dev, + if (!nd_desc->fw_ops) + return -EOPNOTSUPP; + +- nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); +- nvdimm_bus_unlock(dev); + + switch (cap) { + case NVDIMM_FWA_CAP_QUIESCE: +@@ -395,10 +393,8 @@ static ssize_t activate_show(struct device *dev, + if (!nd_desc->fw_ops) + return -EOPNOTSUPP; + +- nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); + state = nd_desc->fw_ops->activate_state(nd_desc); +- nvdimm_bus_unlock(dev); + + if (cap < NVDIMM_FWA_CAP_QUIESCE) + return -EOPNOTSUPP; +@@ -443,7 +439,6 @@ static ssize_t activate_store(struct device *dev, + else + return -EINVAL; + +- nvdimm_bus_lock(dev); + state = nd_desc->fw_ops->activate_state(nd_desc); + + switch (state) { +@@ -461,7 +456,6 @@ static ssize_t activate_store(struct device *dev, + default: + rc = -ENXIO; + } +- nvdimm_bus_unlock(dev); + + if (rc == 0) + rc = len; +@@ -484,10 +478,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut + if (!nd_desc->fw_ops) + return 0; + +- nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); +- nvdimm_bus_unlock(dev); +- + if (cap < NVDIMM_FWA_CAP_QUIESCE) + return 0; + +-- +2.35.3 + diff --git a/patches.suse/nvdimm-Fix-memleak-of-pmu-attr_groups-in-unregister_-85ae.patch b/patches.suse/nvdimm-Fix-memleak-of-pmu-attr_groups-in-unregister_-85ae.patch new file mode 100644 index 0000000..92b9cc0 --- /dev/null +++ b/patches.suse/nvdimm-Fix-memleak-of-pmu-attr_groups-in-unregister_-85ae.patch @@ -0,0 +1,40 @@ +From 85ae42c72142346645e63c33835da947dfa008b3 Mon Sep 17 00:00:00 2001 +From: Konstantin Meskhidze +Date: Thu, 17 Aug 2023 19:59:45 +0800 +Subject: [PATCH] nvdimm: Fix memleak of pmu attr_groups in + unregister_nvdimm_pmu() +Git-commit: 85ae42c72142346645e63c33835da947dfa008b3 +Patch-mainline: v6.6-rc1 +References: git-fixes + +Memory pointed by 'nd_pmu->pmu.attr_groups' is allocated in function +'register_nvdimm_pmu' and is lost after 'kfree(nd_pmu)' call in function +'unregister_nvdimm_pmu'. 
+ +Fixes: 0fab1ba6ad6b ("drivers/nvdimm: Add perf interface to expose nvdimm performance stats") +Co-developed-by: Ivanov Mikhail +Signed-off-by: Konstantin Meskhidze +Reviewed-by: Jeff Moyer +Link: https://lore.kernel.org/r/20230817115945.771826-1-konstantin.meskhidze@huawei.com +Signed-off-by: Dave Jiang +Signed-off-by: Coly Li + +--- + drivers/nvdimm/nd_perf.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c +index 433bbb68ae64..14881c4e03e6 100644 +--- a/drivers/nvdimm/nd_perf.c ++++ b/drivers/nvdimm/nd_perf.c +@@ -324,6 +324,7 @@ void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu) + { + perf_pmu_unregister(&nd_pmu->pmu); + nvdimm_pmu_free_hotplug_memory(nd_pmu); ++ kfree(nd_pmu->pmu.attr_groups); + kfree(nd_pmu); + } + EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu); +-- +2.35.3 + diff --git a/patches.suse/nvdimm-namespace-drop-nested-variable-in-create_name-d342.patch b/patches.suse/nvdimm-namespace-drop-nested-variable-in-create_name-d342.patch new file mode 100644 index 0000000..9bbe3af --- /dev/null +++ b/patches.suse/nvdimm-namespace-drop-nested-variable-in-create_name-d342.patch @@ -0,0 +1,43 @@ +From d34213ebfea31229411583716a9ebe3610bf2d29 Mon Sep 17 00:00:00 2001 +From: Andy Shevchenko +Date: Tue, 7 Jun 2022 19:49:37 +0300 +Subject: [PATCH] nvdimm/namespace: drop nested variable in + create_namespace_pmem() +Git-commit: d34213ebfea31229411583716a9ebe3610bf2d29 +Patch-mainline: v6.0-rc7 +References: git-fixes + +Kernel build bot reported: + + namespace_devs.c:1991:10: warning: Local variable 'uuid' shadows outer variable [shadowVariable] + +Refactor create_namespace_pmem() by dropping a nested version of +the same variable. + +Fixes: d1c6e08e7503 ("libnvdimm/labels: Add uuid helpers") +Reported-by: kernel test robot +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20220607164937.33967-1-andriy.shevchenko@linux.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Coly Li + +--- + drivers/nvdimm/namespace_devs.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c +index bf4f5c09d9b1..bbe5099c836d 100644 +--- a/drivers/nvdimm/namespace_devs.c ++++ b/drivers/nvdimm/namespace_devs.c +@@ -1712,8 +1712,6 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, + res->flags = IORESOURCE_MEM; + + for (i = 0; i < nd_region->ndr_mappings; i++) { +- uuid_t uuid; +- + nsl_get_uuid(ndd, nd_label, &uuid); + if (has_uuid_at_pos(nd_region, &uuid, cookie, i)) + continue; +-- +2.35.3 + diff --git a/series.conf b/series.conf index 0950fc6..1df7cf0 100644 --- a/series.conf +++ b/series.conf @@ -6709,6 +6709,12 @@ patches.suse/nvme-move-command-clear-into-the-various-setup-helpe.patch patches.suse/nvme-don-t-memset-the-normal-read-write-command.patch patches.suse/nbd-Fix-use-after-free-in-pid_show.patch + patches.suse/bcache-fix-error-info-in-register_bcache-d55f.patch + patches.suse/bcache-move-calc_cached_dev_sectors-to-proper-place--0259.patch + patches.suse/bcache-remove-the-cache_dev_name-field-from-struct-c-7e84.patch + patches.suse/bcache-remove-the-backing_dev_name-field-from-struct-0f5c.patch + patches.suse/bcache-use-bvec_kmap_local-in-bch_data_verify-0038.patch + patches.suse/bcache-remove-bch_crc64_update-39fa.patch patches.suse/nvme-generate-uevent-once-a-multipath-namespace-is-o.patch patches.suse/nvme-fc-add-support-for-map_queues.patch patches.suse/qla2xxx-add-map_queues-support-for-nvme.patch @@ -6745,6 +6751,8 @@ 
patches.suse/nvmet-switch-check-for-subsystem-type.patch patches.suse/nvmet-register-discovery-subsystem-as-current.patch patches.suse/nvmet-use-flex_array_size-and-struct_size.patch + patches.suse/bcache-move-uapi-header-bcache.h-to-bcache-code-dire-cf21.patch + patches.suse/bcache-replace-snprintf-in-show-functions-with-sysfs-1b86.patch patches.suse/block-ataflop-Fix-warning-comparing-pointer-to-0.patch patches.suse/null_blk-Fix-handling-of-submit_queues-and-poll_queu.patch patches.suse/io_uring-dump-sqe-contents-if-issue-fails.patch @@ -13019,6 +13027,7 @@ patches.suse/io-wq-drop-wqe-lock-before-creating-new-worker.patch patches.suse/iocost-Fix-divide-by-zero-on-donation-from-low-hweig.patch patches.suse/block-reduce-kblockd_mod_delayed_work_on-CPU-consump.patch + patches.suse/bcache-fix-NULL-pointer-reference-in-cached_dev_deta-aa97.patch patches.suse/selinux-fix-sleeping-function-called-from-invalid-co.patch patches.suse/btrfs-fix-memory-leak-in-__add_inode_ref.patch patches.suse/btrfs-fix-double-free-of-anon_dev-after-failure-to-c.patch @@ -19248,6 +19257,8 @@ patches.suse/nvme-fix-the-check-for-duplicate-unique-identifiers.patch patches.suse/nvme-check-for-duplicate-identifiers-earlier.patch patches.suse/nvme-check-that-EUI-GUID-UUID-are-globally-unique.patch + patches.suse/bcache-use-bvec_kmap_local-in-bio_csum-07fe.patch + patches.suse/bcache-fixup-bcache_dev_sectors_dirty_add-multithrea-7b10.patch patches.suse/0003-bcache-fixup-multiple-threads-crash.patch patches.suse/0055-md-raid1-raid10-drop-pending_cnt.patch patches.suse/lib-raid6-test-fix-multiple-definition-linking-error.patch @@ -23486,6 +23497,7 @@ patches.suse/firmware-stratix10-svc-add-missing-callback-paramete.patch patches.suse/firmware-sysfb-fix-platform-device-leak-in-error-pat.patch patches.suse/firmware-google-Properly-state-IOMEM-dependency.patch + patches.suse/bcache-use-default_groups-in-kobj_type-fa97.patch patches.suse/component-Replace-most-references-to-master-with-agg.patch patches.suse/component-Add-common-helper-for-compare-release-func.patch patches.suse/1477-drm-komeda-Make-use-of-the-helper-component_compare_.patch @@ -28566,6 +28578,7 @@ patches.suse/clk-imx8mp-fix-usb_root_clk-parent.patch patches.suse/mfd-ipaq-micro-Fix-error-check-return-value-of-platf.patch patches.suse/mfd-davinci_voicecodec-Fix-possible-null-ptr-deref-d.patch + patches.suse/nvdimm-Allow-overwrite-in-the-presence-of-disabled-d-bb7b.patch patches.suse/testing-nvdimm-iomap-make-__nfit_test_ioremap-a-macr.patch patches.suse/testing-nvdimm-asm-mce.h-is-not-needed-in-nfit.c.patch patches.suse/acpi-nfit-rely-on-mce-misc-to-determine-poison-granu.patch @@ -28643,6 +28656,7 @@ patches.suse/PCI-ACPI-Prefer-CXL-_OSC-instead-of-PCIe-_OSC-for-CX.patch patches.suse/PCI-ACPI-negotiate-CXL-_OSC.patch patches.suse/ACPI-NFIT-Drop-nfit_device_lock.patch + patches.suse/nvdimm-Fix-firmware-activation-deadlock-scenarios-e682.patch patches.suse/pinctrl-mediatek-mt8195-enable-driver-on-mtk-platfor.patch patches.suse/pinctrl-mvebu-Fix-irq_of_parse_and_map-return-value.patch patches.suse/pinctrl-tegra-tegra194-drop-unused-pin-groups.patch @@ -29931,6 +29945,7 @@ patches.suse/cpufreq-amd-pstate-Add-resume-and-suspend-callbacks.patch patches.suse/drivers-cpufreq-Add-missing-of_node_put-in-qoriq-cpu.patch patches.suse/cpufreq-pmac32-cpufreq-Fix-refcount-leak-bug.patch + patches.suse/nvdimm-Fix-badblocks-clear-off-by-one-error-ef91.patch patches.suse/powerpc-xive-spapr-correct-bitmap-allocation-size.patch 
patches.suse/powerpc-prom_init-Fix-kernel-config-grep.patch patches.suse/powerpc-bpf-Fix-use-of-user_pt_regs-in-uapi.patch @@ -33028,6 +33043,7 @@ patches.suse/nvme-multipath-refactor-nvme_mpath_add_disk.patch patches.suse/0026-null_blk-fix-ida-error-handling-in-null_add_dev.patch patches.suse/nbd-add-missing-definition-of-pr_fmt-bc9d.patch + patches.suse/bcache-remove-EXPERIMENTAL-for-Kconfig-option-Asynch-640c.patch patches.suse/0211-md-raid5-Fix-sectors_to_do-bitmap-overflow-in-raid5_make_request.patch patches.suse/0212-md-raid5-Convert-prepare_to_wait-to-wait_woken-api.patch patches.suse/0213-md-fix-mddev-kobj-lifetime.patch @@ -34885,6 +34901,7 @@ patches.suse/i2c-mlxbf-incorrect-base-address-passed-during-io-wr.patch patches.suse/i2c-mlxbf-prevent-stack-overflow-in-mlxbf_i2c_smbus_.patch patches.suse/i2c-mlxbf-Fix-frequency-calculation.patch + patches.suse/nvdimm-namespace-drop-nested-variable-in-create_name-d342.patch patches.suse/devdax-Fix-soft-reservation-memory-description.patch patches.suse/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-an.patch patches.suse/ext4-limit-the-number-of-retries-after-discarding-pr.patch @@ -36018,6 +36035,10 @@ patches.suse/sbitmap-fix-batched-wait_cnt-accounting.patch patches.suse/blk-throttle-fix-that-io-throttle-can-only-work-for-single-bio-320f.patch patches.suse/0032-blk-throttle-prevent-overflow-while-calculating-wait-time.patch + patches.suse/bcache-remove-unnecessary-flush_workqueue-97d2.patch + patches.suse/bcache-remove-unused-bch_mark_cache_readahead-functi-d86b.patch + patches.suse/bcache-bset-Fix-comment-typos-11e5.patch + patches.suse/bcache-fix-repeated-words-in-comments-6dd3.patch patches.suse/0033-bcache-fix-set_at_max_writeback_rate-for-multiple-attached-devices.patch patches.suse/nvmet-expose-max-queues-to-configfs.patch patches.suse/nvme-tcp-handle-number-of-queue-changes.patch @@ -36734,6 +36755,7 @@ patches.suse/ubifs-Fix-AA-deadlock-when-setting-xattr-for-encrypted-file.patch patches.suse/rtc-stmp3xxx-Add-failure-handling-for-stmp3xxx_wdt_r.patch patches.suse/rtc-cmos-Fix-event-handler-registration-ordering-iss.patch + patches.suse/libnvdimm-region-Allow-setting-align-attribute-on-re-2e50.patch patches.suse/ACPI-HMAT-Release-platform-device-in-case-of-platfor.patch patches.suse/cifs-fix-skipping-to-incorrect-offset-in-emit_cached_dirents.patch patches.suse/smb3-clarify-multichannel-warning.patch @@ -38088,6 +38110,7 @@ patches.suse/0049-dm-cache-Fix-ABBA-deadlock-between-shrink_slab-and-dm_cache_metadata_abort.patch patches.suse/0050-dm-cache-set-needs_check-flag-after-aborting-metadata.patch patches.suse/0051-dm-thin-resume-even-if-in-FAIL-mode.patch + patches.suse/dm-init-add-dm-mod.waitfor-to-wait-for-asynchronousl-0356.patch patches.suse/0052-dm-thin-Use-last-transaction-s-pmd-root-when-commit-failed.patch patches.suse/ALSA-memalloc-Allocate-more-contiguous-pages-for-fal.patch patches.suse/ALSA-pcm-fix-undefined-behavior-in-bit-shift-for-SND.patch @@ -40044,7 +40067,15 @@ patches.suse/docs-ftrace-fix-a-issue-with-duplicated-subtitle-num.patch patches.suse/docs-scripts-gdb-add-necessary-make-scripts_gdb-step.patch patches.suse/audit-update-the-mailing-list-in-MAINTAINERS.patch + patches.suse/dm-flakey-don-t-corrupt-the-zero-page-f507.patch + patches.suse/dm-flakey-fix-a-bug-with-32-bit-highmem-systems-8eb2.patch + patches.suse/dm-flakey-fix-logic-when-corrupting-a-bio-aa56.patch + patches.suse/dm-send-just-one-event-on-resize-not-two-7533.patch + patches.suse/dm-remove-flush_scheduled_work-during-local_exit-0b22.patch + 
patches.suse/dm-add-cond_resched-to-dm_wq_work-0ca4.patch patches.suse/dm-add-cond_resched-to-dm_wq_requeue_work-f776.patch + patches.suse/dm-thin-add-cond_resched-to-various-workqueue-loops-e4f8.patch + patches.suse/dm-cache-add-cond_resched-to-various-workqueue-loops-7622.patch patches.suse/scsi-libsas-Remove-useless-dev_list-delete-in-sas_ex_discover_end_dev.patch patches.suse/scsi-qla2xxx-check-if-port-is-online-before-sending-els.patch patches.suse/scsi-qla2xxx-fix-link-failure-in-npiv-environment.patch @@ -40873,6 +40904,10 @@ patches.suse/ACPI-resource-Add-Medion-S17413-to-IRQ-override-quir.patch patches.suse/io_uring-rsrc-fix-null-ptr-deref-in-io_file_bitmap_g.patch patches.suse/nvme-send-Identify-with-CNS-06h-only-to-I-O-controll.patch + patches.suse/dm-thin-fix-deadlock-when-swapping-to-thin-device-9bbf.patch + patches.suse/dm-crypt-add-cond_resched-to-dmcrypt_write-fb29.patch + patches.suse/dm-crypt-avoid-accessing-uninitialized-tasklet-d9a0.patch + patches.suse/dm-stats-check-for-and-propagate-alloc_percpu-failur-d3aa.patch patches.suse/drm-panel-orientation-quirks-Add-quirk-for-Lenovo-Yo.patch patches.suse/drm-meson-fix-missing-component-unbind-on-bind-error.patch patches.suse/drm-bridge-lt8912b-return-EPROBE_DEFER-if-bridge-is-.patch @@ -41423,7 +41458,12 @@ patches.suse/md-raid10-fix-memleak-of-md-thread-f0dd.patch patches.suse/md-raid10-don-t-call-bio_start_io_acct-twice-for-bio-7cdd.patch patches.suse/block-bfq-Fix-division-by-zero-error-on-zero-wsum.patch + patches.suse/dm-verity-fix-error-handling-for-check_at_most_once--e8c5.patch + patches.suse/dm-clone-call-kmem_cache_destroy-in-dm_clone_init-er-6827.patch + patches.suse/dm-integrity-call-kmem_cache_destroy-in-dm_integrity-6b79.patch patches.suse/dm-ioctl-fix-nested-locking-in-table_clear-to-remove-deadlock-concern-3b89.patch + patches.suse/dm-flakey-fix-a-crash-with-invalid-table-line-98db.patch + patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-in-process-of--38d1.patch patches.suse/scsi-lpfc-Prevent-lpfc_debugfs_lockstat_write-buffer.patch patches.suse/scsi-lpfc-Reorder-freeing-of-various-DMA-buffers-and.patch patches.suse/scsi-lpfc-Fix-lockdep-warning-for-rx_monitor-lock-wh.patch @@ -42231,6 +42271,8 @@ patches.suse/IB-isert-Fix-possible-list-corruption-in-CMA-handler.patch patches.suse/IB-isert-Fix-incorrect-release-of-isert-connection.patch patches.suse/RDMA-rxe-Fix-rxe_cq_post.patch + patches.suse/dm-don-t-lock-fs-when-the-map-is-NULL-during-suspend-2760.patch + patches.suse/dm-thin-metadata-check-fail_io-before-using-data_sm-cb65.patch patches.suse/igb-Fix-extts-capture-value-format-for-82580-i354-i3.patch patches.suse/net-usb-qmi_wwan-add-support-for-Compal-RXM-G1.patch patches.suse/net-sched-cls_u32-Fix-reference-counter-leak-leading.patch @@ -42338,6 +42380,10 @@ patches.suse/md-raid10-fix-null-ptr-deref-of-mreplace-in-raid10_s-3481.patch patches.suse/md-raid10-fix-io-loss-while-replacement-replace-rdev-2ae6.patch patches.suse/md-raid10-prevent-soft-lockup-while-flush-writes-0104.patch + patches.suse/bcache-Remove-dead-references-to-cache_readaheads-ccb8.patch + patches.suse/bcache-Remove-unnecessary-NULL-point-check-in-node-a-028d.patch + patches.suse/bcache-Fix-__bch_btree_node_alloc-to-make-the-failur-80fc.patch + patches.suse/bcache-fixup-btree_cache_wait-list-damage-f085.patch patches.suse/nvme-core-fix-memory-leak-in-dhchap_secret_store.patch patches.suse/nvme-core-fix-memory-leak-in-dhchap_ctrl_secret.patch patches.suse/nvme-core-fix-dev_pm_qos-memleak.patch @@ -42691,6 +42737,8 @@ 
patches.suse/scsi-core-Don-t-wait-for-quiesce-in-scsi_stop_queue.patch patches.suse/scsi-core-Don-t-wait-for-quiesce-in-scsi_device_bloc.patch patches.suse/scsi-core-Improve-warning-message-in-scsi_device_blo.patch + patches.suse/dm-thin-metadata-Fix-ABBA-deadlock-by-resetting-dm_b-d483.patch + patches.suse/dm-integrity-reduce-vmalloc-space-footprint-on-32-bi-6d50.patch patches.suse/platform-x86-intel-uncore-freq-Uncore-frequency-control-via-TPMI.patch patches.suse/platform-x86-intel-uncore-freq-Support-for-cluster-level-controls.patch patches.suse/platform-x86-intel-uncore-freq-tpmi-Provide-cluster-level-control.patch @@ -43152,6 +43200,9 @@ patches.suse/ASoC-rt711-fix-for-JD-event-handling-in-ClockStop-Mo.patch patches.suse/ASoC-rt711-sdca-fix-for-JD-event-handling-in-ClockSt.patch patches.suse/ASoC-atmel-Fix-the-8K-sample-parameter-in-I2SC-maste.patch + patches.suse/dm-raid-fix-missing-reconfig_mutex-unlock-in-raid_ct-bae3.patch + patches.suse/dm-raid-clean-up-four-equivalent-goto-tags-in-raid_c-e74c.patch + patches.suse/dm-cache-policy-smq-ensure-IO-doesn-t-prevent-cleane-1e4a.patch patches.suse/s390-dasd-fix-hanging-device-after-quiesce-resume.patch patches.suse/s390-dasd-use-correct-number-of-retries-for-ERP-requests.patch patches.suse/s390-dasd-fix-hanging-device-after-request-requeue.patch @@ -43663,6 +43714,8 @@ patches.suse/Revert-PCI-tegra194-Enable-support-for-256-Byte-payl.patch patches.suse/PCI-meson-Remove-cast-between-incompatible-function-.patch patches.suse/PCI-microchip-Remove-cast-between-incompatible-funct.patch + patches.suse/nvdimm-Fix-memleak-of-pmu-attr_groups-in-unregister_-85ae.patch + patches.suse/nvdimm-Fix-dereference-after-free-in-register_nvdimm-08ca.patch patches.suse/powerpc-pseries-Initialise-CPU-hotplug-callbacks-ear.patch patches.suse/powerpc-Add-HOTPLUG_SMT-support.patch patches.suse/powerpc-pseries-Honour-current-SMT-state-when-DLPAR-.patch @@ -44166,6 +44219,7 @@ patches.suse/RDMA-core-Require-admin-capabilities-to-set-system-p.patch patches.suse/gpio-pxa-disable-pinctrl-calls-for-MMP_GPIO.patch patches.suse/gpio-aspeed-fix-the-GPIO-number-passed-to-pinctrl_gp.patch + patches.suse/dm-zoned-free-dmz-ddev-array-in-dmz_put_zoned_device-9850.patch patches.suse/x86-sev-use-the-ghcb-protocol-when-available-for-snp-cpuid-requests.patch patches.suse/x86-sev-Change-npages-to-unsigned-long-in-snp_accept_memory.patch patches.suse/ALSA-hda-realtek-ALC287-merge-RTK-codec-with-CS-CS35.patch @@ -44565,6 +44619,8 @@ patches.suse/leds-turris-omnia-Do-not-use-SMBUS-calls.patch patches.suse/leds-pwm-Don-t-disable-the-PWM-when-the-LED-should-b.patch patches.suse/leds-trigger-ledtrig-cpu-Fix-output-may-be-truncated.patch + patches.suse/libnvdimm-of_pmem-Use-devm_kstrdup-instead-of-kstrdu-6fd4.patch + patches.suse/nd_btt-Make-BTT-lanes-preemptible-36c7.patch patches.suse/scsi-qla2xxx-Use-FIELD_GET-to-extract-PCIe-capabilit.patch patches.suse/scsi-hisi_sas-Set-debugfs_dir-pointer-to-NULL-after-removing-debugfs.patch patches.suse/scsi-ibmvfc-Remove-BUG_ON-in-the-case-of-an-empty-ev.patch @@ -44770,6 +44826,7 @@ patches.suse/mm-kmem-drop-__GFP_NOFAIL-when-allocating-objcg-vectors.patch patches.suse/Revert-i2c-pxa-move-to-generic-GPIO-recovery.patch patches.suse/i2c-designware-Fix-corrupted-memory-seen-in-the-ISR.patch + patches.suse/dm-delay-fix-a-race-between-delay_presuspend-and-del-6fc4.patch patches.suse/dm-verity-don-t-use-blocking-calls-from-tasklets-28f0.patch patches.suse/nfsd-fix-file-memleak-on-client_opens_release.patch 
patches.suse/scsi-qla2xxx-Fix-system-crash-due-to-bad-pointer-access.patch @@ -44787,6 +44844,16 @@ patches.suse/HID-hid-asus-add-const-to-read-only-outgoing-usb-buf.patch patches.suse/HID-hid-asus-reset-the-backlight-brightness-level-on.patch patches.suse/HID-multitouch-Add-quirk-for-HONOR-GLO-GXXX-touchpad.patch + patches.suse/bcache-avoid-oversize-memory-allocation-by-small-str-baf8.patch + patches.suse/bcache-check-return-value-from-btree_node_alloc_repl-7779.patch + patches.suse/bcache-remove-redundant-assignment-to-variable-cur_i-be93.patch + patches.suse/bcache-prevent-potential-division-by-zero-error-2c7f.patch + patches.suse/bcache-fixup-init-dirty-data-errors-7cc4.patch + patches.suse/bcache-fixup-lock-c-root-error-e348.patch + patches.suse/bcache-fixup-multi-threaded-bch_sectors_dirty_init-w-2faa.patch + patches.suse/bcache-replace-a-mistaken-IS_ERR-by-IS_ERR_OR_NULL-i-f72f.patch + patches.suse/bcache-add-code-comments-for-bch_btree_node_get-and--31f5.patch + patches.suse/bcache-avoid-NULL-checking-to-c-root-in-run_cache_se-3eba.patch patches.suse/md-fix-bi_status-reporting-in-md_end_clone_io-45b4.patch patches.suse/s390-dasd-protect-device-queue-against-concurrent-access.patch patches.suse/nvmet-nul-terminate-the-NQNs-passed-in-the-connect-c.patch @@ -44833,6 +44900,9 @@ patches.suse/wifi-cfg80211-lock-wiphy-mutex-for-rfkill-poll.patch patches.suse/uapi-propagate-__struct_group-attributes-to-the-cont.patch patches.suse/dm-verity-initialize-fec-io-before-freeing-it-7be0.patch + patches.suse/dm-verity-don-t-perform-FEC-for-failed-readahead-IO-0193.patch + patches.suse/dm-verity-align-struct-dm_verity_fec_io-properly-38bc.patch + patches.suse/bcache-revert-replacing-IS_ERR_OR_NULL-with-IS_ERR-bb6c.patch patches.suse/nvme-core-check-for-too-small-lba-shift.patch patches.suse/drm-i915-Call-intel_pre_plane_updates-also-for-pipes.patch patches.suse/drm-amd-display-Include-udelay-when-waiting-for-INBO.patch @@ -44995,6 +45065,7 @@ patches.suse/perf-Fix-perf_event_validate_size-lockdep-splat.patch patches.suse/spi-atmel-Fix-clock-issue-when-using-devices-with-di.patch patches.suse/ring-buffer-Fix-slowpath-of-interrupted-event.patch + patches.suse/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-b86f.patch patches.suse/reset-hisilicon-hi6220-fix-Wvoid-pointer-to-enum-cas.patch patches.suse/reset-Fix-crash-when-freeing-non-existent-optional-r.patch patches.suse/bus-ti-sysc-Flush-posted-write-only-after-srst_udela.patch @@ -45525,6 +45596,7 @@ patches.suse/nvmet-fc-avoid-deadlock-on-delete-association-path.patch patches.suse/nvmet-fc-take-ref-count-on-tgtport-before-delete-ass.patch patches.suse/dm-limit-the-number-of-targets-and-parameter-size-ar.patch + patches.suse/dm-crypt-dm-verity-disable-tasklets-0a9b.patch patches.suse/ALSA-hda-realtek-fix-mute-micmute-LEDs-for-HP-ZBook-.patch patches.suse/ALSA-hda-cs8409-Suppress-vmaster-control-for-Dolphin.patch patches.suse/ALSA-usb-audio-Add-a-quirk-for-Yamaha-YIT-W12TX-tran.patch @@ -45679,6 +45751,8 @@ patches.suse/drm-ttm-Fix-an-invalid-freeing-on-already-freed-page.patch patches.suse/drm-syncobj-call-drm_syncobj_fence_add_wait-when-WAI.patch patches.suse/drm-amd-display-Fix-memory-leak-in-dm_sw_fini.patch + patches.suse/dm-crypt-don-t-modify-the-data-when-using-authentica-50c7.patch + patches.suse/dm-verity-dm-crypt-align-struct-bvec_iter-correctly-787f.patch patches.suse/s390-use-the-correct-count-for-__iowrite64_copy.patch patches.suse/arm64-dts-rockchip-set-num-cs-property-for-spi-on-px30.patch 
patches.suse/i2c-imx-when-being-a-target-mark-the-last-read-as-pr.patch @@ -45763,7 +45837,11 @@ patches.suse/i2c-wmt-Fix-an-error-handling-path-in-wmt_i2c_probe.patch patches.suse/i2c-aspeed-Fix-the-dummy-irq-expected-print.patch patches.suse/lib-cmdline-Fix-an-invalid-format-specifier-in-an-as.patch + patches.suse/md-Don-t-clear-MD_CLOSING-when-the-raid-is-about-to--9674.patch patches.suse/md-raid5-fix-atomicity-violation-in-raid5_cache_coun-dfd2.patch + patches.suse/md-raid1-fix-choose-next-idle-in-read_balance-257a.patch + patches.suse/md-don-t-clear-MD_RECOVERY_FROZEN-for-new-dm-raid-un-2f03.patch + patches.suse/dm-raid-fix-lockdep-waring-in-pers-hot_add_disk-9500.patch patches.suse/x86-sev-Harden-VC-instruction-emulation-somewhat patches.suse/RAS-Introduce-AMD-Address-Translation-Library.patch patches.suse/EDAC-amd64-Use-new-AMD-Address-Translation-Library.patch @@ -45831,6 +45909,8 @@ patches.suse/0001-PCI-Make-pci_dev_is_disconnected-helper-public-for-o.patch patches.suse/0002-iommu-vt-d-Don-t-issue-ATS-Invalidation-request-when.patch patches.suse/0001-iommu-amd-Mark-interrupt-as-managed.patch + patches.suse/dm-raid-fix-false-positive-for-requeue-needed-during-b25b.patch + patches.suse/dm-call-the-resume-method-on-internal-suspend-65e8.patch patches.suse/pwm-mediatek-Update-kernel-doc-for-struct-pwm_mediat.patch patches.suse/mmc-tmio-avoid-concurrent-runs-of-mmc_request_done.patch patches.suse/mmc-wmt-sdmmc-remove-an-incorrect-release_mem_region.patch @@ -45999,6 +46079,7 @@ patches.suse/ALSA-aoa-avoid-false-positive-format-truncation-warn.patch patches.suse/mmc-core-Initialize-mmc_blk_ioc_data.patch patches.suse/mmc-core-Avoid-negative-index-with-array-access.patch + patches.suse/dm-integrity-fix-out-of-range-warning-8e91.patch patches.suse/ACPICA-debugger-check-status-of-acpi_evaluate_object.patch patches.suse/x86-bugs-Fix-the-SRSO-mitigation-on-Zen3-4.patch patches.suse/drm-amdkfd-fix-TLB-flush-after-unmap-for-GFX9.4.2.patch @@ -46520,9 +46601,6 @@ # bsc#1166486 patches.suse/nvdimm-disable-namespace-on-error.patch - # bsc#1177529 - patches.suse/Avoid-deadlock-for-recursive-I-O-on-dm-thin-when-used-as-swap-4905.patch - patches.suse/sbitmap-avoid-lockups-when-waker-gets-preempted.patch # bsc#1184485, bsc#1216776