From a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 23 Nov 2022 20:39:50 +0100
Subject: [PATCH] ext4: fix deadlock due to mbcache entry corruption
Git-commit: a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd
Patch-mainline: v6.2-rc1
References: bsc#1207653

When manipulating xattr blocks, we can deadlock by looping forever
inside ext4_xattr_block_set(): we keep finding an xattr block for reuse
in mbcache but are unable to actually reuse it because its reference
count is too high. This happens because the cache entry for the xattr
block is marked as reusable (e_reusable set) even though its reference
count has already reached the maximum. Once this inconsistency arises,
the inconsistent state persists indefinitely and ext4_xattr_block_set()
keeps retrying forever.
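
As an illustration only (simplified, hypothetical names; not the ext4
or mbcache code), the stale flag turns the reuse path into an endless
retry loop roughly like this:

#include <stdbool.h>
#include <stdio.h>

#define REFCOUNT_MAX 65535	/* stands in for EXT4_XATTR_REFCOUNT_MAX */

struct cached_block {		/* stand-in for a cached xattr block */
	bool reusable;		/* should be cleared once refcount is maxed */
	unsigned int refcount;
};

/* Simplified reuse attempt: keeps retrying as long as the cache claims
 * the block is reusable, mirroring the retry described above. */
static bool try_reuse(struct cached_block *b)
{
	unsigned long attempts = 0;

	while (b->reusable) {
		if (b->refcount < REFCOUNT_MAX)
			return true;	/* normal case: share the block */
		/* Block is actually full; nothing ever clears b->reusable,
		 * so without the cap below this loop would never end. */
		if (++attempts > 1000000)
			return false;
	}
	return false;
}

int main(void)
{
	/* The inconsistent state from this changelog: full but "reusable". */
	struct cached_block full = { .reusable = true, .refcount = REFCOUNT_MAX };

	printf("reused: %d\n", try_reuse(&full));
	return 0;
}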

The inconsistent state is caused by a non-atomic update of the
e_reusable bit. e_reusable is part of a bitfield, and its update can
race with an update of the e_referenced bit in the same bitfield,
resulting in the loss of one of the updates. Fix the problem by using
atomic bitops instead.
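
For illustration, here is a userspace sketch of the lost-update race
(illustrative names and layout; not the kernel structures): each
bitfield write is a read-modify-write of the shared word, so one store
can overwrite the other. set_bit()/clear_bit() on an unsigned long
flags word perform that read-modify-write atomically, which is what
this patch switches to.

#include <pthread.h>
#include <stdio.h>

struct flags_word {
	unsigned int referenced:1;	/* shares one word ... */
	unsigned int reusable:1;	/* ... with this bit */
};

static struct flags_word e = { .reusable = 1 };

static void *touch(void *arg)
{
	e.referenced = 1;	/* non-atomic: load word, set bit, store word */
	return NULL;
}

static void *make_unreusable(void *arg)
{
	e.reusable = 0;		/* concurrent non-atomic RMW on the same word */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, touch, NULL);
	pthread_create(&b, NULL, make_unreusable, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* Depending on the interleaving, either store can clobber the other:
	 * reusable may still read back as 1 even though make_unreusable()
	 * ran, which is the inconsistency fixed here. */
	printf("referenced=%u reusable=%u\n",
	       (unsigned int)e.referenced, (unsigned int)e.reusable);
	return 0;
}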

This bug has been around for many years, but it became *much* easier
to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr
blocks").

Cc: stable@vger.kernel.org
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks")
Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>

---
 fs/ext4/xattr.c         |    4 ++--
 fs/mbcache.c            |   15 +++++++++------
 include/linux/mbcache.h |    9 +++++++--
 3 files changed, 18 insertions(+), 10 deletions(-)

--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -604,7 +604,7 @@ retry_ref:
 			ce = mb_cache_entry_get(ext4_mb_cache, hash,
 						bh->b_blocknr);
 			if (ce) {
-				ce->e_reusable = 1;
+				set_bit(MBE_REUSABLE_B, &ce->e_flags);
 				mb_cache_entry_put(ext4_mb_cache, ce);
 			}
 		}
@@ -962,7 +962,7 @@ inserted:
 				}
 				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
 				if (ref == EXT4_XATTR_REFCOUNT_MAX)
-					ce->e_reusable = 0;
+					clear_bit(MBE_REUSABLE_B, &ce->e_flags);
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);
 				ext4_xattr_block_csum_set(inode, new_bh);
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -92,8 +92,9 @@ int mb_cache_entry_create(struct mb_cach
 	atomic_set(&entry->e_refcnt, 1);
 	entry->e_key = key;
 	entry->e_block = block;
-	entry->e_reusable = reusable;
-	entry->e_referenced = 0;
+	entry->e_flags = 0;
+	if (reusable)
+		set_bit(MBE_REUSABLE_B, &entry->e_flags);
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -153,7 +154,8 @@ static struct mb_cache_entry *__entry_fi
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable) {
+		if (entry->e_key == key &&
+		    test_bit(MBE_REUSABLE_B, &entry->e_flags)) {
 			atomic_inc(&entry->e_refcnt);
 			goto out;
 		}
@@ -322,7 +324,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_g
 void mb_cache_entry_touch(struct mb_cache *cache,
 			  struct mb_cache_entry *entry)
 {
-	entry->e_referenced = 1;
+	set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
 
@@ -347,8 +349,9 @@ static unsigned long mb_cache_shrink(str
 	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
-		if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
-			entry->e_referenced = 0;
+		if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+		    atomic_read(&entry->e_refcnt) > 2) {
+			clear_bit(MBE_REFERENCED_B, &entry->e_flags);
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -9,6 +9,12 @@
 
 struct mb_cache;
 
+/* Cache entry flags */
+enum {
+	MBE_REFERENCED_B = 0,
+	MBE_REUSABLE_B
+};
+
 struct mb_cache_entry {
 	/* List of entries in cache - protected by cache->c_list_lock */
 	struct list_head	e_list;
@@ -17,8 +23,7 @@ struct mb_cache_entry {
 	atomic_t		e_refcnt;
 	/* Key in hash - stable during lifetime of the entry */
 	u32			e_key;
-	u32			e_referenced:1;
-	u32			e_reusable:1;
+	unsigned long		e_flags;
 	/* Block number of hashed block - stable during lifetime of the entry */
 	sector_t		e_block;
 };