diff --git a/patches.kernel.org/6.3.3-229-ext4-avoid-deadlock-in-fs-reclaim-with-page-wri.patch b/patches.kernel.org/6.3.3-229-ext4-avoid-deadlock-in-fs-reclaim-with-page-wri.patch new file mode 100644 index 0000000..090461c --- /dev/null +++ b/patches.kernel.org/6.3.3-229-ext4-avoid-deadlock-in-fs-reclaim-with-page-wri.patch @@ -0,0 +1,228 @@ +From: Jan Kara +Date: Thu, 4 May 2023 14:47:23 +0200 +Subject: [PATCH] ext4: avoid deadlock in fs reclaim with page writeback +References: bsc#1012628 +Patch-mainline: 6.3.3 +Git-commit: 00d873c17e29cc32d90ca852b82685f1673acaa5 + +commit 00d873c17e29cc32d90ca852b82685f1673acaa5 upstream. + +Ext4 has a filesystem wide lock protecting ext4_writepages() calls to +avoid races with switching of journalled data flag or inode format. This +lock can however cause a deadlock like: + +CPU0 CPU1 + +ext4_writepages() + percpu_down_read(sbi->s_writepages_rwsem); + ext4_change_inode_journal_flag() + percpu_down_write(sbi->s_writepages_rwsem); + - blocks, all readers block from now on + ext4_do_writepages() + ext4_init_io_end() + kmem_cache_zalloc(io_end_cachep, GFP_KERNEL) + fs_reclaim frees dentry... + dentry_unlink_inode() + iput() - last ref => + iput_final() - inode dirty => + write_inode_now()... + ext4_writepages() tries to acquire sbi->s_writepages_rwsem + and blocks forever + +Make sure we cannot recurse into filesystem reclaim from writeback code +to avoid the deadlock. + +Reported-by: syzbot+6898da502aef574c5f8a@syzkaller.appspotmail.com +Link: https://lore.kernel.org/all/0000000000004c66b405fa108e27@google.com +Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal mode and ext4_writepages") +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20230504124723.20205-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Jiri Slaby +--- + fs/ext4/ext4.h | 24 ++++++++++++++++++++++++ + fs/ext4/inode.c | 18 ++++++++++-------- + fs/ext4/migrate.c | 11 ++++++----- + 3 files changed, 40 insertions(+), 13 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 08b29c28..df0255b7 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1774,6 +1774,30 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) + return container_of(inode, struct ext4_inode_info, vfs_inode); + } + ++static inline int ext4_writepages_down_read(struct super_block *sb) ++{ ++ percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem); ++ return memalloc_nofs_save(); ++} ++ ++static inline void ext4_writepages_up_read(struct super_block *sb, int ctx) ++{ ++ memalloc_nofs_restore(ctx); ++ percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem); ++} ++ ++static inline int ext4_writepages_down_write(struct super_block *sb) ++{ ++ percpu_down_write(&EXT4_SB(sb)->s_writepages_rwsem); ++ return memalloc_nofs_save(); ++} ++ ++static inline void ext4_writepages_up_write(struct super_block *sb, int ctx) ++{ ++ memalloc_nofs_restore(ctx); ++ percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem); ++} ++ + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) + { + return ino == EXT4_ROOT_INO || +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 41ba1c43..87d01f1c 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2956,13 +2956,14 @@ static int ext4_writepages(struct address_space *mapping, + .can_map = 1, + }; + int ret; ++ int alloc_ctx; + + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return -EIO; + +- percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem); ++ alloc_ctx = ext4_writepages_down_read(sb); + ret = ext4_do_writepages(&mpd); +- percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem); ++ ext4_writepages_up_read(sb, alloc_ctx); + + return ret; + } +@@ -2990,17 +2991,18 @@ static int ext4_dax_writepages(struct address_space *mapping, + long nr_to_write = wbc->nr_to_write; + struct inode *inode = mapping->host; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); ++ int alloc_ctx; + + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + +- percpu_down_read(&sbi->s_writepages_rwsem); ++ alloc_ctx = ext4_writepages_down_read(inode->i_sb); + trace_ext4_writepages(inode, wbc); + + ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); + trace_ext4_writepages_result(inode, wbc, ret, + nr_to_write - wbc->nr_to_write); +- percpu_up_read(&sbi->s_writepages_rwsem); ++ ext4_writepages_up_read(inode->i_sb, alloc_ctx); + return ret; + } + +@@ -6122,7 +6124,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + journal_t *journal; + handle_t *handle; + int err; +- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ++ int alloc_ctx; + + /* + * We have to be very careful here: changing a data block's +@@ -6160,7 +6162,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + } + } + +- percpu_down_write(&sbi->s_writepages_rwsem); ++ alloc_ctx = ext4_writepages_down_write(inode->i_sb); + jbd2_journal_lock_updates(journal); + + /* +@@ -6177,7 +6179,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + err = jbd2_journal_flush(journal, 0); + if (err < 0) { + jbd2_journal_unlock_updates(journal); +- percpu_up_write(&sbi->s_writepages_rwsem); ++ ext4_writepages_up_write(inode->i_sb, alloc_ctx); + return err; + } + ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); +@@ -6185,7 +6187,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + ext4_set_aops(inode); + + jbd2_journal_unlock_updates(journal); +- percpu_up_write(&sbi->s_writepages_rwsem); ++ ext4_writepages_up_write(inode->i_sb, alloc_ctx); + + if (val) + filemap_invalidate_unlock(inode->i_mapping); +diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c +index a19a9661..d98ac2af 100644 +--- a/fs/ext4/migrate.c ++++ b/fs/ext4/migrate.c +@@ -408,7 +408,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode) + + int ext4_ext_migrate(struct inode *inode) + { +- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + handle_t *handle; + int retval = 0, i; + __le32 *i_data; +@@ -418,6 +417,7 @@ int ext4_ext_migrate(struct inode *inode) + unsigned long max_entries; + __u32 goal, tmp_csum_seed; + uid_t owner[2]; ++ int alloc_ctx; + + /* + * If the filesystem does not support extents, or the inode +@@ -434,7 +434,7 @@ int ext4_ext_migrate(struct inode *inode) + */ + return retval; + +- percpu_down_write(&sbi->s_writepages_rwsem); ++ alloc_ctx = ext4_writepages_down_write(inode->i_sb); + + /* + * Worst case we can touch the allocation bitmaps and a block +@@ -586,7 +586,7 @@ int ext4_ext_migrate(struct inode *inode) + unlock_new_inode(tmp_inode); + iput(tmp_inode); + out_unlock: +- percpu_up_write(&sbi->s_writepages_rwsem); ++ ext4_writepages_up_write(inode->i_sb, alloc_ctx); + return retval; + } + +@@ -605,6 +605,7 @@ int ext4_ind_migrate(struct inode *inode) + ext4_fsblk_t blk; + handle_t *handle; + int ret, ret2 = 0; ++ int alloc_ctx; + + if (!ext4_has_feature_extents(inode->i_sb) || + (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) +@@ -621,7 +622,7 @@ int ext4_ind_migrate(struct inode *inode) + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + +- percpu_down_write(&sbi->s_writepages_rwsem); ++ alloc_ctx = ext4_writepages_down_write(inode->i_sb); + + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); + if (IS_ERR(handle)) { +@@ -665,6 +666,6 @@ int ext4_ind_migrate(struct inode *inode) + ext4_journal_stop(handle); + up_write(&EXT4_I(inode)->i_data_sem); + out_unlock: +- percpu_up_write(&sbi->s_writepages_rwsem); ++ ext4_writepages_up_write(inode->i_sb, alloc_ctx); + return ret; + } +-- +2.35.3 + diff --git a/series.conf b/series.conf index 6b0e505..97f5994 100644 --- a/series.conf +++ b/series.conf @@ -962,6 +962,7 @@ patches.kernel.org/6.3.3-226-ext4-fix-WARNING-in-mb_find_extent.patch patches.kernel.org/6.3.3-227-ext4-avoid-a-potential-slab-out-of-bounds-in-ex.patch patches.kernel.org/6.3.3-228-ext4-fix-data-races-when-using-cached-status-ex.patch + patches.kernel.org/6.3.3-229-ext4-avoid-deadlock-in-fs-reclaim-with-page-wri.patch ######################################################## # Build fixes that apply to the vanilla kernel too.