From d76e2382feea66e9b5a47de31d333f1a44932d55 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 2 Sep 2021 14:54:21 -0700
Subject: [PATCH] huge tmpfs: fix split_huge_page() after FALLOC_FL_KEEP_SIZE

References: git fixes (mm/shmem)
Patch-mainline: v5.15-rc1
Git-commit: d144bf6205342a4b5fed5d204ae18849a4de741b

A successful shmem_fallocate() guarantees that the extent has been
reserved, even beyond i_size when the FALLOC_FL_KEEP_SIZE flag was used.
But that guarantee is broken by shmem_unused_huge_shrink()'s attempts to
split huge pages and free their excess beyond i_size; and by other uses of
split_huge_page() near i_size.

It's sad to add a shmem inode field just for this, but I did not find a
better way to keep the guarantee.  A flag to say KEEP_SIZE has been used
would be cheaper, but I'm averse to unclearable flags.  The fallocend
field is not perfect either (many disjoint ranges might be fallocated),
but good enough; and gains another use later on.

Link: https://lkml.kernel.org/r/ca9a146-3a59-6cd3-7f28-e9a044bb1052@google.com
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/shmem_fs.h | 13 +++++++++++++
 mm/huge_memory.c         |  6 ++++--
 mm/shmem.c               | 15 ++++++++++++++-
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0a8499fb9c3c..bfc5899d18e0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -18,6 +18,7 @@ struct shmem_inode_info {
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	unsigned long		swapped;	/* subtotal assigned to swap */
+	pgoff_t			fallocend;	/* highest fallocate endindex */
 	struct list_head        shrinklist;     /* shrinkable hpage inodes */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
@@ -119,6 +120,18 @@ static inline bool shmem_file(struct file *file)
 	return shmem_mapping(file->f_mapping);
 }
 
+/*
+ * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
+ * beyond i_size's notion of EOF, which fallocate has committed to reserving:
+ * which split_huge_page() must therefore not delete.  This use of a single
+ * "fallocend" per inode errs on the side of not deleting a reservation when
+ * in doubt: there are plenty of cases when it preserves unreserved pages.
+ */
+static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
+{
+	return max(eof, SHMEM_I(inode)->fallocend);
+}
+
 extern bool shmem_charge(struct inode *inode, long pages);
 extern void shmem_uncharge(struct inode *inode, long pages);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f21e44c9030..5e9ef0fc261e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2428,11 +2428,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	for (i = nr - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
-		/* Some pages can be beyond i_size: drop them from page cache */
+		/* Some pages can be beyond EOF: drop them from page cache */
 		if (head[i].index >= end) {
 			ClearPageDirty(head + i);
 			__delete_from_page_cache(head + i, NULL);
-			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
+			if (shmem_mapping(head->mapping))
 				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
 		} else if (!PageAnon(page)) {
@@ -2660,6 +2660,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		 * head page lock is good enough to serialize the trimming.
 		 */
 		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+		if (shmem_mapping(mapping))
+			end = shmem_fallocend(mapping->host, end);
 	}
 
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ef579f6cab3..e391325dfc21 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -902,6 +902,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	if (lend == -1)
 		end = -1;	/* unsigned, so actually very big */
 
+	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
+		info->fallocend = start;
+
 	pagevec_init(&pvec);
 	index = start;
 	while (index < end && find_lock_entries(mapping, index, end - 1,
@@ -2650,7 +2653,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_falloc shmem_falloc;
-	pgoff_t start, index, end;
+	pgoff_t start, index, end, undo_fallocend;
 	int error;
 
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2719,6 +2722,15 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	inode->i_private = &shmem_falloc;
 	spin_unlock(&inode->i_lock);
 
+	/*
+	 * info->fallocend is only relevant when huge pages might be
+	 * involved: to prevent split_huge_page() freeing fallocated
+	 * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size.
+	 */
+	undo_fallocend = info->fallocend;
+	if (info->fallocend < end)
+		info->fallocend = end;
+
 	for (index = start; index < end; ) {
 		struct page *page;
 
@@ -2733,6 +2745,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		else
 			error = shmem_getpage(inode, index, &page, SGP_FALLOC);
 		if (error) {
+			info->fallocend = undo_fallocend;
 			/* Remove the !PageUptodate pages we added */
 			if (index > start) {
 				shmem_undo_range(inode,