From 45ff8b471cdc58701a7ba5c5dcd8dfc57ae06829 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 May 2022 15:12:57 +1000
Subject: [PATCH] xfs: can't use kmem_zalloc() for attribute buffers
References: bsc#1216909
Git-commit: 45ff8b471cdc58701a7ba5c5dcd8dfc57ae06829
Patch-mainline: v5.19-rc1
Because heap allocation of 64kB buffers will fail:
....
XFS: fs_mark(8414) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8417) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8409) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8428) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8430) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8437) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8433) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8406) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8412) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8432) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
XFS: fs_mark(8424) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40)
....
I'd use kvmalloc() instead, but....
- 48.19% xfs_attr_create_intent
- 46.89% xfs_attri_init
- kvmalloc_node
- 46.04% __kmalloc_node
- kmalloc_large_node
- 45.99% __alloc_pages
- 39.39% __alloc_pages_slowpath.constprop.0
- 38.89% __alloc_pages_direct_compact
- 38.71% try_to_compact_pages
- compact_zone_order
- compact_zone
- 21.09% isolate_migratepages_block
10.31% PageHuge
5.82% set_pfnblock_flags_mask
0.86% get_pfnblock_flags_mask
- 4.48% __reset_isolation_suitable
4.44% __reset_isolation_pfn
- 3.56% __pageblock_pfn_to_page
1.33% pfn_to_online_page
2.83% get_pfnblock_flags_mask
- 0.87% migrate_pages
0.86% compaction_alloc
0.84% find_suitable_fallback
- 6.60% get_page_from_freelist
4.99% clear_page_erms
- 1.19% _raw_spin_lock_irqsave
- do_raw_spin_lock
__pv_queued_spin_lock_slowpath
- 0.86% __vmalloc_node_range
0.65% __alloc_pages_bulk
.... this is just yet another reminder of how much kvmalloc() sucks.
So lift xlog_cil_kvmalloc(), rename it to xlog_kvmalloc() and use
that instead....
We also clean up the attribute name and value lengths as they no
longer need to be rounded out to sizes compatible with log vectors.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Thomas Abraham <tabraham@suse.com>
[tabraham@suse.com: handled backport]
---
fs/xfs/xfs_attr_item.c | 35 +++++++++++++++--------------------
fs/xfs/xfs_log_cil.c | 35 +----------------------------------
fs/xfs/xfs_log_priv.h | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+), 54 deletions(-)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 70f718d76ceb..6ca6fe8f2747 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -283,7 +250,7 @@ xlog_cil_alloc_shadow_bufs(
*/
kmem_free(lip->li_lv_shadow);
- lv = kmem_alloc_large(buf_size, KM_NOFS);
+ lv = xlog_kvmalloc(buf_size);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4f7e844d28ad..67fd9789e69a 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -651,4 +651,38 @@ xlog_valid_lsn(
return valid;
}
+/*
+ * Log vector and shadow buffers can be large, so we need to use kvmalloc() here
+ * to ensure success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts
+ * to fall back to vmalloc, so we can't actually do anything useful with gfp
+ * flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc()
+ * will do direct reclaim and compaction in the slow path, both of which are
+ * horrendously expensive. We just want kmalloc to fail fast and fall back to
+ * vmalloc if it can't get somethign straight away from the free lists or
+ * buddy allocator. Hence we have to open code kvmalloc outselves here.
+ *
+ * This assumes that the caller uses memalloc_nofs_save task context here, so
+ * despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS
+ * allocations. This is actually the only way to make vmalloc() do GFP_NOFS
+ * allocations, so lets just all pretend this is a GFP_KERNEL context
+ * operation....
+ */
+static inline void *
+xlog_kvmalloc(
+ size_t buf_size)
+{
+ gfp_t flags = GFP_KERNEL;
+ void *p;
+
+ flags &= ~__GFP_DIRECT_RECLAIM;
+ flags |= __GFP_NOWARN | __GFP_NORETRY;
+ do {
+ p = kmalloc(buf_size, flags);
+ if (!p)
+ p = vmalloc(buf_size);
+ } while (!p);
+
+ return p;
+}
+
#endif /* __XFS_LOG_PRIV_H__ */
--
2.35.3