|
|
a9c5d6 |
From 65d759c8f9f57b96c199f3fe5cfb93ac7da095e9 Mon Sep 17 00:00:00 2001
|
|
|
a9c5d6 |
From: Charan Teja Reddy <charante@codeaurora.org>
|
|
|
a9c5d6 |
Date: Thu, 2 Sep 2021 14:59:59 -0700
|
|
|
a9c5d6 |
Subject: [PATCH] mm: compaction: support triggering of proactive compaction by
|
|
|
a9c5d6 |
user
|
|
|
a9c5d6 |
References: bsc#1207010
|
|
|
a9c5d6 |
Git-commit: 65d759c8f9f57b96c199f3fe5cfb93ac7da095e9
|
|
|
a9c5d6 |
Patch-mainline: v5.15-rc1
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
Proactive compaction[1] is triggered every 500 msec and runs
|
|
|
a9c5d6 |
compaction on the node for COMPACTION_HPAGE_ORDER (usually order-9) pages
|
|
|
a9c5d6 |
based on the value set to sysctl.compaction_proactiveness. Triggering the
|
|
|
a9c5d6 |
compaction for every 500msec in search of COMPACTION_HPAGE_ORDER pages is
|
|
|
a9c5d6 |
not needed for all applications, especially on the embedded system
|
|
|
a9c5d6 |
use cases which may have only a few MBs of RAM. Enabling the proactive
|
|
|
a9c5d6 |
compaction in its current state will end up running almost always on such
|
|
|
a9c5d6 |
systems.
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
On the other hand, proactive compaction can still be very useful for getting
|
|
|
a9c5d6 |
a set of higher order pages in some controllable manner(controlled by
|
|
|
a9c5d6 |
using the sysctl.compaction_proactiveness). So, on systems where enabling
|
|
|
a9c5d6 |
the proactive compaction always may prove unnecessary, one can trigger the
|
|
|
a9c5d6 |
same from user space on write to its sysctl interface. As an example, say
|
|
|
a9c5d6 |
an app launcher decides to launch a memory-heavy application which can be
|
|
|
a9c5d6 |
launched fast if it gets more higher-order pages; the launcher can thus prepare
|
|
|
a9c5d6 |
the system in advance by triggering the proactive compaction from
|
|
|
a9c5d6 |
userspace.
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
This triggering of proactive compaction is done on a write to
|
|
|
a9c5d6 |
sysctl.compaction_proactiveness by user.
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
[1]https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=facdaa917c4d5a376d09d25865f5a863f906234a
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
[akpm@linux-foundation.org: tweak vm.rst, per Mike]
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
Link: https://lkml.kernel.org/r/1627653207-12317-1-git-send-email-charante@codeaurora.org
|
|
|
a9c5d6 |
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
|
|
|
a9c5d6 |
Acked-by: Vlastimil Babka <vbabka@suse.cz>
|
|
|
a9c5d6 |
Acked-by: Rafael Aquini <aquini@redhat.com>
|
|
|
a9c5d6 |
Cc: Mike Rapoport <rppt@kernel.org>
|
|
|
a9c5d6 |
Cc: Luis Chamberlain <mcgrof@kernel.org>
|
|
|
a9c5d6 |
Cc: Kees Cook <keescook@chromium.org>
|
|
|
a9c5d6 |
Cc: Iurii Zaikin <yzaikin@google.com>
|
|
|
a9c5d6 |
Cc: Dave Hansen <dave.hansen@linux.intel.com>
|
|
|
a9c5d6 |
Cc: Mel Gorman <mgorman@techsingularity.net>
|
|
|
a9c5d6 |
Cc: Nitin Gupta <nigupta@nvidia.com>
|
|
|
a9c5d6 |
Cc: Jonathan Corbet <corbet@lwn.net>
|
|
|
a9c5d6 |
Cc: Khalid Aziz <khalid.aziz@oracle.com>
|
|
|
a9c5d6 |
Cc: David Rientjes <rientjes@google.com>
|
|
|
a9c5d6 |
Cc: Vinayak Menon <vinmenon@codeaurora.org>
|
|
|
a9c5d6 |
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
|
a9c5d6 |
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
|
a9c5d6 |
Signed-off-by: Thomas Abraham <tabraham@suse.com>
|
|
|
a9c5d6 |
---
|
|
|
a9c5d6 |
Documentation/admin-guide/sysctl/vm.rst | 3 +-
|
|
|
a9c5d6 |
include/linux/compaction.h | 2 ++
|
|
|
a9c5d6 |
include/linux/mmzone.h | 1 +
|
|
|
a9c5d6 |
kernel/sysctl.c | 2 +-
|
|
|
a9c5d6 |
mm/compaction.c | 38 +++++++++++++++++++++++--
|
|
|
a9c5d6 |
5 files changed, 42 insertions(+), 4 deletions(-)
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
|
|
|
a9c5d6 |
index 003d5cc3751b..5e795202111f 100644
|
|
|
a9c5d6 |
--- a/Documentation/admin-guide/sysctl/vm.rst
|
|
|
a9c5d6 |
+++ b/Documentation/admin-guide/sysctl/vm.rst
|
|
|
a9c5d6 |
@@ -118,7 +118,8 @@ compaction_proactiveness
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
This tunable takes a value in the range [0, 100] with a default value of
|
|
|
a9c5d6 |
20. This tunable determines how aggressively compaction is done in the
|
|
|
a9c5d6 |
-background. Setting it to 0 disables proactive compaction.
|
|
|
a9c5d6 |
+background. Write of a non zero value to this tunable will immediately
|
|
|
a9c5d6 |
+trigger the proactive compaction. Setting it to 0 disables proactive compaction.
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
Note that compaction has a non-trivial system-wide impact as pages
|
|
|
a9c5d6 |
belonging to different processes are moved around, which could also lead
|
|
|
a9c5d6 |
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
|
|
|
a9c5d6 |
index c24098c7acca..34bce35c808d 100644
|
|
|
a9c5d6 |
--- a/include/linux/compaction.h
|
|
|
a9c5d6 |
+++ b/include/linux/compaction.h
|
|
|
a9c5d6 |
@@ -84,6 +84,8 @@ static inline unsigned long compact_gap(unsigned int order)
|
|
|
a9c5d6 |
extern unsigned int sysctl_compaction_proactiveness;
|
|
|
a9c5d6 |
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
|
|
|
a9c5d6 |
void *buffer, size_t *length, loff_t *ppos);
|
|
|
a9c5d6 |
+extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
|
|
|
a9c5d6 |
+ int write, void *buffer, size_t *length, loff_t *ppos);
|
|
|
a9c5d6 |
extern int sysctl_extfrag_threshold;
|
|
|
a9c5d6 |
extern int sysctl_compact_unevictable_allowed;
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
|
|
a9c5d6 |
index 59bad25ce78e..1bd5f5955f9a 100644
|
|
|
a9c5d6 |
--- a/include/linux/mmzone.h
|
|
|
a9c5d6 |
+++ b/include/linux/mmzone.h
|
|
|
a9c5d6 |
@@ -846,6 +846,7 @@ typedef struct pglist_data {
|
|
|
a9c5d6 |
enum zone_type kcompactd_highest_zoneidx;
|
|
|
a9c5d6 |
wait_queue_head_t kcompactd_wait;
|
|
|
a9c5d6 |
struct task_struct *kcompactd;
|
|
|
a9c5d6 |
+ bool proactive_compact_trigger;
|
|
|
a9c5d6 |
#endif
|
|
|
a9c5d6 |
/*
|
|
|
a9c5d6 |
* This is a per-node reserve of pages that are not available
|
|
|
a9c5d6 |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
|
|
a9c5d6 |
index 272f4a272f8c..297f0b3966bd 100644
|
|
|
a9c5d6 |
--- a/kernel/sysctl.c
|
|
|
a9c5d6 |
+++ b/kernel/sysctl.c
|
|
|
a9c5d6 |
@@ -2871,7 +2871,7 @@ static struct ctl_table vm_table[] = {
|
|
|
a9c5d6 |
.data = &sysctl_compaction_proactiveness,
|
|
|
a9c5d6 |
.maxlen = sizeof(sysctl_compaction_proactiveness),
|
|
|
a9c5d6 |
.mode = 0644,
|
|
|
a9c5d6 |
- .proc_handler = proc_dointvec_minmax,
|
|
|
a9c5d6 |
+ .proc_handler = compaction_proactiveness_sysctl_handler,
|
|
|
a9c5d6 |
.extra1 = SYSCTL_ZERO,
|
|
|
a9c5d6 |
.extra2 = &one_hundred,
|
|
|
a9c5d6 |
},
|
|
|
a9c5d6 |
diff --git a/mm/compaction.c b/mm/compaction.c
|
|
|
a9c5d6 |
index 4ee0d40d93f2..fa9b2b598eab 100644
|
|
|
a9c5d6 |
--- a/mm/compaction.c
|
|
|
a9c5d6 |
+++ b/mm/compaction.c
|
|
|
a9c5d6 |
@@ -2706,6 +2706,30 @@ static void compact_nodes(void)
|
|
|
a9c5d6 |
*/
|
|
|
a9c5d6 |
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
|
|
|
a9c5d6 |
+ void *buffer, size_t *length, loff_t *ppos)
|
|
|
a9c5d6 |
+{
|
|
|
a9c5d6 |
+ int rc, nid;
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
|
|
a9c5d6 |
+ if (rc)
|
|
|
a9c5d6 |
+ return rc;
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
+ if (write && sysctl_compaction_proactiveness) {
|
|
|
a9c5d6 |
+ for_each_online_node(nid) {
|
|
|
a9c5d6 |
+ pg_data_t *pgdat = NODE_DATA(nid);
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
+ if (pgdat->proactive_compact_trigger)
|
|
|
a9c5d6 |
+ continue;
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
+ pgdat->proactive_compact_trigger = true;
|
|
|
a9c5d6 |
+ wake_up_interruptible(&pgdat->kcompactd_wait);
|
|
|
a9c5d6 |
+ }
|
|
|
a9c5d6 |
+ }
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
+ return 0;
|
|
|
a9c5d6 |
+}
|
|
|
a9c5d6 |
+
|
|
|
a9c5d6 |
/*
|
|
|
a9c5d6 |
* This is the entry point for compacting all nodes via
|
|
|
a9c5d6 |
* /proc/sys/vm/compact_memory
|
|
|
a9c5d6 |
@@ -2750,7 +2774,8 @@ void compaction_unregister_node(struct node *node)
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
|
|
|
a9c5d6 |
{
|
|
|
a9c5d6 |
- return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
|
|
|
a9c5d6 |
+ return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
|
|
|
a9c5d6 |
+ pgdat->proactive_compact_trigger;
|
|
|
a9c5d6 |
}
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
static bool kcompactd_node_suitable(pg_data_t *pgdat)
|
|
|
a9c5d6 |
@@ -2901,9 +2926,16 @@ static int kcompactd(void *p)
|
|
|
a9c5d6 |
while (!kthread_should_stop()) {
|
|
|
a9c5d6 |
unsigned long pflags;
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
+ /*
|
|
|
a9c5d6 |
+ * Avoid the unnecessary wakeup for proactive compaction
|
|
|
a9c5d6 |
+ * when it is disabled.
|
|
|
a9c5d6 |
+ */
|
|
|
a9c5d6 |
+ if (!sysctl_compaction_proactiveness)
|
|
|
a9c5d6 |
+ timeout = MAX_SCHEDULE_TIMEOUT;
|
|
|
a9c5d6 |
trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
|
|
|
a9c5d6 |
if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
|
|
|
a9c5d6 |
- kcompactd_work_requested(pgdat), timeout)) {
|
|
|
a9c5d6 |
+ kcompactd_work_requested(pgdat), timeout) &&
|
|
|
a9c5d6 |
+ !pgdat->proactive_compact_trigger) {
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
psi_memstall_enter(&pflags);
|
|
|
a9c5d6 |
kcompactd_do_work(pgdat);
|
|
|
a9c5d6 |
@@ -2938,6 +2970,8 @@ static int kcompactd(void *p)
|
|
|
a9c5d6 |
timeout =
|
|
|
a9c5d6 |
default_timeout << COMPACT_MAX_DEFER_SHIFT;
|
|
|
a9c5d6 |
}
|
|
|
a9c5d6 |
+ if (unlikely(pgdat->proactive_compact_trigger))
|
|
|
a9c5d6 |
+ pgdat->proactive_compact_trigger = false;
|
|
|
a9c5d6 |
}
|
|
|
a9c5d6 |
|
|
|
a9c5d6 |
return 0;
|
|
|
a9c5d6 |
--
|
|
|
a9c5d6 |
2.35.3
|
|
|
a9c5d6 |
|