From 053e5b89996fa4cec1fa9886054fe7ad32d06010 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 10:47:59 +0100
Subject: [PATCH] mm/memcg: Disable threshold event handlers on PREEMPT_RT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
References: SLE Realtime Extension
Patch-mainline: v5.18-rc1
Git-commit: 2343e88d238f5de973d609d861c505890f94f22e
During the integration of PREEMPT_RT support, the code flow around
memcg_check_events() resulted in `twisted code'. Moving the code around
and avoiding then would then lead to an additional local-irq-save
section within memcg_check_events(). While looking better, it adds a
local-irq-save section to code flow which is usually within an
local-irq-off block on non-PREEMPT_RT configurations.
The threshold event handler is a deprecated memcg v1 feature. Instead of
trying to get it to work under PREEMPT_RT just disable it. There should
be no users on PREEMPT_RT. From that perspective it makes even less
sense to get it to work under PREEMPT_RT while having zero users.
Make memory.soft_limit_in_bytes and cgroup.event_control return
-EOPNOTSUPP on PREEMPT_RT. Make an empty memcg_check_events() and
memcg_write_event_control() which return only -EOPNOTSUPP on PREEMPT_RT.
Document that the two knobs are disabled on PREEMPT_RT.
Suggested-by: Michal Hocko <mhocko@kernel.org>
Suggested-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
Documentation/admin-guide/cgroup-v1/memory.rst | 2 ++
mm/memcontrol.c | 14 ++++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 9f10e247f8a5..eeec75d7f8e2 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -64,6 +64,7 @@ Brief summary of control files.
threads
cgroup.procs show list of processes
cgroup.event_control an interface for event_fd()
+ This knob is not available on CONFIG_PREEMPT_RT systems.
memory.usage_in_bytes show current usage for memory
(See 5.5 for details)
memory.memsw.usage_in_bytes show current usage for memory+Swap
@@ -75,6 +76,7 @@ Brief summary of control files.
memory.max_usage_in_bytes show max memory usage recorded
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
memory.soft_limit_in_bytes set/show soft limit of memory usage
+ This knob is not available on CONFIG_PREEMPT_RT systems.
memory.stat show various statistics
memory.use_hierarchy set/show hierarchical account enabled
This knob is deprecated and shouldn't be
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c6c19a183de1..1391300a6ee6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -876,6 +876,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
*/
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return;
+
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
@@ -3767,8 +3770,12 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
}
break;
case RES_SOFT_LIMIT:
- memcg->soft_limit = nr_pages;
- ret = 0;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ ret = -EOPNOTSUPP;
+ } else {
+ memcg->soft_limit = nr_pages;
+ ret = 0;
+ }
break;
}
return ret ?: nbytes;
@@ -4744,6 +4751,9 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
char *endp;
int ret;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return -EOPNOTSUPP;
+
buf = strstrip(buf);
efd = simple_strtoul(buf, &endp, 10);