From 97be689a1001254070ca27624e6bd80067d3f2c1 Mon Sep 17 00:00:00 2001
From: Vishal Chourasia <vishalc@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2022 14:21:15 +0530
Subject: [PATCH] sched: Disable sched domain debugfs creation on ppc64 unless
sched_verbose is specified
References: bnc#1205653
Patch-mainline: Never, upstream requires a kernfs conversion
PPC64 booting with many CPUs may take an excessive amount of time due to
the time needed to create debugfs files. A similar problem occurs when
hot-plugging many CPUs or disabling SMT. Upstream suggests that debugfs
be moved to kernfs for speed and lower memory usage, but that conversion
will take too long in the context of a bug fix. Temporarily disable the export of sched
domain information by default on ppc64 unless sched_verbose is specified
on the kernel command line.
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
Documentation/admin-guide/kernel-parameters.txt | 4 ++++
kernel/sched/debug.c | 13 ++++++++++---
kernel/sched/sched.h | 1 +
kernel/sched/topology.c | 5 ++++-
4 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0878c7dcdbf2..af1aa534cd19 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5038,6 +5038,10 @@
sched_verbose [KNL] Enables verbose scheduler debug messages.
+ On ppc64, the domains debugfs files are not created by
+ default due to bnc#1205653. sched_verbose also creates
+ the debugfs files.
+
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
incurs a small amount of overhead in the scheduler
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4f7b94c114b8..c3b8f8f6dd9d 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -280,6 +280,11 @@ static const struct file_operations sched_dynamic_fops = {
#endif /* CONFIG_PREEMPT_DYNAMIC */
__read_mostly bool sched_debug_verbose;
+#ifdef CONFIG_PPC64
+__read_mostly bool sched_domain_expose_debugfs;
+#else
+__read_mostly bool sched_domain_expose_debugfs = true;
+#endif
static const struct seq_operations sched_debug_sops;
@@ -322,9 +327,11 @@ static __init int sched_init_debug(void)
debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
- mutex_lock(&sched_domains_mutex);
- update_sched_domain_debugfs();
- mutex_unlock(&sched_domains_mutex);
+ if (sched_domain_expose_debugfs) {
+ mutex_lock(&sched_domains_mutex);
+ update_sched_domain_debugfs();
+ mutex_unlock(&sched_domains_mutex);
+ }
#endif
#ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e5ab6e09140..421e7c13b7f5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2688,6 +2688,7 @@ extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG
extern bool sched_debug_verbose;
+extern bool sched_domain_expose_debugfs;
extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index a05968bfd536..b72e7825f454 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -15,6 +15,7 @@ static cpumask_var_t sched_domains_tmpmask2;
static int __init sched_debug_setup(char *str)
{
sched_debug_verbose = true;
+ sched_domain_expose_debugfs = true;
return 0;
}
@@ -153,6 +154,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
#else /* !CONFIG_SCHED_DEBUG */
# define sched_debug_verbose 0
+# define sched_domain_expose_debugfs 1
# define sched_domain_debug(sd, cpu) do { } while (0)
static inline bool sched_debug(void)
{
@@ -2578,7 +2580,8 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
dattr_cur = dattr_new;
ndoms_cur = ndoms_new;
- update_sched_domain_debugfs();
+ if (sched_domain_expose_debugfs)
+ update_sched_domain_debugfs();
}
/*