From: Heiko Carstens <heiko.carstens@de.ibm.com>
Subject: s390/topology: enable / disable topology dynamically
Patch-mainline: v4.14-rc2
Git-commit: 51dce3867c6c63c7500332e5448c2ba76808d6b5
References: bnc#1066983, LTC#159177

Description:  kernel: alternative topology
Symptom:      Overall performance is lower than expected.
Problem:      When running within a z/VM guest the Linux scheduling domains
              are configured in such a way that each CPU is far away from
              all other CPUs. This way the kernel tries to avoid sending
              expensive inter-processor interrupts to other CPUs. As a
              downside, this sometimes leads to the situation where idle
              CPUs are not woken up even if there is a runnable process
              waiting for execution. Whether this behavior is good or not
              depends on the workload.
Solution:     Add a sysctl file /proc/sys/s390/topology which allows
              changing the default configuration. When running within a
              z/VM guest, writing 1 to the file will result in a different
              topology where all CPUs are configured to be close to each
              other. This may increase overall performance depending on
              the workload.
              The default topology used within a z/VM guest can also be
              changed with the kernel command line parameter 'topology=on'.
Reproduction: -

Upstream-Description:

              s390/topology: enable / disable topology dynamically

              Add a new sysctl file /proc/sys/s390/topology which displays if
              topology is on (1) or off (0) as specified by the "topology=" kernel
              parameter.

              This allows to change topology information during runtime and
              configuring it via /etc/sysctl.conf instead of using the kernel line
              parameter.

              Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
              Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>


Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 arch/s390/kernel/topology.c |   76 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -8,6 +8,8 @@
 
 #include <linux/workqueue.h>
 #include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
@@ -207,10 +209,8 @@ static void topology_update_polarization
 {
 	int cpu;
 
-	mutex_lock(&smp_cpu_state_mutex);
 	for_each_possible_cpu(cpu)
 		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
-	mutex_unlock(&smp_cpu_state_mutex);
 }
 
 static int ptf(unsigned long fc)
@@ -278,6 +278,7 @@ static int __arch_update_cpu_topology(vo
 	struct sysinfo_15_1_x *info = tl_info;
 	int rc = 0;
 
+	mutex_lock(&smp_cpu_state_mutex);
 	cpumask_clear(&cpus_with_topology);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
@@ -287,6 +288,7 @@ static int __arch_update_cpu_topology(vo
 	update_cpu_masks();
 	if (!MACHINE_HAS_TOPOLOGY)
 		topology_update_polarization_simple();
+	mutex_unlock(&smp_cpu_state_mutex);
 	return rc;
 }
 
@@ -313,6 +315,11 @@ void topology_schedule_update(void)
 	schedule_work(&topology_work);
 }
 
+static void topology_flush_work(void)
+{
+	flush_work(&topology_work);
+}
+
 static void topology_timer_fn(unsigned long ignored)
 {
 	if (ptf(PTF_CHECK))
@@ -511,6 +518,11 @@ static inline int topology_get_mode(int
 	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
 }
 
+static inline int topology_is_enabled(void)
+{
+	return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
 static int __init topology_setup(char *str)
 {
 	bool enabled;
@@ -524,12 +536,72 @@ static int __init topology_setup(char *s
 }
 early_param("topology", topology_setup);
 
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int len;
+	int new_mode;
+	char buf[2];
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		return 0;
+	}
+	if (!write) {
+		strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
+			ARRAY_SIZE(buf));
+		len = strnlen(buf, ARRAY_SIZE(buf));
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+		goto out;
+	}
+	len = *lenp;
+	if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
+		return -EFAULT;
+	if (buf[0] != '0' && buf[0] != '1')
+		return -EINVAL;
+	mutex_lock(&smp_cpu_state_mutex);
+	new_mode = topology_get_mode(buf[0] == '1');
+	if (topology_mode != new_mode) {
+		topology_mode = new_mode;
+		topology_schedule_update();
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	topology_flush_work();
+out:
+	*lenp = len;
+	*ppos += len;
+	return 0;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+	{
+		.procname	= "topology",
+		.mode		= 0644,
+		.proc_handler	= topology_ctl_handler,
+	},
+	{ },
+};
+
+static struct ctl_table topology_dir_table[] = {
+	{
+		.procname	= "s390",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= topology_ctl_table,
+	},
+	{ },
+};
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
 		set_topology_timer();
 	else
 		topology_update_polarization_simple();
+	register_sysctl_table(topology_dir_table);
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);