From: Heiko Carstens <heiko.carstens@de.ibm.com>
Subject: s390/topology: alternative topology for topology-less machines
Patch-mainline: v4.14-rc2
Git-commit: 1b25fda0533462c9cee3a22e8a7bea68fa670af2
References: bnc#1066983, LTC#159177
Description: kernel: alternative topology
Symptom: Overall performance is lower than expected.
Problem: When running within a z/VM guest the Linux scheduling domains
are configured in such a way that each CPU is far away from
all other CPUs. This way the kernel tries to avoid to send
expensive inter processor interrupts to other CPUs. As a
downside sometimes this also leads to the situation where
idle CPUs won't be woken up even if there is a runable process
waiting for execution. It depends on the workload if this
behavior is good or not.
Solution: Add a sysctl file /proc/sys/s390/topology which allows to
change the default configuration. When running within a
z/VM guest writing 1 to the file will result in a different
topology where all CPUs are configured to be close to each
other. This may increase overall performance depending on
the workload.
The used default topology within a z/VM guest can also be
changed with the kernel command line parameter 'topology=on'.
Reproduction: -
Upstream-Description:
s390/topology: alternative topology for topology-less machines
If running on machines that do not provide topology information we
currently generate a "fake" topology which defines the maximum
distance between each cpu: each cpu will be put into an own drawer.
Historically this used to be the best option for (virtual) machines in
overcommited hypervisors.
For some workloads however it is better to generate a different
topology where all cpus are siblings within a package (all cpus are
core siblings). This shows performance improvements of up to 10%,
depending on the workload.
In order to keep the current behaviour, but also allow to switch to
the different core sibling topology use the existing "topology="
kernel parameter:
Specifying "topology=on" on machines without topology information will
generate the core siblings (fake) topology information, instead of the
default topology information where all cpus have the maximum distance.
On machines which provide topology information specifying
"topology=on" does not have any effect.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Hannes Reinecke <hare@suse.com>
---
arch/s390/kernel/early.c | 12 -------
arch/s390/kernel/topology.c | 72 +++++++++++++++++++++++++++++++++++++-------
2 files changed, 61 insertions(+), 23 deletions(-)
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -397,18 +397,6 @@ static inline void save_vector_registers
#endif
}
-static int __init topology_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled)
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
- return rc;
-}
-early_param("topology", topology_setup);
-
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -29,12 +29,20 @@
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
+enum {
+ TOPOLOGY_MODE_HW,
+ TOPOLOGY_MODE_SINGLE,
+ TOPOLOGY_MODE_PACKAGE,
+ TOPOLOGY_MODE_UNINITIALIZED
+};
+
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
@@ -59,11 +67,26 @@ static cpumask_t cpu_group_map(struct ma
cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
- return mask;
- for (; info; info = info->next) {
- if (cpumask_test_cpu(cpu, &info->mask))
- return info->mask;
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+ if (cpumask_test_cpu(cpu, &info->mask)) {
+ mask = info->mask;
+ break;
+ }
+ info = info->next;
+ }
+ if (cpumask_empty(&mask))
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ case TOPOLOGY_MODE_PACKAGE:
+ cpumask_copy(&mask, cpu_present_mask);
+ break;
+ default:
+ /* fallthrough */
+ case TOPOLOGY_MODE_SINGLE:
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
}
return mask;
}
@@ -74,7 +97,7 @@ static cpumask_t cpu_thread_map(unsigned
int i;
cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
+ if (topology_mode != TOPOLOGY_MODE_HW)
return mask;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
@@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc)
static void update_cpu_masks(void)
{
struct cpu_topology_s390 *topo;
- int cpu;
+ int cpu, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
@@ -231,12 +254,13 @@ static void update_cpu_masks(void)
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
- if (!MACHINE_HAS_TOPOLOGY) {
+ if (topology_mode != TOPOLOGY_MODE_HW) {
+ id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
topo->core_id = cpu;
- topo->socket_id = cpu;
- topo->book_id = cpu;
- topo->drawer_id = cpu;
+ topo->socket_id = id;
+ topo->book_id = id;
+ topo->drawer_id = id;
if (cpu_present(cpu))
cpumask_set_cpu(cpu, &cpus_with_topology);
}
@@ -459,6 +483,12 @@ void __init topology_init_early(void)
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
+ if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+ if (MACHINE_HAS_TOPOLOGY)
+ topology_mode = TOPOLOGY_MODE_HW;
+ else
+ topology_mode = TOPOLOGY_MODE_SINGLE;
+ }
if (!MACHINE_HAS_TOPOLOGY)
goto out;
tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
@@ -474,6 +504,26 @@ out:
__arch_update_cpu_topology();
}
+static inline int topology_get_mode(int enabled)
+{
+ if (!enabled)
+ return TOPOLOGY_MODE_SINGLE;
+ return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static int __init topology_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ topology_mode = topology_get_mode(enabled);
+ return 0;
+}
+early_param("topology", topology_setup);
+
static int __init topology_init(void)
{
if (MACHINE_HAS_TOPOLOGY)