From 41354d1f124ed4917a563b0d209c2e4711621268 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.com>
Date: Wed, 13 Sep 2017 20:31:19 +0100
Subject: [PATCH] cpufreq, intel_pstate: Use setpoint of 10% on servers
Patch-mainline: Never, upstream is trying to integrate scheduler/pm instead
References: bnc#945201,bnc#1064414
Changes in power idling in 3.13 and 3.14 caused major regressions in a number
of basic workloads that idle for short periods of time for either IO or
because they are client/server workloads. Upstream resists any attempt to
fix this on the basis that power consumption is higher if the CPU does not
idle very quickly. Their focus has been on having the cpufreq driver and
scheduler co-operate closely but it's months away from making any sort of
progress and in the meantime, server performance is severely impacted. This
patch boosts performance of dbench, pgbench and sysbench-oltp workloads
to be equivalent to SLE 12 SP1 performance.
Depending on ACPI FADT table's preferred PM profile (compare with ACPI
spec chapter 5.2.9.1 Preferred PM Profile System Types) the intel_pstate
performance tunables will be set to a more performance oriented policy.
intel_pstate=vanilla_policy
boot parameter will disable this functionality again.
intel_pstate=server_policy
will apply the performance optimized values also on laptops, desktops
or where the ACPI preferred PM profile value is not set.
hardy3 was used for evaluation. The expectation is that an impact is felt only
at relatively low utilisation or thread counts, depending on the scenario.
siege transactions
4.12.11 4.12.11
sle15-20170913 intel_pstate-v1r2
Hmean 1 4065.59 ( 0.00%) 6045.11 ( 48.69%)
Hmean 2 6672.53 ( 0.00%) 8742.52 ( 31.02%)
Hmean 4 14952.61 ( 0.00%) 18843.06 ( 26.02%)
Hmean 8 35775.77 ( 0.00%) 39019.54 ( 9.07%)
Hmean 16 67879.18 ( 0.00%) 68383.78 ( 0.74%)
Hmean 32 98246.31 ( 0.00%) 98890.12 ( 0.66%)
Hmean 64 94631.68 ( 0.00%) 95715.03 ( 1.14%)
Hmean 128 94287.33 ( 0.00%) 94376.52 ( 0.09%)
Hmean 250 86177.15 ( 0.00%) 86641.38 ( 0.54%)
pgbench Transactions
4.12.11 4.12.11
sle15-20170913 intel_pstate-v1r2
Hmean 1 13345.48 ( 0.00%) 13945.89 ( 4.50%)
Hmean 6 33028.99 ( 0.00%) 41881.80 ( 26.80%)
Hmean 12 51733.04 ( 0.00%) 72846.06 ( 40.81%)
Hmean 22 110182.02 ( 0.00%) 126467.43 ( 14.78%)
Hmean 30 127732.32 ( 0.00%) 127494.44 ( -0.19%)
Hmean 48 127098.19 ( 0.00%) 128968.20 ( 1.47%)
Hmean 80 132963.83 ( 0.00%) 126711.32 ( -4.70%)
Hmean 110 124245.25 ( 0.00%) 129194.30 ( 3.98%)
Hmean 142 126212.37 ( 0.00%) 125594.58 ( -0.49%)
Hmean 160 135786.02 ( 0.00%) 147557.93 ( 8.67%)
dbench4 Loadfile Execution Time
4.12.11 4.12.11
sle15-20170913 intel_pstate-v1r2
Amean 1 52.24 ( 0.00%) 42.69 ( 18.29%)
Amean 2 50.70 ( 0.00%) 36.09 ( 28.82%)
Amean 4 45.39 ( 0.00%) 35.15 ( 22.56%)
Amean 8 56.35 ( 0.00%) 53.75 ( 4.62%)
Amean 16 98.32 ( 0.00%) 93.80 ( 4.60%)
Amean 32 187.72 ( 0.00%) 181.48 ( 3.33%)
Amean 64 599.68 ( 0.00%) 509.57 ( 15.03%)
Amean 128 2768.50 ( 0.00%) 2671.00 ( 3.52%)
Small boosts are also observed for sockperf and netperf although utilisation
is low enough there that it's not as reliable.
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
drivers/cpufreq/intel_pstate.c | 45 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -43,6 +43,8 @@
#define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
#define INTEL_CPUFREQ_TRANSITION_DELAY 500
+#define CPUFREQ_SERVER_DEFAULT_SETPOINT 10
+
#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
@@ -2553,6 +2555,8 @@ static int intel_pstate_update_status(co
static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
+static int __initdata vanilla_policy;
+static int __initdata server_policy;
static unsigned int force_load __initdata;
static int __init intel_pstate_msrs_not_valid(void)
@@ -2727,6 +2731,9 @@ static int __init intel_pstate_init(void
{
const struct x86_cpu_id *id;
int rc;
+#if IS_ENABLED(CONFIG_ACPI)
+ const char *profile = NULL;
+#endif
if (no_load)
return -ENODEV;
@@ -2766,6 +2773,40 @@ hwp_cpu_matched:
pr_info("Intel P-state driver initializing\n");
+#if IS_ENABLED(CONFIG_ACPI)
+ if (!vanilla_policy) {
+ switch (acpi_gbl_FADT.preferred_profile) {
+ case PM_WORKSTATION:
+ profile = "Workstation";
+ break;
+ case PM_ENTERPRISE_SERVER:
+ profile = "Enterprise Server";
+ break;
+ case PM_SOHO_SERVER:
+ profile = "SOHO Server";
+ break;
+ case PM_PERFORMANCE_SERVER:
+ profile = "Performance Server";
+ break;
+ default:
+ if (server_policy)
+ profile = "Server";
+ };
+
+ if (profile) {
+ pr_info("Intel P-state setting %s policy\n", profile);
+
+ /*
+ * setpoint based on observations that siege maxes out
+ * due to internal mutex usage at roughly an average of
+ * 50%, so use a setpoint of 10% to boost the frequency
+ * enough to perform reasonably.
+ */
+ pid_params.setpoint = CPUFREQ_SERVER_DEFAULT_SETPOINT;
+ }
+ }
+#endif
+
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data)
return -ENOMEM;
@@ -2807,6 +2848,10 @@ static int __init intel_pstate_setup(cha
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
+ if (!strcmp(str, "vanilla_policy"))
+ vanilla_policy = 1;
+ if (!strcmp(str, "server_policy"))
+ server_policy = 1;
if (!strcmp(str, "per_cpu_perf_limits"))
per_cpu_limits = true;