From abf65ab7567783ec13c14ca456c529d8b7f033a6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 16 Nov 2017 13:53:53 +0000
Subject: [PATCH] cpufreq: intel_pstate: Use load-based policy by default but
 allow PID to be used

References: bnc#1068680 Update schedutil and intel_pstate to default to load-based policy
Patch-mainline: No, upstream only has the load-based algorithm but we want an optional fallback in the field

Commit 9d0ef7af1f2d ("cpufreq: intel_pstate: Do not use PID-based P-state
selection") notes that all machines whose preferred ACPI profile is not
"server" have used the load-based P-state selection algorithm since 4.12-rc1
without regressions. That patch moves all servers to the load-based algorithm
and removes the PID controller.

At one level, this is not a terrible idea because the history of the PID
controller is riddled with problems and SLE has frequently had to carry
out-of-tree patches trying to fix it. On the other hand, relying entirely
on schedutil causes problems of its own, as it relies on accurate information
from the scheduler.

This patch takes a half-way approach. It uses the load-based P-state
selection by default for servers, but that can be overridden via the kernel
command line in case it does not work out in the field.
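
For example, with the command-line options added below, a server can be
switched back to the PID controller at boot time with

	intel_pstate=server_pid_policy

while omitting the option, or specifying intel_pstate=server_policy, keeps
the load-based algorithm.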

Signed-off-by: Mel Gorman <mgorman@suse.com>
---
 drivers/cpufreq/intel_pstate.c |   62 ++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 25 deletions(-)

--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1980,7 +1980,7 @@ static struct pstate_funcs core_funcs =
 	.get_turbo = core_get_turbo_pstate,
 	.get_scaling = core_get_scaling,
 	.get_val = core_get_val,
-	.update_util = intel_pstate_update_util_pid,
+	.update_util = intel_pstate_update_util,
 };
 
 static const struct pstate_funcs silvermont_funcs = {
@@ -2013,7 +2013,7 @@ static const struct pstate_funcs knl_fun
 	.get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
 	.get_scaling = core_get_scaling,
 	.get_val = core_get_val,
-	.update_util = intel_pstate_update_util_pid,
+	.update_util = intel_pstate_update_util,
 };
 
 #define X86_MATCH(model, policy)					 \
@@ -2571,6 +2571,7 @@ static int no_hwp __initdata;
 static int hwp_only __initdata;
 static int __initdata vanilla_policy;
 static int __initdata server_policy;
+static int __initdata use_pid_policy;
 static unsigned int force_load __initdata;
 
 static int __init intel_pstate_msrs_not_valid(void)
@@ -2583,24 +2584,6 @@ static int __init intel_pstate_msrs_not_
 	return 0;
 }
 
-#ifdef CONFIG_ACPI
-static void intel_pstate_use_acpi_profile(void)
-{
-	switch (acpi_gbl_FADT.preferred_profile) {
-	case PM_MOBILE:
-	case PM_TABLET:
-	case PM_APPLIANCE_PC:
-	case PM_DESKTOP:
-	case PM_WORKSTATION:
-		pstate_funcs.update_util = intel_pstate_update_util;
-	}
-}
-#else
-static void intel_pstate_use_acpi_profile(void)
-{
-}
-#endif
-
 static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 {
 	pstate_funcs.get_max   = funcs->get_max;
@@ -2612,8 +2595,6 @@ static void __init copy_cpu_funcs(struct
 	pstate_funcs.get_vid   = funcs->get_vid;
 	pstate_funcs.update_util = funcs->update_util;
 	pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
-
-	intel_pstate_use_acpi_profile();
 }
 
 #ifdef CONFIG_ACPI
@@ -2806,6 +2787,18 @@ hwp_cpu_matched:
 #if IS_ENABLED(CONFIG_ACPI)
 	if (!vanilla_policy) {
 		switch (acpi_gbl_FADT.preferred_profile) {
+		case PM_MOBILE:
+			profile = "Mobile";
+			break;
+		case PM_TABLET:
+			profile = "Tablet";
+			break;
+		case PM_APPLIANCE_PC:
+			profile = "Appliance PC";
+			break;
+		case PM_DESKTOP:
+			profile = "Desktop";
+			break;
 		case PM_WORKSTATION:
 			profile = "Workstation";
 			break;
@@ -2824,17 +2817,24 @@ hwp_cpu_matched:
 		};
 
 		if (profile) {
-			pr_info("Intel P-state setting %s policy\n", profile);
+			pr_info("Intel P-state setting %s %s policy\n", profile,
+				use_pid_policy ? "PID" : "Load-based");
 
 			/*
 			 * setpoint based on observations that siege maxes out
 			 * due to internal mutex usage at roughly an average of
 			 * 50% set use a setpoint of 30% to boost the frequency
 			 * enough to perform reasonably.
+			 *
+			 * Note that this is meaningless unless the PID
+			 * controller is used, which means specifying
+			 * vanilla_pid_policy or server_pid_policy.
 			 */
 			pid_params.setpoint = CPUFREQ_SERVER_DEFAULT_SETPOINT;
 		}
 	}
+	if (use_pid_policy)
+		pstate_funcs.update_util = intel_pstate_update_util_pid;
 #endif
 
 	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
@@ -2878,10 +2878,22 @@ static int __init intel_pstate_setup(cha
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
-	if (!strcmp(str, "vanilla_policy"))
+	if (!strcmp(str, "vanilla_policy")) {
+		vanilla_policy = 1;
+		use_pid_policy = 0;
+	}
+	if (!strcmp(str, "vanilla_pid_policy")) {
 		vanilla_policy = 1;
-	if (!strcmp(str, "server_policy"))
+		use_pid_policy = 1;
+	}
+	if (!strcmp(str, "server_policy")) {
 		server_policy = 1;
+		use_pid_policy = 0;
+	}
+	if (!strcmp(str, "server_pid_policy")) {
+		server_policy = 1;
+		use_pid_policy = 1;
+	}
 	if (!strcmp(str, "per_cpu_perf_limits"))
 		per_cpu_limits = true;