Blob Blame History Raw
From ea212497a99807e42fadcf9e2b82116d4aa3c9ee Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 12 Jun 2017 16:30:27 -0700
Subject: [PATCH] cpufreq: intel_pstate: Remove max/min fractions to limit
 performance

References: bnc#1068680 Update schedutil and intel_pstate to default to load-based policy
Patch-mainline: v4.13-rc1
Git-commit: 1a4fe38add8be45eb3873ad756561b9baf6bcaef

In the current model the max/min perf limits are a fraction of current
user space limits to the allowed max_freq or 100% for global limits.
This results in wrong ratio limits calculation because of rounding
issues for some user space limits.

Initially we tried to solve this issue by issue by having more shift
bits to increase precision. Still there are isolated cases where we still
have error.

This can be avoided by using ratios all together. Since the way we get
cpuinfo.max_freq is by multiplying scaling factor to max ratio, we can
easily keep the max/min ratios in terms of ratios and not fractions.

For example:
if the max ratio = 36
cpuinfo.max_freq = 36 * 100000 = 3600000

Suppose user space sets a limit of 1200000, then we can calculate
max ratio limit as
= 36 * 1200000 / 3600000
= 12
This will be correct for any user limits.

The other advantage is that, we don't need to do any calculation in the
fast path as ratio limit is already calculated via set_policy() callback.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 drivers/cpufreq/intel_pstate.c | 114 +++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 51 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7053d3a38a59..a872ab1ee080 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -234,10 +234,8 @@ struct global_params {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @min_perf:		Minimum capacity limit as a fraction of the maximum
- *			turbo P-state capacity.
- * @max_perf:		Maximum capacity limit as a fraction of the maximum
- *			turbo P-state capacity.
+ * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
+ * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -270,8 +268,8 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	int32_t	min_perf;
-	int32_t	max_perf;
+	int32_t	min_perf_ratio;
+	int32_t	max_perf_ratio;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -795,25 +793,32 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_hwp_set(unsigned int cpu)
+static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
+				     int *current_max)
 {
-	struct cpudata *cpu_data = all_cpu_data[cpu];
-	int min, hw_min, max, hw_max;
-	u64 value, cap;
-	s16 epp;
+	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
-	hw_min = HWP_LOWEST_PERF(cap);
 	if (global.no_turbo)
-		hw_max = HWP_GUARANTEED_PERF(cap);
+		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
-		hw_max = HWP_HIGHEST_PERF(cap);
+		*current_max = HWP_HIGHEST_PERF(cap);
+
+	*phy_max = HWP_HIGHEST_PERF(cap);
+}
+
+static void intel_pstate_hwp_set(unsigned int cpu)
+{
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+	int max, min;
+	u64 value;
+	s16 epp;
+
+	max = cpu_data->max_perf_ratio;
+	min = cpu_data->min_perf_ratio;
 
-	max = fp_ext_toint(hw_max * cpu_data->max_perf);
 	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
 		min = max;
-	else
-		min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
@@ -1534,8 +1539,7 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 	update_turbo_state();
 	pstate = intel_pstate_get_base_pstate(cpu);
-	pstate = max(cpu->pstate.min_pstate,
-		     fp_ext_toint(pstate * cpu->max_perf));
+	pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
 	intel_pstate_set_pstate(cpu, pstate);
 }
 
@@ -1706,9 +1710,8 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 	int max_pstate = intel_pstate_get_base_pstate(cpu);
 	int min_pstate;
 
-	min_pstate = max(cpu->pstate.min_pstate,
-			 fp_ext_toint(max_pstate * cpu->min_perf));
-	max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf));
+	min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
+	max_pstate = max(min_pstate, cpu->max_perf_ratio);
 	return clamp_t(int, pstate, min_pstate, max_pstate);
 }
 
@@ -1979,52 +1982,61 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 {
 	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
+	int max_state, turbo_max;
 
-	max_policy_perf = div_ext_fp(policy->max, max_freq);
-	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
+	/*
+	 * HWP needs some special consideration, because on BDX the
+	 * HWP_REQUEST uses abstract value to represent performance
+	 * rather than pure ratios.
+	 */
+	if (hwp_active) {
+		intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
+	} else {
+		max_state = intel_pstate_get_base_pstate(cpu);
+		turbo_max = cpu->pstate.turbo_pstate;
+	}
+
+	max_policy_perf = max_state * policy->max / max_freq;
 	if (policy->max == policy->min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = div_ext_fp(policy->min, max_freq);
+		min_policy_perf = max_state * policy->min / max_freq;
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
 
+	pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
+		 policy->cpu, max_state,
+		 min_policy_perf, max_policy_perf);
+
 	/* Normalize user input to [min_perf, max_perf] */
 	if (per_cpu_limits) {
-		cpu->min_perf = min_policy_perf;
-		cpu->max_perf = max_policy_perf;
+		cpu->min_perf_ratio = min_policy_perf;
+		cpu->max_perf_ratio = max_policy_perf;
 	} else {
 		int32_t global_min, global_max;
 
 		/* Global limits are in percent of the maximum turbo P-state. */
-		global_max = percent_ext_fp(global.max_perf_pct);
-		global_min = percent_ext_fp(global.min_perf_pct);
-		if (max_freq != cpu->pstate.turbo_freq) {
-			int32_t turbo_factor;
-
-			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
-						  cpu->pstate.max_pstate);
-			global_min = mul_ext_fp(global_min, turbo_factor);
-			global_max = mul_ext_fp(global_max, turbo_factor);
-		}
+		global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
+		global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
 		global_min = clamp_t(int32_t, global_min, 0, global_max);
 
-		cpu->min_perf = max(min_policy_perf, global_min);
-		cpu->min_perf = min(cpu->min_perf, max_policy_perf);
-		cpu->max_perf = min(max_policy_perf, global_max);
-		cpu->max_perf = max(min_policy_perf, cpu->max_perf);
+		pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu,
+			 global_min, global_max);
 
-		/* Make sure min_perf <= max_perf */
-		cpu->min_perf = min(cpu->min_perf, cpu->max_perf);
-	}
+		cpu->min_perf_ratio = max(min_policy_perf, global_min);
+		cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
+		cpu->max_perf_ratio = min(max_policy_perf, global_max);
+		cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
 
-	cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS);
-	cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS);
+		/* Make sure min_perf <= max_perf */
+		cpu->min_perf_ratio = min(cpu->min_perf_ratio,
+					  cpu->max_perf_ratio);
 
-	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 fp_ext_toint(cpu->max_perf * 100),
-		 fp_ext_toint(cpu->min_perf * 100));
+	}
+	pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu,
+		 cpu->max_perf_ratio,
+		 cpu->min_perf_ratio);
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -2127,8 +2139,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	cpu->max_perf = int_ext_tofp(1);
-	cpu->min_perf = 0;
+	cpu->max_perf_ratio = 0xFF;
+	cpu->min_perf_ratio = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;