From 6a35fc2d6c22bafe45117cdc5d8cee332244edbb Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Wed, 14 Oct 2015 16:11:59 -0700
Subject: cpufreq: intel_pstate: get P1 from TAR when available

After Ivybridge, the max non-turbo ratio obtained from the platform info
MSR is not always the guaranteed P1 on client platforms. The turbo
activation ratio (TAR) determines the max non-turbo ratio for the current
TDP level, whereas the ratio in platform info is the physical max. The TAR
MSR can be locked, so updating this value is not possible on all platforms.

This change gets the ratio from MSR TURBO_ACTIVATION_RATIO if available,
but also does some sanity checking to make sure that this value is correct.
The sanity check involves reading the TDP ratio for the current TDP control
value when the platform has configurable TDP, and matching TAR against it.

Signed-off-by: Srinivas Pandruvada
Acked-by: Kristen Carlson Accardi
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/intel_pstate.c | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

(limited to 'drivers/cpufreq/intel_pstate.c')

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3af9dd7332e6..da92d0201d52 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -43,7 +43,6 @@
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
 
-
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
@@ -593,10 +592,42 @@ static int core_get_min_pstate(void)
 
 static int core_get_max_pstate(void)
 {
-	u64 value;
+	u64 tar;
+	u64 plat_info;
+	int max_pstate;
+	int err;
+
+	rdmsrl(MSR_PLATFORM_INFO, plat_info);
+	max_pstate = (plat_info >> 8) & 0xFF;
+
+	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
+	if (!err) {
+		/* Do some sanity checking for safety */
+		if (plat_info & 0x600000000) {
+			u64 tdp_ctrl;
+			u64 tdp_ratio;
+			int tdp_msr;
+
+			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+			if (err)
+				goto skip_tar;
+
+			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
+			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+			if (err)
+				goto skip_tar;
+
+			if (tdp_ratio - 1 == tar) {
+				max_pstate = tar;
+				pr_debug("max_pstate=TAC %x\n", max_pstate);
+			} else {
+				goto skip_tar;
+			}
+		}
+	}
 
-	rdmsrl(MSR_PLATFORM_INFO, value);
-	return (value >> 8) & 0xFF;
+skip_tar:
+	return max_pstate;
 }
 
 static int core_get_turbo_pstate(void)
--
cgit v1.2.3
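As an aside to the patch above (not part of the series): the same sanity check can be reproduced from user space through the msr driver, which can be handy when debugging why intel_pstate reports a lower P1 than expected. The sketch below is only illustrative; it assumes /dev/cpu/0/msr exists (msr module loaded, run as root) and uses the architectural MSR addresses (0xCE PLATFORM_INFO, 0x648 CONFIG_TDP_NOMINAL, 0x64B CONFIG_TDP_CONTROL, 0x64C TURBO_ACTIVATION_RATIO); the file name tarcheck.c is made up for the example.

/* tarcheck.c - print the ratios core_get_max_pstate() compares.
 * Build: gcc -O2 -o tarcheck tarcheck.c ; run as root with the msr module loaded.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_PLATFORM_INFO		0xce
#define MSR_CONFIG_TDP_NOMINAL		0x648
#define MSR_CONFIG_TDP_CONTROL		0x64b
#define MSR_TURBO_ACTIVATION_RATIO	0x64c

static int rdmsr(int fd, uint32_t reg, uint64_t *val)
{
	/* /dev/cpu/N/msr is addressed by MSR number via the file offset */
	return pread(fd, val, sizeof(*val), reg) == sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t plat_info, tar, tdp_ctrl, tdp_ratio;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || rdmsr(fd, MSR_PLATFORM_INFO, &plat_info)) {
		perror("msr");
		return 1;
	}

	/* bits 15:8 of PLATFORM_INFO: maximum (physical) non-turbo ratio */
	printf("physical max non-turbo ratio: %llu\n",
	       (unsigned long long)((plat_info >> 8) & 0xff));

	/* bits 34:33 non-zero: ConfigTDP levels are supported */
	if ((plat_info & 0x600000000ULL) &&
	    !rdmsr(fd, MSR_TURBO_ACTIVATION_RATIO, &tar) &&
	    !rdmsr(fd, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl) &&
	    !rdmsr(fd, MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3), &tdp_ratio))
		printf("TAR ratio: %llu, ConfigTDP level %llu ratio: %llu\n",
		       (unsigned long long)(tar & 0xff),
		       (unsigned long long)(tdp_ctrl & 0x3),
		       (unsigned long long)(tdp_ratio & 0xff));

	close(fd);
	return 0;
}

If the printed TAR equals the ConfigTDP ratio minus one, core_get_max_pstate() above reports TAR as the max non-turbo P-state instead of the physical maximum from PLATFORM_INFO.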
From 3bcc6fa971c06151d6bf90cb0dc80807f71b93f6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Wed, 14 Oct 2015 16:12:00 -0700
Subject: cpufreq: intel-pstate: Use separate max pstate for scaling

Systems with configurable TDP have multiple max non-turbo P-states. The
intel_pstate driver uses the max non-turbo P-state for scaling, but using
the current max non-turbo P-state (which may be TAR/TDP-limited) causes
underestimation of the next P-state. So keep using the physical max
non-turbo P-state for scaling, as before.

Signed-off-by: Srinivas Pandruvada
Acked-by: Kristen Carlson Accardi
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/intel_pstate.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'drivers/cpufreq/intel_pstate.c')

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index da92d0201d52..1369afdc1e19 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -77,6 +77,7 @@ struct pstate_data {
 	int	current_pstate;
 	int	min_pstate;
 	int	max_pstate;
+	int	max_pstate_physical;
 	int	scaling;
 	int	turbo_pstate;
 };
@@ -126,6 +127,7 @@ struct pstate_adjust_policy {
 
 struct pstate_funcs {
 	int (*get_max)(void);
+	int (*get_max_physical)(void);
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
@@ -590,6 +592,14 @@ static int core_get_min_pstate(void)
 	return (value >> 40) & 0xFF;
 }
 
+static int core_get_max_pstate_physical(void)
+{
+	u64 value;
+
+	rdmsrl(MSR_PLATFORM_INFO, value);
+	return (value >> 8) & 0xFF;
+}
+
 static int core_get_max_pstate(void)
 {
 	u64 tar;
@@ -683,6 +693,7 @@ static struct cpu_defaults core_params = {
 	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
 		.get_min = core_get_min_pstate,
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
@@ -701,6 +712,7 @@ static struct cpu_defaults byt_params = {
 	},
 	.funcs = {
 		.get_max = byt_get_max_pstate,
+		.get_max_physical = byt_get_max_pstate,
 		.get_min = byt_get_min_pstate,
 		.get_turbo = byt_get_turbo_pstate,
 		.set = byt_set_pstate,
@@ -720,6 +732,7 @@ static struct cpu_defaults knl_params = {
 	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
 		.get_min = core_get_min_pstate,
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
@@ -774,6 +787,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
 	cpu->pstate.max_pstate = pstate_funcs.get_max();
+	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
 	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	cpu->pstate.scaling = pstate_funcs.get_scaling();
 
@@ -792,7 +806,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
 
 	sample->freq = fp_toint(
 		mul_fp(int_tofp(
-			cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+			cpu->pstate.max_pstate_physical *
+				cpu->pstate.scaling / 100),
 			core_pct));
 
 	sample->core_pct_busy = (int32_t)core_pct;
@@ -860,7 +875,7 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 	 * specified pstate.
 	 */
 	core_busy = cpu->sample.core_pct_busy;
-	max_pstate = int_tofp(cpu->pstate.max_pstate);
+	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
@@ -1144,6 +1159,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy)
 static void copy_cpu_funcs(struct pstate_funcs *funcs)
 {
 	pstate_funcs.get_max = funcs->get_max;
+	pstate_funcs.get_max_physical = funcs->get_max_physical;
 	pstate_funcs.get_min = funcs->get_min;
 	pstate_funcs.get_turbo = funcs->get_turbo;
 	pstate_funcs.get_scaling = funcs->get_scaling;
--
cgit v1.2.3
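A worked illustration of the underestimation, with hypothetical numbers: intel_pstate_get_scaled_busy() multiplies core_pct_busy by max_pstate / current_pstate. If the physical max non-turbo ratio is 24 but the current ConfigTDP level yields a TAR-derived max_pstate of 20, scaling by 20 instead of 24 reports only 20/24 (about 83%) of the real load, so the governor requests a lower next P-state than the workload warrants. Keeping max_pstate_physical for intel_pstate_calc_busy() and the scaled-busy calculation avoids that.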
From 37afb00032424d684a48d649fcfb8b5e4f17c409 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Wed, 14 Oct 2015 16:12:01 -0700
Subject: cpufreq: intel_pstate: Use ACPI perf configuration

Use ACPI _PSS to limit the intel_pstate turbo, max and min ratios. The
driver uses the ACPI processor performance library calls to register
performance.

The following logic is used to adjust the intel_pstate driver limits:
- If there is no turbo entry in _PSS, then disable intel_pstate turbo and
  limit to the non-turbo max.
- If the non-turbo max ratio is more than the _PSS max non-turbo value,
  then set the max non-turbo ratio to the _PSS non-turbo max.
- If the min ratio is less than the _PSS min, then change the min ratio to
  match the _PSS min.
- Scale the _PSS turbo frequency to the max turbo frequency based on the
  control value.

This feature can be disabled with the kernel parameter:
intel_pstate=no_acpi

Signed-off-by: Srinivas Pandruvada
Acked-by: Kristen Carlson Accardi
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/Kconfig.x86    |   1 +
 drivers/cpufreq/intel_pstate.c | 171 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 1 deletion(-)

(limited to 'drivers/cpufreq/intel_pstate.c')

diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index c59bdcb83217..adbd1de1cea5 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,6 +5,7 @@
 config X86_INTEL_PSTATE
 	bool "Intel P state control"
 	depends on X86
+	select ACPI_PROCESSOR if ACPI
 	help
 	  This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1369afdc1e19..041cb4107991 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -34,6 +34,10 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 
+#if IS_ENABLED(CONFIG_ACPI)
+#include <acpi/processor.h>
+#endif
+
 #define BYT_RATIOS		0x66a
 #define BYT_VIDS		0x66b
 #define BYT_TURBO_RATIOS	0x66c
@@ -113,6 +117,9 @@ struct cpudata {
 	u64	prev_mperf;
 	u64	prev_tsc;
 	struct sample sample;
+#if IS_ENABLED(CONFIG_ACPI)
+	struct acpi_processor_performance acpi_perf_data;
+#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -143,6 +150,7 @@ struct cpu_defaults {
 static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
+static int no_acpi_perf;
 
 struct perf_limits {
 	int no_turbo;
@@ -170,6 +178,153 @@ static struct perf_limits limits = {
 	.min_sysfs_pct = 0,
 };
 
+#if IS_ENABLED(CONFIG_ACPI)
+/*
+ * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
+ * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
+ * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
+ * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
+ * target ratio 0x17. The _PSS control value stores in a format which can be
+ * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
+ * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
+ * This function converts the _PSS control value to intel pstate driver format
+ * for comparison and assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+	return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+	int ret;
+	bool turbo_absent = false;
+	int max_pstate_index;
+	int min_pss_ctl, max_pss_ctl, turbo_pss_ctl;
+	int i;
+
+	cpu = all_cpu_data[policy->cpu];
+
+	pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n",
+		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+		 cpu->pstate.turbo_pstate);
+
+	if (!cpu->acpi_perf_data.shared_cpu_map &&
+	    zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map,
+				    GFP_KERNEL, cpu_to_node(policy->cpu))) {
+		return -ENOMEM;
+	}
+
+	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+						  policy->cpu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
+	 * guarantee that the states returned by it map to the states in our
+	 * list directly.
+	 */
+	if (cpu->acpi_perf_data.control_register.space_id !=
+					ACPI_ADR_SPACE_FIXED_HARDWARE)
+		return -EIO;
+
+	pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu);
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++)
+		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
+			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
+			 (u32) cpu->acpi_perf_data.states[i].power,
+			 (u32) cpu->acpi_perf_data.states[i].control);
+
+	/*
+	 * If there is only one entry _PSS, simply ignore _PSS and continue as
+	 * usual without taking _PSS into account
+	 */
+	if (cpu->acpi_perf_data.state_count < 2)
+		return 0;
+
+	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+	min_pss_ctl = convert_to_native_pstate_format(cpu,
+				cpu->acpi_perf_data.state_count - 1);
+	/* Check if there is a turbo freq in _PSS */
+	if (turbo_pss_ctl <= cpu->pstate.max_pstate &&
+	    turbo_pss_ctl > cpu->pstate.min_pstate) {
+		pr_debug("intel_pstate: no turbo range exists in _PSS\n");
+		limits.no_turbo = limits.turbo_disabled = 1;
+		cpu->pstate.turbo_pstate = cpu->pstate.max_pstate;
+		turbo_absent = true;
+	}
+
+	/* Check if the max non turbo p state < Intel P state max */
+	max_pstate_index = turbo_absent ? 0 : 1;
+	max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index);
+	if (max_pss_ctl < cpu->pstate.max_pstate &&
+	    max_pss_ctl > cpu->pstate.min_pstate)
+		cpu->pstate.max_pstate = max_pss_ctl;
+
+	/* check If min perf > Intel P State min */
+	if (min_pss_ctl > cpu->pstate.min_pstate &&
+	    min_pss_ctl < cpu->pstate.max_pstate) {
+		cpu->pstate.min_pstate = min_pss_ctl;
+		policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling;
+	}
+
+	if (turbo_absent)
+		policy->cpuinfo.max_freq = cpu->pstate.max_pstate *
+						cpu->pstate.scaling;
+	else {
+		policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate *
+						cpu->pstate.scaling;
+		/*
+		 * The _PSS table doesn't contain whole turbo frequency range.
+		 * This just contains +1 MHZ above the max non turbo frequency,
+		 * with control value corresponding to max turbo ratio. But
+		 * when cpufreq set policy is called, it will call with this
+		 * max frequency, which will cause a reduced performance as
+		 * this driver uses real max turbo frequency as the max
+		 * frequeny. So correct this frequency in _PSS table to
+		 * correct max turbo frequency based on the turbo ratio.
+		 * Also need to convert to MHz as _PSS freq is in MHz.
+		 */
+		cpu->acpi_perf_data.states[0].core_frequency =
+						turbo_pss_ctl * 100;
+	}
+
+	pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n",
+		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+		 cpu->pstate.turbo_pstate);
+	pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n",
+		 policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
+
+	return 0;
+}
+
+static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+
+	if (!no_acpi_perf)
+		return 0;
+
+	cpu = all_cpu_data[policy->cpu];
+	acpi_processor_unregister_performance(policy->cpu);
+	return 0;
+}
+
+#else
+static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 			     int deadband, int integral)
 {
 	pid->setpoint = setpoint;
@@ -1115,18 +1270,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	if (!no_acpi_perf)
+		intel_pstate_init_perf_limits(policy);
+	/*
+	 * If there is no acpi perf data or error, we ignore and use Intel P
+	 * state calculated limits, So this is not fatal error.
+	 */
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
 
 	return 0;
 }
 
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+	return intel_pstate_exit_perf_limits(policy);
+}
+
 static struct cpufreq_driver intel_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.verify		= intel_pstate_verify_policy,
 	.setpolicy	= intel_pstate_set_policy,
 	.get		= intel_pstate_get,
 	.init		= intel_pstate_cpu_init,
+	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1168,7 +1335,6 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 }
 
 #if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
 
 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1360,6 +1526,9 @@ static int __init intel_pstate_setup(char *str)
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+	if (!strcmp(str, "no_acpi"))
+		no_acpi_perf = 1;
+
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
--
cgit v1.2.3
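Two small worked examples of the conversions above (the values are illustrative): a _PSS entry whose control value is 0x1700 carries the target ratio in its high byte, so convert_to_native_pstate_format() returns 0x17 (23), which at the core scaling of 100 MHz per ratio step corresponds to 2300 MHz. And if the first (turbo) _PSS entry advertises core_frequency = 2301 MHz (max non-turbo + 1 MHz) with control = 0x2200, the code rewrites states[0].core_frequency to 0x22 * 100 = 3400 MHz so that the cpufreq policy maximum reflects the real max turbo frequency.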
From 4ef45148701917fbc08a7c05bc6a3bb0c0573047 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Wed, 14 Oct 2015 16:12:03 -0700
Subject: cpufreq: intel_pstate: Avoid calculation for max/min

When cpufreq requests that a policy be set, look into _PSS and get the
control values instead of using the max/min perf calculations. Those
calculations miss the next control state in boundary conditions.

Signed-off-by: Srinivas Pandruvada
Acked-by: Kristen Carlson Accardi
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/intel_pstate.c | 50 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

(limited to 'drivers/cpufreq/intel_pstate.c')

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 041cb4107991..c568226bbcdf 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -163,6 +163,8 @@ struct perf_limits {
 	int max_sysfs_pct;
 	int min_policy_pct;
 	int min_sysfs_pct;
+	int max_perf_ctl;
+	int min_perf_ctl;
 };
 
 static struct perf_limits limits = {
@@ -176,6 +178,8 @@ static struct perf_limits limits = {
 	.max_sysfs_pct = 100,
 	.min_policy_pct = 0,
 	.min_sysfs_pct = 0,
+	.max_perf_ctl = 0,
+	.min_perf_ctl = 0,
 };
 
 #if IS_ENABLED(CONFIG_ACPI)
@@ -909,12 +913,23 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	 * policy, or by cpu specific default values determined through
 	 * experimentation.
 	 */
-	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
-	*max = clamp_t(int, max_perf_adj,
-			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
+	if (limits.max_perf_ctl && limits.max_sysfs_pct >=
+						limits.max_policy_pct) {
+		*max = limits.max_perf_ctl;
+	} else {
+		max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf),
+					limits.max_perf));
+		*max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate,
+			       cpu->pstate.turbo_pstate);
+	}
 
-	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
-	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	if (limits.min_perf_ctl) {
+		*min = limits.min_perf_ctl;
+	} else {
+		min_perf = fp_toint(mul_fp(int_tofp(max_perf),
+				    limits.min_perf));
+		*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	}
 }
 
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
@@ -1184,6 +1199,12 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
+#if IS_ENABLED(CONFIG_ACPI)
+	struct cpudata *cpu;
+	int i;
+#endif
+	pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__,
+		 policy->cpuinfo.max_freq, policy->max);
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
@@ -1196,6 +1217,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		limits.max_perf_pct = 100;
 		limits.max_perf = int_tofp(1);
 		limits.no_turbo = 0;
+		limits.max_perf_ctl = 0;
+		limits.min_perf_ctl = 0;
 		return 0;
 	}
 
@@ -1216,6 +1239,23 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
 
+#if IS_ENABLED(CONFIG_ACPI)
+	cpu = all_cpu_data[policy->cpu];
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+		int control;
+
+		control = convert_to_native_pstate_format(cpu, i);
+		if (control * cpu->pstate.scaling == policy->max)
+			limits.max_perf_ctl = control;
+		if (control * cpu->pstate.scaling == policy->min)
+			limits.min_perf_ctl = control;
+	}
+
+	pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n",
+		 policy->cpuinfo.max_freq, policy->max, limits.min_perf_ctl,
+		 limits.max_perf_ctl);
+#endif
+
 	if (hwp_active)
 		intel_pstate_hwp_set();
--
cgit v1.2.3
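For example (illustrative numbers again): with the core scaling of 100000 kHz per ratio step, a policy->max of 2300000 kHz matches the _PSS entry whose control converts to ratio 23, so limits.max_perf_ctl is set to 23 and intel_pstate_get_min_max() hands that ratio back directly. The previous percentage-of-turbo calculation, rounded through fixed point and then clamped, could land on a neighbouring ratio right at such a boundary, which is the case this patch avoids.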