From e788892ba3cc71d385b75895f7a375fbc659ce86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Jun 2016 23:24:15 +0200 Subject: cpufreq: governor: Get rid of governor events The design of the cpufreq governor API is not very straightforward, as struct cpufreq_governor provides only one callback to be invoked from different code paths for different purposes. The purpose it is invoked for is determined by its second "event" argument, causing it to act as a "callback multiplexer" of sorts. Unfortunately, that leads to extra complexity in governors, some of which implement the ->governor() callback as a switch statement that simply checks the event argument and invokes a separate function to handle that specific event. That extra complexity can be eliminated by replacing the all-purpose ->governor() callback with a family of callbacks to carry out specific governor operations: initialization and exit, start and stop and policy limits updates. That also turns out to reduce the code size too, so do it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- arch/powerpc/platforms/cell/cpufreq_spudemand.c | 72 ++++++++++++------------- 1 file changed, 34 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 82607d621aca..88301e53f085 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -85,61 +85,57 @@ static void spu_gov_cancel_work(struct spu_gov_info_struct *info) cancel_delayed_work_sync(&info->work); } -static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +static int spu_gov_start(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; - struct spu_gov_info_struct *info, *affected_info; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + struct spu_gov_info_struct *affected_info; int i; - int ret = 0; - info = &per_cpu(spu_gov_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) { - printk(KERN_ERR "cpu %d is not online\n", cpu); - ret = -EINVAL; - break; - } + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + return -EINVAL; + } - if (!policy->cur) { - printk(KERN_ERR "no cpu specified in policy\n"); - ret = -EINVAL; - break; - } + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + return -EINVAL; + } - /* initialize spu_gov_info for all affected cpus */ - for_each_cpu(i, policy->cpus) { - affected_info = &per_cpu(spu_gov_info, i); - affected_info->policy = policy; - } + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } - info->poll_int = POLL_TIME; + info->poll_int = POLL_TIME; - /* setup timer */ - spu_gov_init_work(info); + /* setup timer */ + spu_gov_init_work(info); - break; + return 0; +} - case CPUFREQ_GOV_STOP: - /* cancel timer */ - spu_gov_cancel_work(info); +static void spu_gov_stop(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + int i; - /* clean spu_gov_info for all affected cpus */ - for_each_cpu (i, policy->cpus) { - info = &per_cpu(spu_gov_info, i); - info->policy = NULL; - } + /* cancel timer */ + spu_gov_cancel_work(info); - break; + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; } - - return ret; } static struct cpufreq_governor spu_governor = { .name = "spudemand", - .governor = spu_gov_govern, + .start = spu_gov_start, + .stop = spu_gov_stop, .owner = THIS_MODULE, }; -- cgit v1.2.3 From a0c9b8cc43e0acada4574b33a19b5178520f1218 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 6 Jul 2016 16:07:57 -0700 Subject: x86: remove duplicate turbo ratio limit MSRs Remove MSR_NHM_TURBO_RATIO_LIMIT and MSR_IVT_TURBO_RATIO_LIMIT as they are duplicate. Signed-off-by: Srinivas Pandruvada Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5a73a9c62c39..56f4c6676b29 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -64,8 +64,6 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae #define MSR_TURBO_RATIO_LIMIT 0x000001ad #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af -- cgit v1.2.3 From 406f992e4a372dafbe3c2cff7efbb2002a5c8ebd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jul 2016 03:55:23 +0200 Subject: x86 / hibernate: Use hlt_play_dead() when resuming from hibernation On Intel hardware, native_play_dead() uses mwait_play_dead() by default and only falls back to the other methods if that fails. That also happens during resume from hibernation, when the restore (boot) kernel runs disable_nonboot_cpus() to take all of the CPUs except for the boot one offline. However, that is problematic, because the address passed to __monitor() in mwait_play_dead() is likely to be written to in the last phase of hibernate image restoration and that causes the "dead" CPU to start executing instructions again. Unfortunately, the page containing the address in that CPU's instruction pointer may not be valid any more at that point. First, that page may have been overwritten with image kernel memory contents already, so the instructions the CPU attempts to execute may simply be invalid. Second, the page tables previously used by that CPU may have been overwritten by image kernel memory contents, so the address in its instruction pointer is impossible to resolve then. A report from Varun Koyyalagunta and investigation carried out by Chen Yu show that the latter sometimes happens in practice. To prevent it from happening, temporarily change the smp_ops.play_dead pointer during resume from hibernation so that it points to a special "play dead" routine which uses hlt_play_dead() and avoids the inadvertent "revivals" of "dead" CPUs this way. A slightly unpleasant consequence of this change is that if the system is hibernated with one or more CPUs offline, it will generally draw more power after resume than it did before hibernation, because the physical state entered by CPUs via hlt_play_dead() is higher-power than the mwait_play_dead() one in the majority of cases. It is possible to work around this, but it is unclear how much of a problem that's going to be in practice, so the workaround will be implemented later if it turns out to be necessary. Link: https://bugzilla.kernel.org/show_bug.cgi?id=106371 Reported-by: Varun Koyyalagunta Original-by: Chen Yu Tested-by: Chen Yu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/smpboot.c | 2 +- arch/x86/power/cpu.c | 30 ++++++++++++++++++++++++++++++ kernel/power/hibernate.c | 7 ++++++- kernel/power/power.h | 2 ++ 5 files changed, 40 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..7427ca895a27 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -135,6 +135,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); +void hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..8264dfad9cf8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1622,7 +1622,7 @@ static inline void mwait_play_dead(void) } } -static inline void hlt_play_dead(void) +void hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d5f64996394a..b12c26e2e309 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,35 @@ void notrace restore_processor_state(void) EXPORT_SYMBOL(restore_processor_state); #endif +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU) +static void resume_play_dead(void) +{ + play_dead_common(); + tboot_shutdown(TB_SHUTDOWN_WFS); + hlt_play_dead(); +} + +int hibernate_resume_nonboot_cpu_disable(void) +{ + void (*play_dead)(void) = smp_ops.play_dead; + int ret; + + /* + * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop + * during hibernate image restoration, because it is likely that the + * monitored address will be actually written to at that time and then + * the "dead" CPU will attempt to execute instructions again, but the + * address in its instruction pointer may not be possible to resolve + * any more at that point (the page tables used by it previously may + * have been overwritten by hibernate image data). + */ + smp_ops.play_dead = resume_play_dead; + ret = disable_nonboot_cpus(); + smp_ops.play_dead = play_dead; + return ret; +} +#endif + /* * When bsp_check() is called in hibernate and suspend, cpu hotplug * is disabled already. So it's unnessary to handle race condition between diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 51441d87f0b6..5f3523e18e46 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -409,6 +409,11 @@ int hibernation_snapshot(int platform_mode) goto Close; } +int __weak hibernate_resume_nonboot_cpu_disable(void) +{ + return disable_nonboot_cpus(); +} + /** * resume_target_kernel - Restore system state from a hibernation image. * @platform_mode: Whether or not to use the platform driver. @@ -433,7 +438,7 @@ static int resume_target_kernel(bool platform_mode) if (error) goto Cleanup; - error = disable_nonboot_cpus(); + error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; diff --git a/kernel/power/power.h b/kernel/power/power.h index 064963e89194..242d8b827dd5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,6 +38,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) } #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ +extern int hibernate_resume_nonboot_cpu_disable(void); + /* * Keep some memory free so that I/O operations can succeed without paging * [Might this be more than 4 MB?] -- cgit v1.2.3