diff options
76 files changed, 2887 insertions, 925 deletions
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 09aa2e949787..463cf7e73db8 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -269,16 +269,16 @@ are the following: ``scaling_cur_freq`` Current frequency of all of the CPUs belonging to this policy (in kHz). - For the majority of scaling drivers, this is the frequency of the last - P-state requested by the driver from the hardware using the scaling + In the majority of cases, this is the frequency of the last P-state + requested by the scaling driver from the hardware using the scaling interface provided by it, which may or may not reflect the frequency the CPU is actually running at (due to hardware design and other limitations). - Some scaling drivers (e.g. |intel_pstate|) attempt to provide - information more precisely reflecting the current CPU frequency through - this attribute, but that still may not be the exact current CPU - frequency as seen by the hardware at the moment. + Some architectures (e.g. ``x86``) may attempt to provide information + more precisely reflecting the current CPU frequency through this + attribute, but that still may not be the exact current CPU frequency as + seen by the hardware at the moment. ``scaling_driver`` The scaling driver currently in use. diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 33d703989ea8..1d6249825efc 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -157,10 +157,8 @@ Without HWP, this P-state selection algorithm is always the same regardless of the processor model and platform configuration. It selects the maximum P-state it is allowed to use, subject to limits set via -``sysfs``, every time the P-state selection computations are carried out by the -driver's utilization update callback for the given CPU (that does not happen -more often than every 10 ms), but the hardware configuration will not be changed -if the new P-state is the same as the current one. +``sysfs``, every time the driver configuration for the given CPU is updated +(e.g. via ``sysfs``). This is the default P-state selection algorithm if the :c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 63725498bd20..e36d261b9ba6 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -186,20 +186,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. compatible = "operating-points-v2"; opp-shared; - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp@1100000000 { + opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp@1200000000 { + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; clock-latency-ns = <290000>; @@ -265,20 +265,20 @@ independently. * independently. */ - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp@1100000000 { + opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp@1200000000 { + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000; @@ -341,20 +341,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp@1100000000 { + opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp@1200000000 { + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000>; @@ -367,20 +367,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp@1300000000 { + opp-1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1050000 1045000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; }; - opp@1400000000 { + opp-1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1075000>; opp-microamp = <100000>; clock-latency-ns = <400000>; }; - opp@1500000000 { + opp-1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1100000 1010000 1110000>; opp-microamp = <95000>; @@ -409,7 +409,7 @@ Example 4: Handling multiple regulators compatible = "operating-points-v2"; opp-shared; - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000>, /* Supply 0 */ <960000>, /* Supply 1 */ @@ -422,7 +422,7 @@ Example 4: Handling multiple regulators /* OR */ - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <975000 970000 985000>, /* Supply 0 */ <965000 960000 975000>, /* Supply 1 */ @@ -435,7 +435,7 @@ Example 4: Handling multiple regulators /* OR */ - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <975000 970000 985000>, /* Supply 0 */ <965000 960000 975000>, /* Supply 1 */ @@ -467,7 +467,7 @@ Example 5: opp-supported-hw status = "okay"; opp-shared; - opp@600000000 { + opp-600000000 { /* * Supports all substrate and process versions for 0xF * cuts, i.e. only first four cuts. @@ -478,7 +478,7 @@ Example 5: opp-supported-hw ... }; - opp@800000000 { + opp-800000000 { /* * Supports: * - cuts: only one, 6th cut (represented by 6th bit). @@ -510,7 +510,7 @@ Example 6: opp-microvolt-<name>, opp-microamp-<name>: compatible = "operating-points-v2"; opp-shared; - opp@1000000000 { + opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt-slow = <915000 900000 925000>; opp-microvolt-fast = <975000 970000 985000>; @@ -518,7 +518,7 @@ Example 6: opp-microvolt-<name>, opp-microamp-<name>: opp-microamp-fast = <71000>; }; - opp@1200000000 { + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */ <925000 910000 935000>; /* Supply vcc1 */ diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt index d3a5a93a65cd..43c21fb04564 100644 --- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt +++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt @@ -32,6 +32,7 @@ SoC is on the same page. Required properties: - compatible: should be one of: - "rockchip,rk3188-io-voltage-domain" for rk3188 + - "rockchip,rk3228-io-voltage-domain" for rk3228 - "rockchip,rk3288-io-voltage-domain" for rk3288 - "rockchip,rk3328-io-voltage-domain" for rk3328 - "rockchip,rk3368-io-voltage-domain" for rk3368 @@ -59,6 +60,12 @@ Possible supplies for rk3188: - vccio1-supply: The supply connected to VCCIO1. Sometimes also labeled VCCIO1 and VCCIO2. +Possible supplies for rk3228: +- vccio1-supply: The supply connected to VCCIO1. +- vccio2-supply: The supply connected to VCCIO2. +- vccio3-supply: The supply connected to VCCIO3. +- vccio4-supply: The supply connected to VCCIO4. + Possible supplies for rk3288: - audio-supply: The supply connected to APIO4_VDD. - bb-supply: The supply connected to APIO5_VDD. diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index ee69d7532172..0fde3dcf077a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -105,9 +105,9 @@ knows what to do to handle the device). In particular, if the driver requires remote wakeup capability (i.e. hardware mechanism allowing the device to request a change of its power state, such as -PCI PME) for proper functioning and device_run_wake() returns 'false' for the +PCI PME) for proper functioning and device_can_wakeup() returns 'false' for the device, then ->runtime_suspend() should return -EBUSY. On the other hand, if -device_run_wake() returns 'true' for the device and the device is put into a +device_can_wakeup() returns 'true' for the device and the device is put into a low-power state during the execution of the suspend callback, it is expected that remote wakeup will be enabled for the device. Generally, remote wakeup should be enabled for all input devices put into low-power states at run time. @@ -253,9 +253,6 @@ defined in include/linux/pm.h: being executed for that device and it is not practical to wait for the suspend to complete; means "start a resume as soon as you've suspended" - unsigned int run_wake; - - set if the device is capable of generating runtime wake-up events - enum rpm_status runtime_status; - the runtime PM status of the device; this field's initial value is RPM_SUSPENDED, which means that each device is initially regarded by the diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 18b162322eff..d406894cd9a2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -251,9 +251,13 @@ #define HWP_MIN_PERF(x) (x & 0xff) #define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) -#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -476,9 +480,11 @@ #define MSR_MISC_PWR_MGMT 0x000001aa #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 -#define ENERGY_PERF_BIAS_PERFORMANCE 0 -#define ENERGY_PERF_BIAS_NORMAL 6 -#define ENERGY_PERF_BIAS_POWERSAVE 15 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 6136a18152af..2bd96b4df140 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -42,8 +42,7 @@ struct saved_context { set_debugreg((thread)->debugreg##register, register) /* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -extern char core_restore_code; -extern char restore_registers; +extern char core_restore_code[]; +extern char restore_registers[]; #endif /* _ASM_X86_SUSPEND_64_H */ diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8233a630280f..dde437f5d14f 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -167,7 +167,8 @@ static int __init ffh_cstate_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_INTEL) + if (c->x86_vendor != X86_VENDOR_INTEL && + c->x86_vendor != X86_VENDOR_AMD) return -1; cpu_cstate_entry = alloc_percpu(struct cstate_entry); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 52000010c62e..cdf82492b770 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -21,6 +21,7 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o +obj-$(CONFIG_CPU_FREQ) += aperfmperf.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c new file mode 100644 index 000000000000..d869c8671e36 --- /dev/null +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -0,0 +1,79 @@ +/* + * x86 APERF/MPERF KHz calculation for + * /sys/.../cpufreq/scaling_cur_freq + * + * Copyright (C) 2017 Intel Corp. + * Author: Len Brown <len.brown@intel.com> + * + * This file is licensed under GPLv2. + */ + +#include <linux/jiffies.h> +#include <linux/math64.h> +#include <linux/percpu.h> +#include <linux/smp.h> + +struct aperfmperf_sample { + unsigned int khz; + unsigned long jiffies; + u64 aperf; + u64 mperf; +}; + +static DEFINE_PER_CPU(struct aperfmperf_sample, samples); + +/* + * aperfmperf_snapshot_khz() + * On the current CPU, snapshot APERF, MPERF, and jiffies + * unless we already did it within 10ms + * calculate kHz, save snapshot + */ +static void aperfmperf_snapshot_khz(void *dummy) +{ + u64 aperf, aperf_delta; + u64 mperf, mperf_delta; + struct aperfmperf_sample *s = this_cpu_ptr(&samples); + + /* Don't bother re-computing within 10 ms */ + if (time_before(jiffies, s->jiffies + HZ/100)) + return; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + aperf_delta = aperf - s->aperf; + mperf_delta = mperf - s->mperf; + + /* + * There is no architectural guarantee that MPERF + * increments faster than we can read it. + */ + if (mperf_delta == 0) + return; + + /* + * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then + * khz = (cpu_khz * aperf_delta) / mperf_delta + */ + if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta) + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); + else /* khz = aperf_delta / (mperf_delta / cpu_khz) */ + s->khz = div64_u64(aperf_delta, + div64_u64(mperf_delta, cpu_khz)); + s->jiffies = jiffies; + s->aperf = aperf; + s->mperf = mperf; +} + +unsigned int arch_freq_get_on_cpu(int cpu) +{ + if (!cpu_khz) + return 0; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return 0; + + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); + + return per_cpu(samples.khz, cpu); +} diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 6df621ae62a7..218f79825b3c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -2,7 +2,6 @@ #include <linux/timex.h> #include <linux/string.h> #include <linux/seq_file.h> -#include <linux/cpufreq.h> /* * Get CPU information for use by the procfs. @@ -76,14 +75,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->microcode) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); - if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get(cpu); - - if (!freq) - freq = cpu_khz; + if (cpu_has(c, X86_FEATURE_TSC)) seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } + cpu_khz / 1000, (cpu_khz % 1000)); /* Cache size */ if (c->x86_cache_size >= 0) diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index e3e62c8a8e70..f2598d81cd55 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -147,7 +147,7 @@ static int relocate_restore_code(void) if (!relocated_restore_code) return -ENOMEM; - memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); /* Make the page containing the relocated code executable */ pgd = (pgd_t *)__va(read_cr3_pa()) + @@ -293,8 +293,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = (unsigned long)&restore_registers; - rdr->jump_address_phys = __pa_symbol(&restore_registers); + rdr->jump_address = (unsigned long)restore_registers; + rdr->jump_address_phys = __pa_symbol(restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index d42eeef9d928..1cbb88d938e5 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -782,7 +782,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume) if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) || (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) && (battery->capacity_now <= battery->alarm))) - pm_wakeup_event(&battery->device->dev, 0); + acpi_pm_wakeup_event(&battery->device->dev); return result; } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index e19f530f1083..91cfdf377df7 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -217,7 +217,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state) } if (state) - pm_wakeup_event(&device->dev, 0); + acpi_pm_wakeup_event(&device->dev); ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); if (ret == NOTIFY_DONE) @@ -402,7 +402,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) } else { int keycode; - pm_wakeup_event(&device->dev, 0); + acpi_pm_wakeup_event(&device->dev); if (button->suspended) break; @@ -534,6 +534,7 @@ static int acpi_button_add(struct acpi_device *device) lid_device = device; } + device_init_wakeup(&device->dev, true); printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device)); return 0; diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 993fd31394c8..28938b5a334e 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -24,6 +24,7 @@ #include <linux/pm_qos.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include "internal.h" @@ -385,6 +386,12 @@ EXPORT_SYMBOL(acpi_bus_power_manageable); #ifdef CONFIG_PM static DEFINE_MUTEX(acpi_pm_notifier_lock); +void acpi_pm_wakeup_event(struct device *dev) +{ + pm_wakeup_dev_event(dev, 0, acpi_s2idle_wakeup()); +} +EXPORT_SYMBOL_GPL(acpi_pm_wakeup_event); + static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) { struct acpi_device *adev; @@ -399,9 +406,9 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) mutex_lock(&acpi_pm_notifier_lock); if (adev->wakeup.flags.notifier_present) { - __pm_wakeup_event(adev->wakeup.ws, 0); - if (adev->wakeup.context.work.func) - queue_pm_work(&adev->wakeup.context.work); + pm_wakeup_ws_event(adev->wakeup.ws, 0, acpi_s2idle_wakeup()); + if (adev->wakeup.context.func) + adev->wakeup.context.func(&adev->wakeup.context); } mutex_unlock(&acpi_pm_notifier_lock); @@ -413,7 +420,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) * acpi_add_pm_notifier - Register PM notify handler for given ACPI device. * @adev: ACPI device to add the notify handler for. * @dev: Device to generate a wakeup event for while handling the notification. - * @work_func: Work function to execute when handling the notification. + * @func: Work function to execute when handling the notification. * * NOTE: @adev need not be a run-wake or wakeup device to be a valid source of * PM wakeup events. For example, wakeup events may be generated for bridges @@ -421,11 +428,11 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) * bridge itself doesn't have a wakeup GPE associated with it. */ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, - void (*work_func)(struct work_struct *work)) + void (*func)(struct acpi_device_wakeup_context *context)) { acpi_status status = AE_ALREADY_EXISTS; - if (!dev && !work_func) + if (!dev && !func) return AE_BAD_PARAMETER; mutex_lock(&acpi_pm_notifier_lock); @@ -435,8 +442,7 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, adev->wakeup.ws = wakeup_source_register(dev_name(&adev->dev)); adev->wakeup.context.dev = dev; - if (work_func) - INIT_WORK(&adev->wakeup.context.work, work_func); + adev->wakeup.context.func = func; status = acpi_install_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, acpi_pm_notify_handler, NULL); @@ -469,10 +475,7 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) if (ACPI_FAILURE(status)) goto out; - if (adev->wakeup.context.work.func) { - cancel_work_sync(&adev->wakeup.context.work); - adev->wakeup.context.work.func = NULL; - } + adev->wakeup.context.func = NULL; adev->wakeup.context.dev = NULL; wakeup_source_unregister(adev->wakeup.ws); @@ -493,6 +496,13 @@ bool acpi_bus_can_wakeup(acpi_handle handle) } EXPORT_SYMBOL(acpi_bus_can_wakeup); +bool acpi_pm_device_can_wakeup(struct device *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + + return adev ? acpi_device_can_wakeup(adev) : false; +} + /** * acpi_dev_pm_get_state - Get preferred power state of ACPI device. * @dev: Device whose preferred target power state to return. @@ -658,16 +668,15 @@ EXPORT_SYMBOL(acpi_pm_device_sleep_state); /** * acpi_pm_notify_work_func - ACPI devices wakeup notification work function. - * @work: Work item to handle. + * @context: Device wakeup context. */ -static void acpi_pm_notify_work_func(struct work_struct *work) +static void acpi_pm_notify_work_func(struct acpi_device_wakeup_context *context) { - struct device *dev; + struct device *dev = context->dev; - dev = container_of(work, struct acpi_device_wakeup_context, work)->dev; if (dev) { pm_wakeup_event(dev, 0); - pm_runtime_resume(dev); + pm_request_resume(dev); } } @@ -693,80 +702,53 @@ static int acpi_device_wakeup(struct acpi_device *adev, u32 target_state, acpi_status res; int error; + if (adev->wakeup.flags.enabled) + return 0; + error = acpi_enable_wakeup_device_power(adev, target_state); if (error) return error; - if (adev->wakeup.flags.enabled) - return 0; - res = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); - if (ACPI_SUCCESS(res)) { - adev->wakeup.flags.enabled = 1; - } else { + if (ACPI_FAILURE(res)) { acpi_disable_wakeup_device_power(adev); return -EIO; } - } else { - if (adev->wakeup.flags.enabled) { - acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); - adev->wakeup.flags.enabled = 0; - } + adev->wakeup.flags.enabled = 1; + } else if (adev->wakeup.flags.enabled) { + acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); acpi_disable_wakeup_device_power(adev); + adev->wakeup.flags.enabled = 0; } return 0; } /** - * acpi_pm_device_run_wake - Enable/disable remote wakeup for given device. - * @dev: Device to enable/disable the platform to wake up. + * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. + * @dev: Device to enable/disable to generate wakeup events. * @enable: Whether to enable or disable the wakeup functionality. */ -int acpi_pm_device_run_wake(struct device *phys_dev, bool enable) -{ - struct acpi_device *adev; - - if (!device_run_wake(phys_dev)) - return -EINVAL; - - adev = ACPI_COMPANION(phys_dev); - if (!adev) { - dev_dbg(phys_dev, "ACPI companion missing in %s!\n", __func__); - return -ENODEV; - } - - return acpi_device_wakeup(adev, ACPI_STATE_S0, enable); -} -EXPORT_SYMBOL(acpi_pm_device_run_wake); - -#ifdef CONFIG_PM_SLEEP -/** - * acpi_pm_device_sleep_wake - Enable or disable device to wake up the system. - * @dev: Device to enable/desible to wake up the system from sleep states. - * @enable: Whether to enable or disable @dev to wake up the system. - */ -int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { struct acpi_device *adev; int error; - if (!device_can_wakeup(dev)) - return -EINVAL; - adev = ACPI_COMPANION(dev); if (!adev) { dev_dbg(dev, "ACPI companion missing in %s!\n", __func__); return -ENODEV; } + if (!acpi_device_can_wakeup(adev)) + return -EINVAL; + error = acpi_device_wakeup(adev, acpi_target_system_state(), enable); if (!error) - dev_info(dev, "System wakeup %s by ACPI\n", - enable ? "enabled" : "disabled"); + dev_dbg(dev, "Wakeup %s by ACPI\n", enable ? "enabled" : "disabled"); return error; } -#endif /* CONFIG_PM_SLEEP */ +EXPORT_SYMBOL(acpi_pm_set_device_wakeup); /** * acpi_dev_pm_low_power - Put ACPI device into a low-power state. diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index c24235d8fb52..156e15c35ffa 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1835,7 +1835,7 @@ static int acpi_ec_suspend(struct device *dev) struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev)); - if (ec_freeze_events) + if (acpi_sleep_no_ec_events() && ec_freeze_events) acpi_ec_disable_event(ec); return 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 66229ffa909b..be79f7db1850 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -198,8 +198,12 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); Suspend/Resume -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT +extern bool acpi_s2idle_wakeup(void); +extern bool acpi_sleep_no_ec_events(void); extern int acpi_sleep_init(void); #else +static inline bool acpi_s2idle_wakeup(void) { return false; } +static inline bool acpi_sleep_no_ec_events(void) { return true; } static inline int acpi_sleep_init(void) { return -ENXIO; } #endif diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 240544253ccd..9eec3095e6c3 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -608,8 +608,7 @@ static int acpi_pci_root_add(struct acpi_device *device, pcie_no_aspm(); pci_acpi_add_bus_pm_notifier(device); - if (device->wakeup.flags.run_wake) - device_set_run_wake(root->bus->bridge, true); + device_set_wakeup_capable(root->bus->bridge, device->wakeup.flags.valid); if (hotadd) { pcibios_resource_survey_bus(root->bus); @@ -649,7 +648,7 @@ static void acpi_pci_root_remove(struct acpi_device *device) pci_stop_root_bus(root->bus); pci_ioapic_remove(root); - device_set_run_wake(root->bus->bridge, false); + device_set_wakeup_capable(root->bus->bridge, false); pci_acpi_remove_bus_pm_notifier(device); pci_remove_root_bus(root->bus); diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index a34669cc823b..85ac848ac6ab 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -42,7 +42,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) if (!dev->physical_node_count) { seq_printf(seq, "%c%-8s\n", - dev->wakeup.flags.run_wake ? '*' : ' ', + dev->wakeup.flags.valid ? '*' : ' ', device_may_wakeup(&dev->dev) ? "enabled" : "disabled"); } else { @@ -58,7 +58,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "\t\t"); seq_printf(seq, "%c%-8s %s:%s\n", - dev->wakeup.flags.run_wake ? '*' : ' ', + dev->wakeup.flags.valid ? '*' : ' ', (device_may_wakeup(&dev->dev) || device_may_wakeup(ldev)) ? "enabled" : "disabled", diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d53162997f32..09f65f57bebe 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -835,7 +835,7 @@ static int acpi_bus_extract_wakeup_device_power_package(acpi_handle handle, return err; } -static void acpi_wakeup_gpe_init(struct acpi_device *device) +static bool acpi_wakeup_gpe_init(struct acpi_device *device) { static const struct acpi_device_id button_device_ids[] = { {"PNP0C0C", 0}, @@ -845,13 +845,11 @@ static void acpi_wakeup_gpe_init(struct acpi_device *device) }; struct acpi_device_wakeup *wakeup = &device->wakeup; acpi_status status; - acpi_event_status event_status; wakeup->flags.notifier_present = 0; /* Power button, Lid switch always enable wakeup */ if (!acpi_match_device_ids(device, button_device_ids)) { - wakeup->flags.run_wake = 1; if (!acpi_match_device_ids(device, &button_device_ids[1])) { /* Do not use Lid/sleep button for S5 wakeup */ if (wakeup->sleep_state == ACPI_STATE_S5) @@ -859,17 +857,12 @@ static void acpi_wakeup_gpe_init(struct acpi_device *device) } acpi_mark_gpe_for_wake(wakeup->gpe_device, wakeup->gpe_number); device_set_wakeup_capable(&device->dev, true); - return; + return true; } - acpi_setup_gpe_for_wake(device->handle, wakeup->gpe_device, - wakeup->gpe_number); - status = acpi_get_gpe_status(wakeup->gpe_device, wakeup->gpe_number, - &event_status); - if (ACPI_FAILURE(status)) - return; - - wakeup->flags.run_wake = !!(event_status & ACPI_EVENT_FLAG_HAS_HANDLER); + status = acpi_setup_gpe_for_wake(device->handle, wakeup->gpe_device, + wakeup->gpe_number); + return ACPI_SUCCESS(status); } static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device) @@ -887,10 +880,10 @@ static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device) return; } - device->wakeup.flags.valid = 1; + device->wakeup.flags.valid = acpi_wakeup_gpe_init(device); device->wakeup.prepare_count = 0; - acpi_wakeup_gpe_init(device); - /* Call _PSW/_DSW object to disable its ability to wake the sleeping + /* + * Call _PSW/_DSW object to disable its ability to wake the sleeping * system for the ACPI device with the _PRW object. * The _PSW object is depreciated in ACPI 3.0 and is replaced by _DSW. * So it is necessary to call _DSW object first. Only when it is not diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 097d630ab886..be17664736b2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -650,38 +650,165 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = { .recover = acpi_pm_finish, }; +static bool s2idle_in_progress; +static bool s2idle_wakeup; + +/* + * On platforms supporting the Low Power S0 Idle interface there is an ACPI + * device object with the PNP0D80 compatible device ID (System Power Management + * Controller) and a specific _DSM method under it. That method, if present, + * can be used to indicate to the platform that the OS is transitioning into a + * low-power state in which certain types of activity are not desirable or that + * it is leaving such a state, which allows the platform to adjust its operation + * mode accordingly. + */ +static const struct acpi_device_id lps0_device_ids[] = { + {"PNP0D80", }, + {"", }, +}; + +#define ACPI_LPS0_DSM_UUID "c4eb40a0-6cd2-11e2-bcfd-0800200c9a66" + +#define ACPI_LPS0_SCREEN_OFF 3 +#define ACPI_LPS0_SCREEN_ON 4 +#define ACPI_LPS0_ENTRY 5 +#define ACPI_LPS0_EXIT 6 + +#define ACPI_S2IDLE_FUNC_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT)) + +static acpi_handle lps0_device_handle; +static guid_t lps0_dsm_guid; +static char lps0_dsm_func_mask; + +static void acpi_sleep_run_lps0_dsm(unsigned int func) +{ + union acpi_object *out_obj; + + if (!(lps0_dsm_func_mask & (1 << func))) + return; + + out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, 1, func, NULL); + ACPI_FREE(out_obj); + + acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", + func, out_obj ? "successful" : "failed"); +} + +static int lps0_device_attach(struct acpi_device *adev, + const struct acpi_device_id *not_used) +{ + union acpi_object *out_obj; + + if (lps0_device_handle) + return 0; + + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) + return 0; + + guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); + /* Check if the _DSM is present and as expected. */ + out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); + if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) { + char bitmask = *(char *)out_obj->buffer.pointer; + + if ((bitmask & ACPI_S2IDLE_FUNC_MASK) == ACPI_S2IDLE_FUNC_MASK) { + lps0_dsm_func_mask = bitmask; + lps0_device_handle = adev->handle; + } + + acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", + bitmask); + } else { + acpi_handle_debug(adev->handle, + "_DSM function 0 evaluation failed\n"); + } + ACPI_FREE(out_obj); + return 0; +} + +static struct acpi_scan_handler lps0_handler = { + .ids = lps0_device_ids, + .attach = lps0_device_attach, +}; + static int acpi_freeze_begin(void) { acpi_scan_lock_acquire(); + s2idle_in_progress = true; return 0; } static int acpi_freeze_prepare(void) { - acpi_enable_wakeup_devices(ACPI_STATE_S0); - acpi_enable_all_wakeup_gpes(); - acpi_os_wait_events_complete(); + if (lps0_device_handle) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); + } else { + /* + * The configuration of GPEs is changed here to avoid spurious + * wakeups, but that should not be necessary if this is a + * "low-power S0" platform and the low-power S0 _DSM is present. + */ + acpi_enable_all_wakeup_gpes(); + acpi_os_wait_events_complete(); + } if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); + return 0; } +static void acpi_freeze_wake(void) +{ + /* + * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means + * that the SCI has triggered while suspended, so cancel the wakeup in + * case it has not been a wakeup event (the GPEs will be checked later). + */ + if (acpi_sci_irq_valid() && + !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) { + pm_system_cancel_wakeup(); + s2idle_wakeup = true; + } +} + +static void acpi_freeze_sync(void) +{ + /* + * Process all pending events in case there are any wakeup ones. + * + * The EC driver uses the system workqueue, so that one needs to be + * flushed too. + */ + acpi_os_wait_events_complete(); + flush_scheduled_work(); + s2idle_wakeup = false; +} + static void acpi_freeze_restore(void) { - acpi_disable_wakeup_devices(ACPI_STATE_S0); if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); - acpi_enable_all_runtime_gpes(); + + if (lps0_device_handle) { + acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); + } else { + acpi_enable_all_runtime_gpes(); + } } static void acpi_freeze_end(void) { + s2idle_in_progress = false; acpi_scan_lock_release(); } static const struct platform_freeze_ops acpi_freeze_ops = { .begin = acpi_freeze_begin, .prepare = acpi_freeze_prepare, + .wake = acpi_freeze_wake, + .sync = acpi_freeze_sync, .restore = acpi_freeze_restore, .end = acpi_freeze_end, }; @@ -696,13 +823,28 @@ static void acpi_sleep_suspend_setup(void) suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); + + acpi_scan_add_handler(&lps0_handler); freeze_set_ops(&acpi_freeze_ops); } #else /* !CONFIG_SUSPEND */ +#define s2idle_in_progress (false) +#define s2idle_wakeup (false) +#define lps0_device_handle (NULL) static inline void acpi_sleep_suspend_setup(void) {} #endif /* !CONFIG_SUSPEND */ +bool acpi_s2idle_wakeup(void) +{ + return s2idle_wakeup; +} + +bool acpi_sleep_no_ec_events(void) +{ + return !s2idle_in_progress || !lps0_device_handle; +} + #ifdef CONFIG_PM_SLEEP static u32 saved_bm_rld; diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index f3a65a3140d3..8a01d09ac4db 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -174,8 +174,7 @@ void zpodd_enable_run_wake(struct ata_device *dev) sdev_disable_disk_events(dev->sdev); zpodd->powered_off = true; - device_set_run_wake(&dev->tdev, true); - acpi_pm_device_run_wake(&dev->tdev, true); + acpi_pm_set_device_wakeup(&dev->tdev, true); } /* Disable runtime wake capability if it is enabled */ @@ -183,10 +182,8 @@ void zpodd_disable_run_wake(struct ata_device *dev) { struct zpodd *zpodd = dev->zpodd; - if (zpodd->powered_off) { - acpi_pm_device_run_wake(&dev->tdev, false); - device_set_run_wake(&dev->tdev, false); - } + if (zpodd->powered_off) + acpi_pm_set_device_wakeup(&dev->tdev, false); } /* diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index da49a8383dc3..b8e4b966c74d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -126,7 +126,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, - struct generic_pm_domain *genpd) + const struct generic_pm_domain *genpd) { bool ret; @@ -181,12 +181,14 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev) return pd_to_genpd(dev->pm_domain); } -static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_stop_dev(const struct generic_pm_domain *genpd, + struct device *dev) { return GENPD_DEV_CALLBACK(genpd, int, stop, dev); } -static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_start_dev(const struct generic_pm_domain *genpd, + struct device *dev) { return GENPD_DEV_CALLBACK(genpd, int, start, dev); } @@ -443,7 +445,7 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, pdd = dev->power.subsys_data ? dev->power.subsys_data->domain_data : NULL; - if (pdd && pdd->dev) { + if (pdd) { to_gpd_data(pdd)->td.constraint_changed = true; genpd = dev_to_genpd(dev); } else { @@ -738,7 +740,7 @@ static bool pm_genpd_present(const struct generic_pm_domain *genpd) #ifdef CONFIG_PM_SLEEP -static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, +static bool genpd_dev_active_wakeup(const struct generic_pm_domain *genpd, struct device *dev) { return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); @@ -840,7 +842,8 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, * signal remote wakeup from the system's working state as needed by runtime PM. * Return 'true' in either of the above cases. */ -static bool resume_needed(struct device *dev, struct generic_pm_domain *genpd) +static bool resume_needed(struct device *dev, + const struct generic_pm_domain *genpd) { bool active_wakeup; @@ -899,19 +902,19 @@ static int pm_genpd_prepare(struct device *dev) } /** - * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. + * genpd_finish_suspend - Completion of suspend or hibernation of device in an + * I/O pm domain. * @dev: Device to suspend. + * @poweroff: Specifies if this is a poweroff_noirq or suspend_noirq callback. * * Stop the device and remove power from the domain if all devices in it have * been stopped. */ -static int pm_genpd_suspend_noirq(struct device *dev) +static int genpd_finish_suspend(struct device *dev, bool poweroff) { struct generic_pm_domain *genpd; int ret; - dev_dbg(dev, "%s()\n", __func__); - genpd = dev_to_genpd(dev); if (IS_ERR(genpd)) return -EINVAL; @@ -919,6 +922,13 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; + if (poweroff) + ret = pm_generic_poweroff_noirq(dev); + else + ret = pm_generic_suspend_noirq(dev); + if (ret) + return ret; + if (genpd->dev_ops.stop && genpd->dev_ops.start) { ret = pm_runtime_force_suspend(dev); if (ret) @@ -934,6 +944,20 @@ static int pm_genpd_suspend_noirq(struct device *dev) } /** + * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. + * @dev: Device to suspend. + * + * Stop the device and remove power from the domain if all devices in it have + * been stopped. + */ +static int pm_genpd_suspend_noirq(struct device *dev) +{ + dev_dbg(dev, "%s()\n", __func__); + + return genpd_finish_suspend(dev, false); +} + +/** * pm_genpd_resume_noirq - Start of resume of device in an I/O PM domain. * @dev: Device to resume. * @@ -961,6 +985,10 @@ static int pm_genpd_resume_noirq(struct device *dev) if (genpd->dev_ops.stop && genpd->dev_ops.start) ret = pm_runtime_force_resume(dev); + ret = pm_generic_resume_noirq(dev); + if (ret) + return ret; + return ret; } @@ -975,7 +1003,7 @@ static int pm_genpd_resume_noirq(struct device *dev) */ static int pm_genpd_freeze_noirq(struct device *dev) { - struct generic_pm_domain *genpd; + const struct generic_pm_domain *genpd; int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -984,6 +1012,10 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; + ret = pm_generic_freeze_noirq(dev); + if (ret) + return ret; + if (genpd->dev_ops.stop && genpd->dev_ops.start) ret = pm_runtime_force_suspend(dev); @@ -999,7 +1031,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) */ static int pm_genpd_thaw_noirq(struct device *dev) { - struct generic_pm_domain *genpd; + const struct generic_pm_domain *genpd; int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1008,10 +1040,28 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->dev_ops.stop && genpd->dev_ops.start) + if (genpd->dev_ops.stop && genpd->dev_ops.start) { ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + } - return ret; + return pm_generic_thaw_noirq(dev); +} + +/** + * pm_genpd_poweroff_noirq - Completion of hibernation of device in an + * I/O PM domain. + * @dev: Device to poweroff. + * + * Stop the device and remove power from the domain if all devices in it have + * been stopped. + */ +static int pm_genpd_poweroff_noirq(struct device *dev) +{ + dev_dbg(dev, "%s()\n", __func__); + + return genpd_finish_suspend(dev, true); } /** @@ -1048,10 +1098,13 @@ static int pm_genpd_restore_noirq(struct device *dev) genpd_sync_power_on(genpd, true, 0); genpd_unlock(genpd); - if (genpd->dev_ops.stop && genpd->dev_ops.start) + if (genpd->dev_ops.stop && genpd->dev_ops.start) { ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + } - return ret; + return pm_generic_restore_noirq(dev); } /** @@ -1095,8 +1148,8 @@ static void genpd_syscore_switch(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; - genpd = dev_to_genpd(dev); - if (!pm_genpd_present(genpd)) + genpd = genpd_lookup_dev(dev); + if (!genpd) return; if (suspend) { @@ -1393,7 +1446,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *l, *link; int ret = -EINVAL; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) @@ -1409,7 +1462,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { if (link->slave != subdomain) continue; @@ -1493,7 +1546,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq; genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; - genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; + genpd->domain.ops.poweroff_noirq = pm_genpd_poweroff_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; genpd->domain.ops.complete = pm_genpd_complete; @@ -1780,12 +1833,12 @@ EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell); */ void of_genpd_del_provider(struct device_node *np) { - struct of_genpd_provider *cp; + struct of_genpd_provider *cp, *tmp; struct generic_pm_domain *gpd; mutex_lock(&gpd_list_lock); mutex_lock(&of_genpd_mutex); - list_for_each_entry(cp, &of_genpd_providers, link) { + list_for_each_entry_safe(cp, tmp, &of_genpd_providers, link) { if (cp->node == np) { /* * For each PM domain associated with the @@ -1925,14 +1978,14 @@ EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); */ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - struct generic_pm_domain *gpd, *genpd = ERR_PTR(-ENOENT); + struct generic_pm_domain *gpd, *tmp, *genpd = ERR_PTR(-ENOENT); int ret; if (IS_ERR_OR_NULL(np)) return ERR_PTR(-EINVAL); mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + list_for_each_entry_safe(gpd, tmp, &gpd_list, gpd_list_node) { if (gpd->provider == &np->fwnode) { ret = genpd_remove(gpd); genpd = ret ? ERR_PTR(ret) : gpd; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 2e0fce711135..281f949c5ffe 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -92,12 +92,6 @@ static bool default_suspend_ok(struct device *dev) return td->cached_suspend_ok; } -/** - * default_power_down_ok - Default generic PM domain power off governor routine. - * @pd: PM domain to check. - * - * This routine must be executed under the PM domain's lock. - */ static bool __default_power_down_ok(struct dev_pm_domain *pd, unsigned int state) { @@ -187,6 +181,12 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, return true; } +/** + * default_power_down_ok - Default generic PM domain power off governor routine. + * @pd: PM domain to check. + * + * This routine must be executed under the PM domain's lock. + */ static bool default_power_down_ok(struct dev_pm_domain *pd) { struct generic_pm_domain *genpd = pd_to_genpd(pd); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 9faee1c893e5..c99f8730de82 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -62,7 +62,7 @@ static pm_message_t pm_transition; static int async_error; -static char *pm_verb(int event) +static const char *pm_verb(int event) { switch (event) { case PM_EVENT_SUSPEND: @@ -208,7 +208,8 @@ static ktime_t initcall_debug_start(struct device *dev) } static void initcall_debug_report(struct device *dev, ktime_t calltime, - int error, pm_message_t state, char *info) + int error, pm_message_t state, + const char *info) { ktime_t rettime; s64 nsecs; @@ -403,21 +404,23 @@ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t stat return NULL; } -static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info) +static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) { dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event), ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? ", may wakeup" : ""); } -static void pm_dev_err(struct device *dev, pm_message_t state, char *info, +static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n", dev_name(dev), pm_verb(state.event), info, error); } -static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) +#ifdef CONFIG_PM_DEBUG +static void dpm_show_time(ktime_t starttime, pm_message_t state, + const char *info) { ktime_t calltime; u64 usecs64; @@ -433,9 +436,13 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) info ?: "", info ? " " : "", pm_verb(state.event), usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); } +#else +static inline void dpm_show_time(ktime_t starttime, pm_message_t state, + const char *info) {} +#endif /* CONFIG_PM_DEBUG */ static int dpm_run_callback(pm_callback_t cb, struct device *dev, - pm_message_t state, char *info) + pm_message_t state, const char *info) { ktime_t calltime; int error; @@ -535,7 +542,7 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; TRACE_DEVICE(dev); @@ -665,7 +672,7 @@ void dpm_resume_noirq(pm_message_t state) static int device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; TRACE_DEVICE(dev); @@ -793,7 +800,7 @@ EXPORT_SYMBOL_GPL(dpm_resume_start); static int device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); @@ -955,7 +962,7 @@ void dpm_resume(pm_message_t state) static void device_complete(struct device *dev, pm_message_t state) { void (*callback)(struct device *) = NULL; - char *info = NULL; + const char *info = NULL; if (dev->power.syscore) return; @@ -1080,7 +1087,7 @@ static pm_message_t resume_event(pm_message_t sleep_state) static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; TRACE_DEVICE(dev); @@ -1091,11 +1098,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (async_error) goto Complete; - if (pm_wakeup_pending()) { - async_error = -EBUSY; - goto Complete; - } - if (dev->power.syscore || dev->power.direct_complete) goto Complete; @@ -1225,7 +1227,7 @@ int dpm_suspend_noirq(pm_message_t state) static int __device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; TRACE_DEVICE(dev); @@ -1384,7 +1386,7 @@ EXPORT_SYMBOL_GPL(dpm_suspend_end); */ static int legacy_suspend(struct device *dev, pm_message_t state, int (*cb)(struct device *dev, pm_message_t state), - char *info) + const char *info) { int error; ktime_t calltime; @@ -1426,7 +1428,7 @@ static void dpm_clear_suppliers_direct_complete(struct device *dev) static int __device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; - char *info = NULL; + const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index dae61720b314..a8cc14fd8ae4 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -180,7 +180,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) { struct opp_table *opp_table; struct dev_pm_opp *opp; - struct regulator *reg, **regulators; + struct regulator *reg; unsigned long latency_ns = 0; int ret, i, count; struct { @@ -198,15 +198,9 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) if (!count) goto put_opp_table; - regulators = kmalloc_array(count, sizeof(*regulators), GFP_KERNEL); - if (!regulators) - goto put_opp_table; - uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL); if (!uV) - goto free_regulators; - - memcpy(regulators, opp_table->regulators, count * sizeof(*regulators)); + goto put_opp_table; mutex_lock(&opp_table->lock); @@ -232,15 +226,13 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) * isn't freed, while we are executing this routine. */ for (i = 0; i < count; i++) { - reg = regulators[i]; + reg = opp_table->regulators[i]; ret = regulator_set_voltage_time(reg, uV[i].min, uV[i].max); if (ret > 0) latency_ns += ret * 1000; } kfree(uV); -free_regulators: - kfree(regulators); put_opp_table: dev_pm_opp_put_opp_table(opp_table); @@ -543,17 +535,18 @@ _generic_set_opp_clk_only(struct device *dev, struct clk *clk, return ret; } -static int _generic_set_opp(struct dev_pm_set_opp_data *data) +static int _generic_set_opp_regulator(const struct opp_table *opp_table, + struct device *dev, + unsigned long old_freq, + unsigned long freq, + struct dev_pm_opp_supply *old_supply, + struct dev_pm_opp_supply *new_supply) { - struct dev_pm_opp_supply *old_supply = data->old_opp.supplies; - struct dev_pm_opp_supply *new_supply = data->new_opp.supplies; - unsigned long old_freq = data->old_opp.rate, freq = data->new_opp.rate; - struct regulator *reg = data->regulators[0]; - struct device *dev= data->dev; + struct regulator *reg = opp_table->regulators[0]; int ret; /* This function only supports single regulator per device */ - if (WARN_ON(data->regulator_count > 1)) { + if (WARN_ON(opp_table->regulator_count > 1)) { dev_err(dev, "multiple regulators are not supported\n"); return -EINVAL; } @@ -566,7 +559,7 @@ static int _generic_set_opp(struct dev_pm_set_opp_data *data) } /* Change frequency */ - ret = _generic_set_opp_clk_only(dev, data->clk, old_freq, freq); + ret = _generic_set_opp_clk_only(dev, opp_table->clk, old_freq, freq); if (ret) goto restore_voltage; @@ -580,12 +573,12 @@ static int _generic_set_opp(struct dev_pm_set_opp_data *data) return 0; restore_freq: - if (_generic_set_opp_clk_only(dev, data->clk, freq, old_freq)) + if (_generic_set_opp_clk_only(dev, opp_table->clk, freq, old_freq)) dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", __func__, old_freq); restore_voltage: /* This shouldn't harm even if the voltages weren't updated earlier */ - if (old_supply->u_volt) + if (old_supply) _set_opp_voltage(dev, reg, old_supply); return ret; @@ -603,10 +596,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { struct opp_table *opp_table; unsigned long freq, old_freq; - int (*set_opp)(struct dev_pm_set_opp_data *data); struct dev_pm_opp *old_opp, *opp; - struct regulator **regulators; - struct dev_pm_set_opp_data *data; struct clk *clk; int ret, size; @@ -661,38 +651,35 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", __func__, old_freq, freq); - regulators = opp_table->regulators; - /* Only frequency scaling */ - if (!regulators) { + if (!opp_table->regulators) { ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq); - goto put_opps; - } + } else if (!opp_table->set_opp) { + ret = _generic_set_opp_regulator(opp_table, dev, old_freq, freq, + IS_ERR(old_opp) ? NULL : old_opp->supplies, + opp->supplies); + } else { + struct dev_pm_set_opp_data *data; - if (opp_table->set_opp) - set_opp = opp_table->set_opp; - else - set_opp = _generic_set_opp; - - data = opp_table->set_opp_data; - data->regulators = regulators; - data->regulator_count = opp_table->regulator_count; - data->clk = clk; - data->dev = dev; - - data->old_opp.rate = old_freq; - size = sizeof(*opp->supplies) * opp_table->regulator_count; - if (IS_ERR(old_opp)) - memset(data->old_opp.supplies, 0, size); - else - memcpy(data->old_opp.supplies, old_opp->supplies, size); + data = opp_table->set_opp_data; + data->regulators = opp_table->regulators; + data->regulator_count = opp_table->regulator_count; + data->clk = clk; + data->dev = dev; - data->new_opp.rate = freq; - memcpy(data->new_opp.supplies, opp->supplies, size); + data->old_opp.rate = old_freq; + size = sizeof(*opp->supplies) * opp_table->regulator_count; + if (IS_ERR(old_opp)) + memset(data->old_opp.supplies, 0, size); + else + memcpy(data->old_opp.supplies, old_opp->supplies, size); - ret = set_opp(data); + data->new_opp.rate = freq; + memcpy(data->new_opp.supplies, opp->supplies, size); + + ret = opp_table->set_opp(data); + } -put_opps: dev_pm_opp_put(opp); put_old_opp: if (!IS_ERR(old_opp)) @@ -1376,6 +1363,73 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators); /** + * dev_pm_opp_set_clkname() - Set clk name for the device + * @dev: Device for which clk name is being set. + * @name: Clk name. + * + * In order to support OPP switching, OPP layer needs to get pointer to the + * clock for the device. Simple cases work fine without using this routine (i.e. + * by passing connection-id as NULL), but for a device with multiple clocks + * available, the OPP core needs to know the exact name of the clk to use. + * + * This must be called before any OPPs are initialized for the device. + */ +struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) +{ + struct opp_table *opp_table; + int ret; + + opp_table = dev_pm_opp_get_opp_table(dev); + if (!opp_table) + return ERR_PTR(-ENOMEM); + + /* This should be called before OPPs are initialized */ + if (WARN_ON(!list_empty(&opp_table->opp_list))) { + ret = -EBUSY; + goto err; + } + + /* Already have default clk set, free it */ + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); + + /* Find clk for the device */ + opp_table->clk = clk_get(dev, name); + if (IS_ERR(opp_table->clk)) { + ret = PTR_ERR(opp_table->clk); + if (ret != -EPROBE_DEFER) { + dev_err(dev, "%s: Couldn't find clock: %d\n", __func__, + ret); + } + goto err; + } + + return opp_table; + +err: + dev_pm_opp_put_opp_table(opp_table); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_clkname); + +/** + * dev_pm_opp_put_clkname() - Releases resources blocked for clk. + * @opp_table: OPP table returned from dev_pm_opp_set_clkname(). + */ +void dev_pm_opp_put_clkname(struct opp_table *opp_table) +{ + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); + + clk_put(opp_table->clk); + opp_table->clk = ERR_PTR(-EINVAL); + + dev_pm_opp_put_opp_table(opp_table); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_clkname); + +/** * dev_pm_opp_register_set_opp_helper() - Register custom set OPP helper * @dev: Device for which the helper is getting registered. * @set_opp: Custom set OPP helper. diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c index 95f433db4ac7..81cf120fcf43 100644 --- a/drivers/base/power/opp/debugfs.c +++ b/drivers/base/power/opp/debugfs.c @@ -40,11 +40,10 @@ static bool opp_debug_create_supplies(struct dev_pm_opp *opp, struct dentry *pdentry) { struct dentry *d; - int i = 0; + int i; char *name; - /* Always create at least supply-0 directory */ - do { + for (i = 0; i < opp_table->regulator_count; i++) { name = kasprintf(GFP_KERNEL, "supply-%d", i); /* Create per-opp directory */ @@ -70,7 +69,7 @@ static bool opp_debug_create_supplies(struct dev_pm_opp *opp, if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->supplies[i].u_amp)) return false; - } while (++i < opp_table->regulator_count); + } return true; } diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 779428676f63..57eec1ca0569 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -131,8 +131,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, prop = of_find_property(opp->np, name, NULL); /* Missing property isn't a problem, but an invalid entry is */ - if (!prop) - return 0; + if (!prop) { + if (!opp_table->regulator_count) + return 0; + + dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n", + __func__); + return -EINVAL; + } } vcount = of_property_count_u32_elems(opp->np, name); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 33b4b902741a..185a52581cfa 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -607,7 +607,7 @@ static struct attribute *power_attrs[] = { #endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; -static struct attribute_group pm_attr_group = { +static const struct attribute_group pm_attr_group = { .name = power_group_name, .attrs = power_attrs, }; @@ -629,7 +629,7 @@ static struct attribute *wakeup_attrs[] = { #endif NULL, }; -static struct attribute_group pm_wakeup_attr_group = { +static const struct attribute_group pm_wakeup_attr_group = { .name = power_group_name, .attrs = wakeup_attrs, }; @@ -644,7 +644,7 @@ static struct attribute *runtime_attrs[] = { &dev_attr_autosuspend_delay_ms.attr, NULL, }; -static struct attribute_group pm_runtime_attr_group = { +static const struct attribute_group pm_runtime_attr_group = { .name = power_group_name, .attrs = runtime_attrs, }; @@ -653,7 +653,7 @@ static struct attribute *pm_qos_resume_latency_attrs[] = { &dev_attr_pm_qos_resume_latency_us.attr, NULL, }; -static struct attribute_group pm_qos_resume_latency_attr_group = { +static const struct attribute_group pm_qos_resume_latency_attr_group = { .name = power_group_name, .attrs = pm_qos_resume_latency_attrs, }; @@ -662,7 +662,7 @@ static struct attribute *pm_qos_latency_tolerance_attrs[] = { &dev_attr_pm_qos_latency_tolerance_us.attr, NULL, }; -static struct attribute_group pm_qos_latency_tolerance_attr_group = { +static const struct attribute_group pm_qos_latency_tolerance_attr_group = { .name = power_group_name, .attrs = pm_qos_latency_tolerance_attrs, }; @@ -672,7 +672,7 @@ static struct attribute *pm_qos_flags_attrs[] = { &dev_attr_pm_qos_remote_wakeup.attr, NULL, }; -static struct attribute_group pm_qos_flags_attr_group = { +static const struct attribute_group pm_qos_flags_attr_group = { .name = power_group_name, .attrs = pm_qos_flags_attrs, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index c313b600d356..144e6d8fafc8 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ unsigned int pm_wakeup_irq __read_mostly; -/* If set and the system is suspending, terminate the suspend. */ -static bool pm_abort_suspend __read_mostly; +/* If greater than 0 and the system is suspending, terminate the suspend. */ +static atomic_t pm_abort_suspend __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -60,6 +60,8 @@ static LIST_HEAD(wakeup_sources); static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); +DEFINE_STATIC_SRCU(wakeup_srcu); + static struct wakeup_source deleted_ws = { .name = "deleted", .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), @@ -198,7 +200,7 @@ void wakeup_source_remove(struct wakeup_source *ws) spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); spin_unlock_irqrestore(&events_lock, flags); - synchronize_rcu(); + synchronize_srcu(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_source_remove); @@ -332,12 +334,12 @@ void device_wakeup_detach_irq(struct device *dev) void device_wakeup_arm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) dev_pm_arm_wake_irq(ws->wakeirq); - - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -348,12 +350,12 @@ void device_wakeup_arm_wake_irqs(void) void device_wakeup_disarm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) dev_pm_disarm_wake_irq(ws->wakeirq); - - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -804,10 +806,10 @@ EXPORT_SYMBOL_GPL(pm_wakeup_dev_event); void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; - int active = 0; + int srcuidx, active = 0; struct wakeup_source *last_activity_ws = NULL; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { pr_debug("active wakeup source: %s\n", ws->name); @@ -823,7 +825,7 @@ void pm_print_active_wakeup_sources(void) if (!active && last_activity_ws) pr_debug("last active wakeup source: %s\n", last_activity_ws->name); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); @@ -855,20 +857,26 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret || pm_abort_suspend; + return ret || atomic_read(&pm_abort_suspend) > 0; } void pm_system_wakeup(void) { - pm_abort_suspend = true; + atomic_inc(&pm_abort_suspend); freeze_wake(); } EXPORT_SYMBOL_GPL(pm_system_wakeup); -void pm_wakeup_clear(void) +void pm_system_cancel_wakeup(void) +{ + atomic_dec(&pm_abort_suspend); +} + +void pm_wakeup_clear(bool reset) { - pm_abort_suspend = false; pm_wakeup_irq = 0; + if (reset) + atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) @@ -950,8 +958,9 @@ void pm_wakep_autosleep_enabled(bool set) { struct wakeup_source *ws; ktime_t now = ktime_get(); + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { spin_lock_irq(&ws->lock); if (ws->autosleep_enabled != set) { @@ -965,7 +974,7 @@ void pm_wakep_autosleep_enabled(bool set) } spin_unlock_irq(&ws->lock); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } #endif /* CONFIG_PM_AUTOSLEEP */ @@ -1026,15 +1035,16 @@ static int print_wakeup_source_stats(struct seq_file *m, static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; + int srcuidx; seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) print_wakeup_source_stats(m, ws); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); print_wakeup_source_stats(m, &deleted_ws); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index e82bb3c30b92..10be285c9055 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -144,10 +144,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cppc_dmi_max_khz = cppc_get_dmi_max_khz(); - policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf; + /* + * Set min to lowest nonlinear perf to avoid any efficiency penalty (see + * Section 8.4.7.1.1.5 of ACPI 6.1 spec) + */ + policy->min = cpu->perf_caps.lowest_nonlinear_perf * cppc_dmi_max_khz / + cpu->perf_caps.highest_perf; policy->max = cppc_dmi_max_khz; - policy->cpuinfo.min_freq = policy->min; - policy->cpuinfo.max_freq = policy->max; + + /* + * Set cpuinfo.min_freq to Lowest to make the full range of performance + * available if userspace wants to use any perf between lowest & lowest + * nonlinear perf + */ + policy->cpuinfo.min_freq = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / + cpu->perf_caps.highest_perf; + policy->cpuinfo.max_freq = cppc_dmi_max_khz; + policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num); policy->shared_type = cpu->shared_type; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 921b4a6c3d16..1c262923fe58 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -31,6 +31,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "arm,integrator-ap", }, { .compatible = "arm,integrator-cp", }, + { .compatible = "hisilicon,hi3660", }, { .compatible = "hisilicon,hi6220", }, { .compatible = "fsl,imx27", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 29c5b0cbad96..9bf97a366029 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -632,11 +632,21 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); +__weak unsigned int arch_freq_get_on_cpu(int cpu) +{ + return 0; +} + static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; + unsigned int freq; - if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + freq = arch_freq_get_on_cpu(policy->cpu); + if (freq) + ret = sprintf(buf, "%u\n", freq); + else if (cpufreq_driver && cpufreq_driver->setpolicy && + cpufreq_driver->get) ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); else ret = sprintf(buf, "%u\n", policy->cur); diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index 9180d34cc9fc..b6b369c22272 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -173,12 +173,12 @@ static void exynos_enable_dvfs(unsigned int cur_frequency) /* Enable PSTATE Change Event */ tmp = __raw_readl(dvfs_info->base + XMU_PMUEVTEN); tmp |= (1 << PSTATE_CHANGED_EVTEN_SHIFT); - __raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN); + __raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN); /* Enable PSTATE Change IRQ */ tmp = __raw_readl(dvfs_info->base + XMU_PMUIRQEN); tmp |= (1 << PSTATE_CHANGED_IRQEN_SHIFT); - __raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN); + __raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN); /* Set initial performance index */ cpufreq_for_each_entry(pos, freq_table) @@ -330,7 +330,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) struct resource res; unsigned int cur_frequency; - np = pdev->dev.of_node; + np = pdev->dev.of_node; if (!np) return -ENODEV; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 9c13f097fd8c..b6edd3ccaa55 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -101,7 +101,8 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it * - Disable pll2_pfd2_396m_clk */ - if (of_machine_is_compatible("fsl,imx6ul")) { + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) { /* * When changing pll1_sw_clk's parent to pll1_sys_clk, * CPU may run at higher than 528MHz, this will lead to @@ -215,7 +216,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_clk; } - if (of_machine_is_compatible("fsl,imx6ul")) { + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) { pll2_bus_clk = clk_get(cpu_dev, "pll2_bus"); secondary_sel_clk = clk_get(cpu_dev, "secondary_sel"); if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb1158532de3..48a98f11a84e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -231,10 +231,8 @@ struct global_params { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data - * @min_perf: Minimum capacity limit as a fraction of the maximum - * turbo P-state capacity. - * @max_perf: Maximum capacity limit as a fraction of the maximum - * turbo P-state capacity. + * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios + * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios * @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference @@ -266,8 +264,8 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; - int32_t min_perf; - int32_t max_perf; + int32_t min_perf_ratio; + int32_t max_perf_ratio; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -653,6 +651,12 @@ static const char * const energy_perf_strings[] = { "power", NULL }; +static const unsigned int epp_values[] = { + HWP_EPP_PERFORMANCE, + HWP_EPP_BALANCE_PERFORMANCE, + HWP_EPP_BALANCE_POWERSAVE, + HWP_EPP_POWERSAVE +}; static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) { @@ -664,17 +668,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) return epp; if (static_cpu_has(X86_FEATURE_HWP_EPP)) { - /* - * Range: - * 0x00-0x3F : Performance - * 0x40-0x7F : Balance performance - * 0x80-0xBF : Balance power - * 0xC0-0xFF : Power - * The EPP is a 8 bit value, but our ranges restrict the - * value which can be set. Here only using top two bits - * effectively. - */ - index = (epp >> 6) + 1; + if (epp == HWP_EPP_PERFORMANCE) + return 1; + if (epp <= HWP_EPP_BALANCE_PERFORMANCE) + return 2; + if (epp <= HWP_EPP_BALANCE_POWERSAVE) + return 3; + else + return 4; } else if (static_cpu_has(X86_FEATURE_EPB)) { /* * Range: @@ -712,15 +713,8 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, value &= ~GENMASK_ULL(31, 24); - /* - * If epp is not default, convert from index into - * energy_perf_strings to epp value, by shifting 6 - * bits left to use only top two bits in epp. - * The resultant epp need to shifted by 24 bits to - * epp position in MSR_HWP_REQUEST. - */ if (epp == -EINVAL) - epp = (pref_index - 1) << 6; + epp = epp_values[pref_index - 1]; value |= (u64)epp << 24; ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); @@ -794,25 +788,32 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_hwp_set(unsigned int cpu) +static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, + int *current_max) { - struct cpudata *cpu_data = all_cpu_data[cpu]; - int min, hw_min, max, hw_max; - u64 value, cap; - s16 epp; + u64 cap; rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - hw_min = HWP_LOWEST_PERF(cap); if (global.no_turbo) - hw_max = HWP_GUARANTEED_PERF(cap); + *current_max = HWP_GUARANTEED_PERF(cap); else - hw_max = HWP_HIGHEST_PERF(cap); + *current_max = HWP_HIGHEST_PERF(cap); + + *phy_max = HWP_HIGHEST_PERF(cap); +} + +static void intel_pstate_hwp_set(unsigned int cpu) +{ + struct cpudata *cpu_data = all_cpu_data[cpu]; + int max, min; + u64 value; + s16 epp; + + max = cpu_data->max_perf_ratio; + min = cpu_data->min_perf_ratio; - max = fp_ext_toint(hw_max * cpu_data->max_perf); if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) min = max; - else - min = fp_ext_toint(hw_max * cpu_data->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); @@ -1528,8 +1529,7 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) update_turbo_state(); pstate = intel_pstate_get_base_pstate(cpu); - pstate = max(cpu->pstate.min_pstate, - fp_ext_toint(pstate * cpu->max_perf)); + pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); intel_pstate_set_pstate(cpu, pstate); } @@ -1616,9 +1616,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) int32_t busy_frac, boost; int target, avg_pstate; - if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) - return cpu->pstate.turbo_pstate; - busy_frac = div_fp(sample->mperf, sample->tsc); boost = cpu->iowait_boost; @@ -1655,9 +1652,6 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; - if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) - return cpu->pstate.turbo_pstate; - /* * perf_scaled is the ratio of the average P-state during the last * sampling period to the P-state requested last time (in percent). @@ -1695,9 +1689,8 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) int max_pstate = intel_pstate_get_base_pstate(cpu); int min_pstate; - min_pstate = max(cpu->pstate.min_pstate, - fp_ext_toint(max_pstate * cpu->min_perf)); - max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf)); + min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio); + max_pstate = max(min_pstate, cpu->max_perf_ratio); return clamp_t(int, pstate, min_pstate, max_pstate); } @@ -1733,16 +1726,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate) fp_toint(cpu->iowait_boost * 100)); } -static void intel_pstate_update_util_hwp(struct update_util_data *data, - u64 time, unsigned int flags) -{ - struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; - - if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL) - intel_pstate_sample(cpu, time); -} - static void intel_pstate_update_util_pid(struct update_util_data *data, u64 time, unsigned int flags) { @@ -1934,6 +1917,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; + if (hwp_active) + return; + if (cpu->update_util_set) return; @@ -1967,52 +1953,61 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, { int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; + int max_state, turbo_max; - max_policy_perf = div_ext_fp(policy->max, max_freq); - max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); + /* + * HWP needs some special consideration, because on BDX the + * HWP_REQUEST uses abstract value to represent performance + * rather than pure ratios. + */ + if (hwp_active) { + intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + } else { + max_state = intel_pstate_get_base_pstate(cpu); + turbo_max = cpu->pstate.turbo_pstate; + } + + max_policy_perf = max_state * policy->max / max_freq; if (policy->max == policy->min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = div_ext_fp(policy->min, max_freq); + min_policy_perf = max_state * policy->min / max_freq; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } + pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", + policy->cpu, max_state, + min_policy_perf, max_policy_perf); + /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { - cpu->min_perf = min_policy_perf; - cpu->max_perf = max_policy_perf; + cpu->min_perf_ratio = min_policy_perf; + cpu->max_perf_ratio = max_policy_perf; } else { int32_t global_min, global_max; /* Global limits are in percent of the maximum turbo P-state. */ - global_max = percent_ext_fp(global.max_perf_pct); - global_min = percent_ext_fp(global.min_perf_pct); - if (max_freq != cpu->pstate.turbo_freq) { - int32_t turbo_factor; - - turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate, - cpu->pstate.max_pstate); - global_min = mul_ext_fp(global_min, turbo_factor); - global_max = mul_ext_fp(global_max, turbo_factor); - } + global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); + global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); global_min = clamp_t(int32_t, global_min, 0, global_max); - cpu->min_perf = max(min_policy_perf, global_min); - cpu->min_perf = min(cpu->min_perf, max_policy_perf); - cpu->max_perf = min(max_policy_perf, global_max); - cpu->max_perf = max(min_policy_perf, cpu->max_perf); + pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, + global_min, global_max); - /* Make sure min_perf <= max_perf */ - cpu->min_perf = min(cpu->min_perf, cpu->max_perf); - } + cpu->min_perf_ratio = max(min_policy_perf, global_min); + cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf); + cpu->max_perf_ratio = min(max_policy_perf, global_max); + cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio); - cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS); - cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS); + /* Make sure min_perf <= max_perf */ + cpu->min_perf_ratio = min(cpu->min_perf_ratio, + cpu->max_perf_ratio); - pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, - fp_ext_toint(cpu->max_perf * 100), - fp_ext_toint(cpu->min_perf * 100)); + } + pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, + cpu->max_perf_ratio, + cpu->min_perf_ratio); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) @@ -2039,10 +2034,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) */ intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_max_within_limits(cpu); + } else { + intel_pstate_set_update_util_hook(policy->cpu); } - intel_pstate_set_update_util_hook(policy->cpu); - if (hwp_active) intel_pstate_hwp_set(policy->cpu); @@ -2115,8 +2110,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - cpu->max_perf = int_ext_tofp(1); - cpu->min_perf = 0; + cpu->max_perf_ratio = 0xFF; + cpu->min_perf_ratio = 0; policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; @@ -2558,7 +2553,6 @@ static int __init intel_pstate_init(void) } else { hwp_active++; intel_pstate.attr = hwp_cpufreq_attrs; - pstate_funcs.update_util = intel_pstate_update_util_hwp; goto hwp_cpu_matched; } } else { diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c index 992ce6f9abec..3779742f86e3 100644 --- a/drivers/cpufreq/sfi-cpufreq.c +++ b/drivers/cpufreq/sfi-cpufreq.c @@ -24,7 +24,7 @@ #include <asm/msr.h> -struct cpufreq_frequency_table *freq_table; +static struct cpufreq_frequency_table *freq_table; static struct sfi_freq_table_entry *sfi_cpufreq_array; static int num_freq_table_entries; diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 21340e0be73e..f52144808455 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -4,6 +4,7 @@ config ARM_CPUIDLE bool "Generic ARM/ARM64 CPU idle Driver" select DT_IDLE_STATES + select CPU_IDLE_MULTIPLE_DRIVERS help Select this to enable generic cpuidle driver for ARM. It provides a generic idle driver whose idle states are configured diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index f440d385ed34..7080c384ad5d 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/slab.h> +#include <linux/topology.h> #include <asm/cpuidle.h> @@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev, return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx); } -static struct cpuidle_driver arm_idle_driver = { +static struct cpuidle_driver arm_idle_driver __initdata = { .name = "arm_idle", .owner = THIS_MODULE, /* @@ -80,30 +81,42 @@ static const struct of_device_id arm_idle_state_match[] __initconst = { static int __init arm_idle_init(void) { int cpu, ret; - struct cpuidle_driver *drv = &arm_idle_driver; + struct cpuidle_driver *drv; struct cpuidle_device *dev; - /* - * Initialize idle states data, starting at index 1. - * This driver is DT only, if no DT idle states are detected (ret == 0) - * let the driver initialization fail accordingly since there is no - * reason to initialize the idle driver if only wfi is supported. - */ - ret = dt_init_idle_driver(drv, arm_idle_state_match, 1); - if (ret <= 0) - return ret ? : -ENODEV; - - ret = cpuidle_register_driver(drv); - if (ret) { - pr_err("Failed to register cpuidle driver\n"); - return ret; - } - - /* - * Call arch CPU operations in order to initialize - * idle states suspend back-end specific data - */ for_each_possible_cpu(cpu) { + + drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL); + if (!drv) { + ret = -ENOMEM; + goto out_fail; + } + + drv->cpumask = (struct cpumask *)cpumask_of(cpu); + + /* + * Initialize idle states data, starting at index 1. This + * driver is DT only, if no DT idle states are detected (ret + * == 0) let the driver initialization fail accordingly since + * there is no reason to initialize the idle driver if only + * wfi is supported. + */ + ret = dt_init_idle_driver(drv, arm_idle_state_match, 1); + if (ret <= 0) { + ret = ret ? : -ENODEV; + goto out_fail; + } + + ret = cpuidle_register_driver(drv); + if (ret) { + pr_err("Failed to register cpuidle driver\n"); + goto out_fail; + } + + /* + * Call arch CPU operations in order to initialize + * idle states suspend back-end specific data + */ ret = arm_cpuidle_init(cpu); /* @@ -141,10 +154,11 @@ out_fail: dev = per_cpu(cpuidle_devices, cpu); cpuidle_unregister_device(dev); kfree(dev); + drv = cpuidle_get_driver(); + cpuidle_unregister_driver(drv); + kfree(drv); } - cpuidle_unregister_driver(drv); - return ret; } device_initcall(arm_idle_init); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b2330fd69e34..61b64c2b2cb8 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -286,6 +286,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) struct device *device = get_cpu_device(dev->cpu); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; + int first_idx; + int idx; unsigned int interactivity_req; unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; @@ -335,11 +337,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (data->next_timer_us > polling_threshold && latency_req > s->exit_latency && !s->disabled && !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable) - data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + first_idx = CPUIDLE_DRIVER_STATE_START; else - data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; + first_idx = CPUIDLE_DRIVER_STATE_START - 1; } else { - data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + first_idx = 0; } /* @@ -359,20 +361,28 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * Find the idle state with the lowest power while satisfying * our constraints. */ - for (i = data->last_state_idx + 1; i < drv->state_count; i++) { + idx = -1; + for (i = first_idx; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; if (s->disabled || su->disable) continue; + if (idx == -1) + idx = i; /* first enabled state */ if (s->target_residency > data->predicted_us) break; if (s->exit_latency > latency_req) break; - data->last_state_idx = i; + idx = i; } + if (idx == -1) + idx = 0; /* No states enabled. Must use 0. */ + + data->last_state_idx = idx; + return data->last_state_idx; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 216d7ec88c0c..c2ae819a871c 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -51,6 +51,8 @@ /* un-comment DEBUG to enable pr_debug() statements */ #define DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/cpuidle.h> #include <linux/tick.h> @@ -65,7 +67,6 @@ #include <asm/msr.h> #define INTEL_IDLE_VERSION "0.4.1" -#define PREFIX "intel_idle: " static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", @@ -1111,7 +1112,7 @@ static int __init intel_idle_probe(void) const struct x86_cpu_id *id; if (max_cstate == 0) { - pr_debug(PREFIX "disabled\n"); + pr_debug("disabled\n"); return -EPERM; } @@ -1119,8 +1120,8 @@ static int __init intel_idle_probe(void) if (!id) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6) - pr_debug(PREFIX "does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); + pr_debug("does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -1134,13 +1135,13 @@ static int __init intel_idle_probe(void) !mwait_substates) return -ENODEV; - pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); + pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; cpuidle_state_table = icpu->state_table; - pr_debug(PREFIX "v" INTEL_IDLE_VERSION - " model 0x%X\n", boot_cpu_data.x86_model); + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", + boot_cpu_data.x86_model); return 0; } @@ -1340,8 +1341,7 @@ static void __init intel_idle_cpuidle_driver_init(void) break; if (cstate + 1 > max_cstate) { - printk(PREFIX "max_cstate %d reached\n", - max_cstate); + pr_info("max_cstate %d reached\n", max_cstate); break; } @@ -1358,8 +1358,8 @@ static void __init intel_idle_cpuidle_driver_init(void) /* if state marked as disabled, skip it */ if (cpuidle_state_table[cstate].disabled != 0) { - pr_debug(PREFIX "state %s is disabled", - cpuidle_state_table[cstate].name); + pr_debug("state %s is disabled\n", + cpuidle_state_table[cstate].name); continue; } @@ -1395,7 +1395,7 @@ static int intel_idle_cpu_init(unsigned int cpu) dev->cpu = cpu; if (cpuidle_register_device(dev)) { - pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); + pr_debug("cpuidle_register_device %d failed!\n", cpu); return -EIO; } @@ -1447,8 +1447,8 @@ static int __init intel_idle_init(void) retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { struct cpuidle_driver *drv = cpuidle_get_driver(); - printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", - drv ? drv->name : "none"); + printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), + drv ? drv->name : "none"); goto init_driver_fail; } @@ -1460,8 +1460,8 @@ static int __init intel_idle_init(void) if (retval < 0) goto hp_setup_fail; - pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", - lapic_timer_reliable_states); + pr_debug("lapic_timer_reliable_states 0x%x\n", + lapic_timer_reliable_states); return 0; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 47070cff508c..e70c1c7ba1bf 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -394,29 +394,26 @@ bool pciehp_is_native(struct pci_dev *pdev) /** * pci_acpi_wake_bus - Root bus wakeup notification fork function. - * @work: Work item to handle. + * @context: Device wakeup context. */ -static void pci_acpi_wake_bus(struct work_struct *work) +static void pci_acpi_wake_bus(struct acpi_device_wakeup_context *context) { struct acpi_device *adev; struct acpi_pci_root *root; - adev = container_of(work, struct acpi_device, wakeup.context.work); + adev = container_of(context, struct acpi_device, wakeup.context); root = acpi_driver_data(adev); pci_pme_wakeup_bus(root->bus); } /** * pci_acpi_wake_dev - PCI device wakeup notification work function. - * @handle: ACPI handle of a device the notification is for. - * @work: Work item to handle. + * @context: Device wakeup context. */ -static void pci_acpi_wake_dev(struct work_struct *work) +static void pci_acpi_wake_dev(struct acpi_device_wakeup_context *context) { - struct acpi_device_wakeup_context *context; struct pci_dev *pci_dev; - context = container_of(work, struct acpi_device_wakeup_context, work); pci_dev = to_pci_dev(context->dev); if (pci_dev->pme_poll) @@ -424,7 +421,7 @@ static void pci_acpi_wake_dev(struct work_struct *work) if (pci_dev->current_state == PCI_D3cold) { pci_wakeup_event(pci_dev); - pm_runtime_resume(&pci_dev->dev); + pm_request_resume(&pci_dev->dev); return; } @@ -433,7 +430,7 @@ static void pci_acpi_wake_dev(struct work_struct *work) pci_check_pme_status(pci_dev); pci_wakeup_event(pci_dev); - pm_runtime_resume(&pci_dev->dev); + pm_request_resume(&pci_dev->dev); pci_pme_wakeup_bus(pci_dev->subordinate); } @@ -572,67 +569,29 @@ static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) return state_conv[state]; } -static bool acpi_pci_can_wakeup(struct pci_dev *dev) -{ - struct acpi_device *adev = ACPI_COMPANION(&dev->dev); - return adev ? acpi_device_can_wakeup(adev) : false; -} - -static void acpi_pci_propagate_wakeup_enable(struct pci_bus *bus, bool enable) -{ - while (bus->parent) { - if (!acpi_pm_device_sleep_wake(&bus->self->dev, enable)) - return; - bus = bus->parent; - } - - /* We have reached the root bus. */ - if (bus->bridge) - acpi_pm_device_sleep_wake(bus->bridge, enable); -} - -static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable) -{ - if (acpi_pci_can_wakeup(dev)) - return acpi_pm_device_sleep_wake(&dev->dev, enable); - - acpi_pci_propagate_wakeup_enable(dev->bus, enable); - return 0; -} - -static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable) +static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) { while (bus->parent) { - struct pci_dev *bridge = bus->self; + if (acpi_pm_device_can_wakeup(&bus->self->dev)) + return acpi_pm_set_device_wakeup(&bus->self->dev, enable); - if (bridge->pme_interrupt) - return; - if (!acpi_pm_device_run_wake(&bridge->dev, enable)) - return; bus = bus->parent; } /* We have reached the root bus. */ - if (bus->bridge) - acpi_pm_device_run_wake(bus->bridge, enable); + if (bus->bridge) { + if (acpi_pm_device_can_wakeup(bus->bridge)) + return acpi_pm_set_device_wakeup(bus->bridge, enable); + } + return 0; } -static int acpi_pci_run_wake(struct pci_dev *dev, bool enable) +static int acpi_pci_wakeup(struct pci_dev *dev, bool enable) { - /* - * Per PCI Express Base Specification Revision 2.0 section - * 5.3.3.2 Link Wakeup, platform support is needed for D3cold - * waking up to power on the main link even if there is PME - * support for D3cold - */ - if (dev->pme_interrupt && !dev->runtime_d3cold) - return 0; - - if (!acpi_pm_device_run_wake(&dev->dev, enable)) - return 0; + if (acpi_pm_device_can_wakeup(&dev->dev)) + return acpi_pm_set_device_wakeup(&dev->dev, enable); - acpi_pci_propagate_run_wake(dev->bus, enable); - return 0; + return acpi_pci_propagate_wakeup(dev->bus, enable); } static bool acpi_pci_need_resume(struct pci_dev *dev) @@ -656,8 +615,7 @@ static const struct pci_platform_pm_ops acpi_pci_platform_pm = { .set_state = acpi_pci_set_power_state, .get_state = acpi_pci_get_power_state, .choose_state = acpi_pci_choose_state, - .sleep_wake = acpi_pci_sleep_wake, - .run_wake = acpi_pci_run_wake, + .set_wakeup = acpi_pci_wakeup, .need_resume = acpi_pci_need_resume, }; @@ -780,9 +738,7 @@ static void pci_acpi_setup(struct device *dev) return; device_set_wakeup_capable(dev, true); - acpi_pci_sleep_wake(pci_dev, false); - if (adev->wakeup.flags.run_wake) - device_set_run_wake(dev, true); + acpi_pci_wakeup(pci_dev, false); } static void pci_acpi_cleanup(struct device *dev) @@ -793,10 +749,8 @@ static void pci_acpi_cleanup(struct device *dev) return; pci_acpi_remove_pm_notifier(adev); - if (adev->wakeup.flags.valid) { + if (adev->wakeup.flags.valid) device_set_wakeup_capable(dev, false); - device_set_run_wake(dev, false); - } } static bool pci_acpi_bus_match(struct device *dev) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 00e10bf7f6a2..df4aead394f2 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1219,7 +1219,7 @@ static int pci_pm_runtime_resume(struct device *dev) pci_restore_standard_config(pci_dev); pci_fixup_device(pci_fixup_resume_early, pci_dev); - __pci_enable_wake(pci_dev, PCI_D0, true, false); + pci_enable_wake(pci_dev, PCI_D0, false); pci_fixup_device(pci_fixup_resume, pci_dev); rc = pm->runtime_resume(dev); diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index 1c4af7227bca..a4ac940c7696 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -39,12 +39,7 @@ static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) return PCI_D3hot; } -static int mid_pci_sleep_wake(struct pci_dev *dev, bool enable) -{ - return 0; -} - -static int mid_pci_run_wake(struct pci_dev *dev, bool enable) +static int mid_pci_wakeup(struct pci_dev *dev, bool enable) { return 0; } @@ -59,8 +54,7 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = { .set_state = mid_pci_set_power_state, .get_state = mid_pci_get_power_state, .choose_state = mid_pci_choose_state, - .sleep_wake = mid_pci_sleep_wake, - .run_wake = mid_pci_run_wake, + .set_wakeup = mid_pci_wakeup, .need_resume = mid_pci_need_resume, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 563901cd9c06..0b5302a9fdae 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -574,8 +574,7 @@ static const struct pci_platform_pm_ops *pci_platform_pm; int pci_set_platform_pm(const struct pci_platform_pm_ops *ops) { if (!ops->is_manageable || !ops->set_state || !ops->get_state || - !ops->choose_state || !ops->sleep_wake || !ops->run_wake || - !ops->need_resume) + !ops->choose_state || !ops->set_wakeup || !ops->need_resume) return -EINVAL; pci_platform_pm = ops; return 0; @@ -603,16 +602,10 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR; } -static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) +static inline int platform_pci_set_wakeup(struct pci_dev *dev, bool enable) { return pci_platform_pm ? - pci_platform_pm->sleep_wake(dev, enable) : -ENODEV; -} - -static inline int platform_pci_run_wake(struct pci_dev *dev, bool enable) -{ - return pci_platform_pm ? - pci_platform_pm->run_wake(dev, enable) : -ENODEV; + pci_platform_pm->set_wakeup(dev, enable) : -ENODEV; } static inline bool platform_pci_need_resume(struct pci_dev *dev) @@ -1805,6 +1798,23 @@ static void __pci_pme_active(struct pci_dev *dev, bool enable) pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); } +static void pci_pme_restore(struct pci_dev *dev) +{ + u16 pmcsr; + + if (!dev->pme_support) + return; + + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + if (dev->wakeup_prepared) { + pmcsr |= PCI_PM_CTRL_PME_ENABLE; + } else { + pmcsr &= ~PCI_PM_CTRL_PME_ENABLE; + pmcsr |= PCI_PM_CTRL_PME_STATUS; + } + pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); +} + /** * pci_pme_active - enable or disable PCI device's PME# function * @dev: PCI device to handle. @@ -1872,10 +1882,9 @@ void pci_pme_active(struct pci_dev *dev, bool enable) EXPORT_SYMBOL(pci_pme_active); /** - * __pci_enable_wake - enable PCI device as wakeup event source + * pci_enable_wake - enable PCI device as wakeup event source * @dev: PCI device affected * @state: PCI state from which device will issue wakeup events - * @runtime: True if the events are to be generated at run time * @enable: True to enable event generation; false to disable * * This enables the device as a wakeup event source, or disables it. @@ -1891,17 +1900,18 @@ EXPORT_SYMBOL(pci_pme_active); * Error code depending on the platform is returned if both the platform and * the native mechanism fail to enable the generation of wake-up events */ -int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, - bool runtime, bool enable) +int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { int ret = 0; - if (enable && !runtime && !device_may_wakeup(&dev->dev)) - return -EINVAL; - - /* Don't do the same thing twice in a row for one device. */ - if (!!enable == !!dev->wakeup_prepared) + /* + * Don't do the same thing twice in a row for one device, but restore + * PME Enable in case it has been updated by config space restoration. + */ + if (!!enable == !!dev->wakeup_prepared) { + pci_pme_restore(dev); return 0; + } /* * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don @@ -1916,24 +1926,20 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, pci_pme_active(dev, true); else ret = 1; - error = runtime ? platform_pci_run_wake(dev, true) : - platform_pci_sleep_wake(dev, true); + error = platform_pci_set_wakeup(dev, true); if (ret) ret = error; if (!ret) dev->wakeup_prepared = true; } else { - if (runtime) - platform_pci_run_wake(dev, false); - else - platform_pci_sleep_wake(dev, false); + platform_pci_set_wakeup(dev, false); pci_pme_active(dev, false); dev->wakeup_prepared = false; } return ret; } -EXPORT_SYMBOL(__pci_enable_wake); +EXPORT_SYMBOL(pci_enable_wake); /** * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold @@ -2075,12 +2081,12 @@ int pci_finish_runtime_suspend(struct pci_dev *dev) dev->runtime_d3cold = target_state == PCI_D3cold; - __pci_enable_wake(dev, target_state, true, pci_dev_run_wake(dev)); + pci_enable_wake(dev, target_state, pci_dev_run_wake(dev)); error = pci_set_power_state(dev, target_state); if (error) { - __pci_enable_wake(dev, target_state, true, false); + pci_enable_wake(dev, target_state, false); dev->runtime_d3cold = false; } @@ -2099,7 +2105,7 @@ bool pci_dev_run_wake(struct pci_dev *dev) { struct pci_bus *bus = dev->bus; - if (device_run_wake(&dev->dev)) + if (device_can_wakeup(&dev->dev)) return true; if (!dev->pme_support) @@ -2112,7 +2118,7 @@ bool pci_dev_run_wake(struct pci_dev *dev) while (bus->parent) { struct pci_dev *bridge = bus->self; - if (device_run_wake(&bridge->dev)) + if (device_can_wakeup(&bridge->dev)) return true; bus = bus->parent; @@ -2120,7 +2126,7 @@ bool pci_dev_run_wake(struct pci_dev *dev) /* We have reached the root bus. */ if (bus->bridge) - return device_run_wake(bus->bridge); + return device_can_wakeup(bus->bridge); return false; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f8113e5b9812..240b2c0fed4b 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -47,11 +47,7 @@ int pci_probe_reset_function(struct pci_dev *dev); * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa * - * @sleep_wake: enables/disables the system wake up capability of given device - * - * @run_wake: enables/disables the platform to generate run-time wake-up events - * for given device (the device's wake-up capability has to be - * enabled by @sleep_wake for this feature to work) + * @set_wakeup: enables/disables wakeup capability for the device * * @need_resume: returns 'true' if the given device (which is currently * suspended) needs to be resumed to be configured for system @@ -65,8 +61,7 @@ struct pci_platform_pm_ops { int (*set_state)(struct pci_dev *dev, pci_power_t state); pci_power_t (*get_state)(struct pci_dev *dev); pci_power_t (*choose_state)(struct pci_dev *dev); - int (*sleep_wake)(struct pci_dev *dev, bool enable); - int (*run_wake)(struct pci_dev *dev, bool enable); + int (*set_wakeup)(struct pci_dev *dev, bool enable); bool (*need_resume)(struct pci_dev *dev); }; diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 2dd1c68e6de8..80e58d25006d 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -294,31 +294,29 @@ static irqreturn_t pcie_pme_irq(int irq, void *context) } /** - * pcie_pme_set_native - Set the PME interrupt flag for given device. + * pcie_pme_can_wakeup - Set the wakeup capability flag. * @dev: PCI device to handle. * @ign: Ignored. */ -static int pcie_pme_set_native(struct pci_dev *dev, void *ign) +static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) { - device_set_run_wake(&dev->dev, true); - dev->pme_interrupt = true; + device_set_wakeup_capable(&dev->dev, true); return 0; } /** - * pcie_pme_mark_devices - Set the PME interrupt flag for devices below a port. + * pcie_pme_mark_devices - Set the wakeup flag for devices below a port. * @port: PCIe root port or event collector to handle. * * For each device below given root port, including the port itself (or for each * root complex integrated endpoint if @port is a root complex event collector) - * set the flag indicating that it can signal run-time wake-up events via PCIe - * PME interrupts. + * set the flag indicating that it can signal run-time wake-up events. */ static void pcie_pme_mark_devices(struct pci_dev *port) { - pcie_pme_set_native(port, NULL); + pcie_pme_can_wakeup(port, NULL); if (port->subordinate) - pci_walk_bus(port->subordinate, pcie_pme_set_native, NULL); + pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); } /** diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 8489020ecf44..a3ccc3c795a5 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -794,6 +794,25 @@ config INTEL_CHT_INT33FE This driver instantiates i2c-clients for these, so that standard i2c drivers for these chips can bind to the them. +config INTEL_INT0002_VGPIO + tristate "Intel ACPI INT0002 Virtual GPIO driver" + depends on GPIOLIB && ACPI + select GPIOLIB_IRQCHIP + ---help--- + Some peripherals on Bay Trail and Cherry Trail platforms signal a + Power Management Event (PME) to the Power Management Controller (PMC) + to wakeup the system. When this happens software needs to explicitly + clear the PME bus 0 status bit in the GPE0a_STS register to avoid an + IRQ storm on IRQ 9. + + This is modelled in ACPI through the INT0002 ACPI device, which is + called a "Virtual GPIO controller" in ACPI because it defines the + event handler to call when the PME triggers through _AEI and _L02 + methods as would be done for a real GPIO interrupt in ACPI. + + To compile this driver as a module, choose M here: the module will + be called intel_int0002_vgpio. + config INTEL_HID_EVENT tristate "INTEL HID Event" depends on ACPI diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 182a3ed6605a..ab22ce77fb66 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_TOSHIBA_BT_RFKILL) += toshiba_bluetooth.o obj-$(CONFIG_TOSHIBA_HAPS) += toshiba_haps.o obj-$(CONFIG_TOSHIBA_WMI) += toshiba-wmi.o obj-$(CONFIG_INTEL_CHT_INT33FE) += intel_cht_int33fe.o +obj-$(CONFIG_INTEL_INT0002_VGPIO) += intel_int0002_vgpio.o obj-$(CONFIG_INTEL_HID_EVENT) += intel-hid.o obj-$(CONFIG_INTEL_VBTN) += intel-vbtn.o obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 63ba2cbd04c2..8519e0f97bdd 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/input/sparse-keymap.h> #include <linux/acpi.h> +#include <linux/suspend.h> #include <acpi/acpi_bus.h> MODULE_LICENSE("GPL"); @@ -75,6 +76,7 @@ static const struct key_entry intel_array_keymap[] = { struct intel_hid_priv { struct input_dev *input_dev; struct input_dev *array; + bool wakeup_mode; }; static int intel_hid_set_enable(struct device *device, bool enable) @@ -116,23 +118,37 @@ static void intel_button_array_enable(struct device *device, bool enable) dev_warn(device, "failed to set button capability\n"); } -static int intel_hid_pl_suspend_handler(struct device *device) +static int intel_hid_pm_prepare(struct device *device) { - intel_hid_set_enable(device, false); - intel_button_array_enable(device, false); + struct intel_hid_priv *priv = dev_get_drvdata(device); + + priv->wakeup_mode = true; + return 0; +} +static int intel_hid_pl_suspend_handler(struct device *device) +{ + if (pm_suspend_via_firmware()) { + intel_hid_set_enable(device, false); + intel_button_array_enable(device, false); + } return 0; } static int intel_hid_pl_resume_handler(struct device *device) { - intel_hid_set_enable(device, true); - intel_button_array_enable(device, true); + struct intel_hid_priv *priv = dev_get_drvdata(device); + priv->wakeup_mode = false; + if (pm_resume_via_firmware()) { + intel_hid_set_enable(device, true); + intel_button_array_enable(device, true); + } return 0; } static const struct dev_pm_ops intel_hid_pl_pm_ops = { + .prepare = intel_hid_pm_prepare, .freeze = intel_hid_pl_suspend_handler, .thaw = intel_hid_pl_resume_handler, .restore = intel_hid_pl_resume_handler, @@ -186,6 +202,19 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) unsigned long long ev_index; acpi_status status; + if (priv->wakeup_mode) { + /* Wake up on 5-button array events only. */ + if (event == 0xc0 || !priv->array) + return; + + if (sparse_keymap_entry_from_scancode(priv->array, event)) + pm_wakeup_hard_event(&device->dev); + else + dev_info(&device->dev, "unknown event 0x%x\n", event); + + return; + } + /* 0xC0 is for HID events, other values are for 5 button array */ if (event != 0xc0) { if (!priv->array || @@ -270,6 +299,7 @@ static int intel_hid_probe(struct platform_device *device) "failed to enable HID power button\n"); } + device_init_wakeup(&device->dev, true); return 0; err_remove_notify: diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index c2035e121ac2..61f106377661 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/input/sparse-keymap.h> #include <linux/acpi.h> +#include <linux/suspend.h> #include <acpi/acpi_bus.h> MODULE_LICENSE("GPL"); @@ -46,6 +47,7 @@ static const struct key_entry intel_vbtn_keymap[] = { struct intel_vbtn_priv { struct input_dev *input_dev; + bool wakeup_mode; }; static int intel_vbtn_input_setup(struct platform_device *device) @@ -73,9 +75,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) struct platform_device *device = context; struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev); - if (!sparse_keymap_report_event(priv->input_dev, event, 1, true)) - dev_info(&device->dev, "unknown event index 0x%x\n", - event); + if (priv->wakeup_mode) { + if (sparse_keymap_entry_from_scancode(priv->input_dev, event)) { + pm_wakeup_hard_event(&device->dev); + return; + } + } else if (sparse_keymap_report_event(priv->input_dev, event, 1, true)) { + return; + } + dev_info(&device->dev, "unknown event index 0x%x\n", event); } static int intel_vbtn_probe(struct platform_device *device) @@ -109,6 +117,7 @@ static int intel_vbtn_probe(struct platform_device *device) if (ACPI_FAILURE(status)) return -EBUSY; + device_init_wakeup(&device->dev, true); return 0; } @@ -125,10 +134,34 @@ static int intel_vbtn_remove(struct platform_device *device) return 0; } +static int intel_vbtn_pm_prepare(struct device *dev) +{ + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + + priv->wakeup_mode = true; + return 0; +} + +static int intel_vbtn_pm_resume(struct device *dev) +{ + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + + priv->wakeup_mode = false; + return 0; +} + +static const struct dev_pm_ops intel_vbtn_pm_ops = { + .prepare = intel_vbtn_pm_prepare, + .resume = intel_vbtn_pm_resume, + .restore = intel_vbtn_pm_resume, + .thaw = intel_vbtn_pm_resume, +}; + static struct platform_driver intel_vbtn_pl_driver = { .driver = { .name = "intel-vbtn", .acpi_match_table = intel_vbtn_ids, + .pm = &intel_vbtn_pm_ops, }, .probe = intel_vbtn_probe, .remove = intel_vbtn_remove, diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c new file mode 100644 index 000000000000..92dc230ef5b2 --- /dev/null +++ b/drivers/platform/x86/intel_int0002_vgpio.c @@ -0,0 +1,219 @@ +/* + * Intel INT0002 "Virtual GPIO" driver + * + * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com> + * + * Loosely based on android x86 kernel code which is: + * + * Copyright (c) 2014, Intel Corporation. + * + * Author: Dyut Kumar Sil <dyut.k.sil@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some peripherals on Bay Trail and Cherry Trail platforms signal a Power + * Management Event (PME) to the Power Management Controller (PMC) to wakeup + * the system. When this happens software needs to clear the PME bus 0 status + * bit in the GPE0a_STS register to avoid an IRQ storm on IRQ 9. + * + * This is modelled in ACPI through the INT0002 ACPI device, which is + * called a "Virtual GPIO controller" in ACPI because it defines the event + * handler to call when the PME triggers through _AEI and _L02 / _E02 + * methods as would be done for a real GPIO interrupt in ACPI. Note this + * is a hack to define an AML event handler for the PME while using existing + * ACPI mechanisms, this is not a real GPIO at all. + * + * This driver will bind to the INT0002 device, and register as a GPIO + * controller, letting gpiolib-acpi.c call the _L02 handler as it would + * for a real GPIO controller. + */ + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/gpio/driver.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/suspend.h> + +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> + +#define DRV_NAME "INT0002 Virtual GPIO" + +/* For some reason the virtual GPIO pin tied to the GPE is numbered pin 2 */ +#define GPE0A_PME_B0_VIRT_GPIO_PIN 2 + +#define GPE0A_PME_B0_STS_BIT BIT(13) +#define GPE0A_PME_B0_EN_BIT BIT(13) +#define GPE0A_STS_PORT 0x420 +#define GPE0A_EN_PORT 0x428 + +#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } + +static const struct x86_cpu_id int0002_cpu_ids[] = { +/* + * Limit ourselves to Cherry Trail for now, until testing shows we + * need to handle the INT0002 device on Baytrail too. + * ICPU(INTEL_FAM6_ATOM_SILVERMONT1), * Valleyview, Bay Trail * + */ + ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ + {} +}; + +/* + * As this is not a real GPIO at all, but just a hack to model an event in + * ACPI the get / set functions are dummy functions. + */ + +static int int0002_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + return 0; +} + +static void int0002_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) +{ +} + +static int int0002_gpio_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + return 0; +} + +static void int0002_irq_ack(struct irq_data *data) +{ + outl(GPE0A_PME_B0_STS_BIT, GPE0A_STS_PORT); +} + +static void int0002_irq_unmask(struct irq_data *data) +{ + u32 gpe_en_reg; + + gpe_en_reg = inl(GPE0A_EN_PORT); + gpe_en_reg |= GPE0A_PME_B0_EN_BIT; + outl(gpe_en_reg, GPE0A_EN_PORT); +} + +static void int0002_irq_mask(struct irq_data *data) +{ + u32 gpe_en_reg; + + gpe_en_reg = inl(GPE0A_EN_PORT); + gpe_en_reg &= ~GPE0A_PME_B0_EN_BIT; + outl(gpe_en_reg, GPE0A_EN_PORT); +} + +static irqreturn_t int0002_irq(int irq, void *data) +{ + struct gpio_chip *chip = data; + u32 gpe_sts_reg; + + gpe_sts_reg = inl(GPE0A_STS_PORT); + if (!(gpe_sts_reg & GPE0A_PME_B0_STS_BIT)) + return IRQ_NONE; + + generic_handle_irq(irq_find_mapping(chip->irqdomain, + GPE0A_PME_B0_VIRT_GPIO_PIN)); + + pm_system_wakeup(); + + return IRQ_HANDLED; +} + +static struct irq_chip int0002_irqchip = { + .name = DRV_NAME, + .irq_ack = int0002_irq_ack, + .irq_mask = int0002_irq_mask, + .irq_unmask = int0002_irq_unmask, +}; + +static int int0002_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct x86_cpu_id *cpu_id; + struct gpio_chip *chip; + int irq, ret; + + /* Menlow has a different INT0002 device? <sigh> */ + cpu_id = x86_match_cpu(int0002_cpu_ids); + if (!cpu_id) + return -ENODEV; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "Error getting IRQ: %d\n", irq); + return irq; + } + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->label = DRV_NAME; + chip->parent = dev; + chip->owner = THIS_MODULE; + chip->get = int0002_gpio_get; + chip->set = int0002_gpio_set; + chip->direction_input = int0002_gpio_get; + chip->direction_output = int0002_gpio_direction_output; + chip->base = -1; + chip->ngpio = GPE0A_PME_B0_VIRT_GPIO_PIN + 1; + chip->irq_need_valid_mask = true; + + ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL); + if (ret) { + dev_err(dev, "Error adding gpio chip: %d\n", ret); + return ret; + } + + bitmap_clear(chip->irq_valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN); + + /* + * We manually request the irq here instead of passing a flow-handler + * to gpiochip_set_chained_irqchip, because the irq is shared. + */ + ret = devm_request_irq(dev, irq, int0002_irq, + IRQF_SHARED | IRQF_NO_THREAD, "INT0002", chip); + if (ret) { + dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret); + return ret; + } + + ret = gpiochip_irqchip_add(chip, &int0002_irqchip, 0, handle_edge_irq, + IRQ_TYPE_NONE); + if (ret) { + dev_err(dev, "Error adding irqchip: %d\n", ret); + return ret; + } + + gpiochip_set_chained_irqchip(chip, &int0002_irqchip, irq, NULL); + + return 0; +} + +static const struct acpi_device_id int0002_acpi_ids[] = { + { "INT0002", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, int0002_acpi_ids); + +static struct platform_driver int0002_driver = { + .driver = { + .name = DRV_NAME, + .acpi_match_table = int0002_acpi_ids, + }, + .probe = int0002_probe, +}; + +module_platform_driver(int0002_driver); + +MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); +MODULE_DESCRIPTION("Intel INT0002 Virtual GPIO driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9113876487ed..3a4c1aa0201e 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -149,8 +149,8 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) } if (device_can_wakeup(&dev->dev)) { - error = acpi_pm_device_sleep_wake(&dev->dev, - device_may_wakeup(&dev->dev)); + error = acpi_pm_set_device_wakeup(&dev->dev, + device_may_wakeup(&dev->dev)); if (error) return error; } @@ -185,7 +185,7 @@ static int pnpacpi_resume(struct pnp_dev *dev) } if (device_may_wakeup(&dev->dev)) - acpi_pm_device_sleep_wake(&dev->dev, false); + acpi_pm_set_device_wakeup(&dev->dev, false); if (acpi_device_power_manageable(acpi_dev)) error = acpi_device_set_power(acpi_dev, ACPI_STATE_D0); diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index 85812521b6ba..031a34372191 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -253,6 +253,16 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3188 = { }, }; +static const struct rockchip_iodomain_soc_data soc_data_rk3228 = { + .grf_offset = 0x418, + .supply_names = { + "vccio1", + "vccio2", + "vccio3", + "vccio4", + }, +}; + static const struct rockchip_iodomain_soc_data soc_data_rk3288 = { .grf_offset = 0x380, .supply_names = { @@ -345,6 +355,10 @@ static const struct of_device_id rockchip_iodomain_match[] = { .data = (void *)&soc_data_rk3188 }, { + .compatible = "rockchip,rk3228-io-voltage-domain", + .data = (void *)&soc_data_rk3228 + }, + { .compatible = "rockchip,rk3288-io-voltage-domain", .data = (void *)&soc_data_rk3288 }, diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 9ddad0815ba9..d1694f1def72 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -874,7 +874,9 @@ static int rapl_write_data_raw(struct rapl_domain *rd, cpu = rd->rp->lead_cpu; bits = rapl_unit_xlate(rd, rp->unit, value, 1); - bits |= bits << rp->shift; + bits <<= rp->shift; + bits &= rp->mask; + memset(&ma, 0, sizeof(ma)); ma.msr_no = rd->msrs[rp->id]; diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 7859d738df41..ea829ad798c0 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -584,12 +584,7 @@ static int hcd_pci_suspend_noirq(struct device *dev) static int hcd_pci_resume_noirq(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - - powermac_set_asic(pci_dev, 1); - - /* Go back to D0 and disable remote wakeup */ - pci_back_from_sleep(pci_dev); + powermac_set_asic(to_pci_dev(dev), 1); return 0; } diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index fe851544d7fb..7e995df7a797 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -230,7 +230,6 @@ static int dwc3_pci_probe(struct pci_dev *pci, } device_init_wakeup(dev, true); - device_set_run_wake(dev, true); pci_set_drvdata(pci, dwc); pm_runtime_put(dev); @@ -310,7 +309,7 @@ static int dwc3_pci_runtime_suspend(struct device *dev) { struct dwc3_pci *dwc = dev_get_drvdata(dev); - if (device_run_wake(dev)) + if (device_can_wakeup(dev)) return dwc3_pci_dsm(dwc, PCI_INTEL_BXT_STATE_D3); return -EBUSY; diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index 02260cfdedb1..49effdc0d857 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -131,7 +131,7 @@ static int uhci_pci_init(struct usb_hcd *hcd) /* Intel controllers use non-PME wakeup signalling */ if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_INTEL) - device_set_run_wake(uhci_dev(uhci), 1); + device_set_wakeup_capable(uhci_dev(uhci), true); /* Set up pointers to PCI-specific functions */ uhci->reset_hc = uhci_pci_reset_hc; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c1b163cb68b1..68bc6be447fd 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -315,13 +315,12 @@ struct acpi_device_perf { /* Wakeup Management */ struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ - u8 run_wake:1; /* Run-Wake GPE devices */ u8 notifier_present:1; /* Wake-up notify handler has been installed */ u8 enabled:1; /* Enabled for wakeup */ }; struct acpi_device_wakeup_context { - struct work_struct work; + void (*func)(struct acpi_device_wakeup_context *context); struct device *dev; }; @@ -600,15 +599,20 @@ static inline bool acpi_device_always_present(struct acpi_device *adev) #endif #ifdef CONFIG_PM +void acpi_pm_wakeup_event(struct device *dev); acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, - void (*work_func)(struct work_struct *work)); + void (*func)(struct acpi_device_wakeup_context *context)); acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); +bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); -int acpi_pm_device_run_wake(struct device *, bool); +int acpi_pm_set_device_wakeup(struct device *dev, bool enable); #else +static inline void acpi_pm_wakeup_event(struct device *dev) +{ +} static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, - void (*work_func)(struct work_struct *work)) + void (*func)(struct acpi_device_wakeup_context *context)) { return AE_SUPPORT; } @@ -616,6 +620,10 @@ static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { return AE_SUPPORT; } +static inline bool acpi_pm_device_can_wakeup(struct device *dev) +{ + return false; +} static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) { if (p) @@ -624,16 +632,7 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3_COLD) ? m : ACPI_STATE_D0; } -static inline int acpi_pm_device_run_wake(struct device *dev, bool enable) -{ - return -ENODEV; -} -#endif - -#ifdef CONFIG_PM_SLEEP -int acpi_pm_device_sleep_wake(struct device *, bool); -#else -static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..905117bd5012 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -883,6 +883,8 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) } #endif +extern unsigned int arch_freq_get_on_cpu(int cpu); + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; diff --git a/include/linux/pci.h b/include/linux/pci.h index 58f1ab06c4e8..1ef093866581 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -307,7 +307,6 @@ struct pci_dev { u8 pm_cap; /* PM capability offset */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ - unsigned int pme_interrupt:1; unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ @@ -1099,8 +1098,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); -int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, - bool runtime, bool enable); +int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable); int pci_wake_from_d3(struct pci_dev *dev, bool enable); int pci_prepare_to_sleep(struct pci_dev *dev); int pci_back_from_sleep(struct pci_dev *dev); @@ -1110,12 +1108,6 @@ void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); -static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, - bool enable) -{ - return __pci_enable_wake(dev, state, false, enable); -} - /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); void pci_restore_vc_state(struct pci_dev *dev); diff --git a/include/linux/pm.h b/include/linux/pm.h index a0894bc52bb4..b8b4df09fd8f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -584,7 +584,6 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; - unsigned int run_wake:1; unsigned int runtime_auto:1; bool ignore_children:1; unsigned int no_callbacks:1; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a6685b3dde26..51ec727b4824 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -121,6 +121,8 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); void dev_pm_opp_put_regulators(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); +void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -257,6 +259,13 @@ static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, co static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} + static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { return -ENOTSUPP; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index ca4823e675e2..2efb08a60e63 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -76,16 +76,6 @@ static inline void pm_runtime_put_noidle(struct device *dev) atomic_add_unless(&dev->power.usage_count, -1, 0); } -static inline bool device_run_wake(struct device *dev) -{ - return dev->power.run_wake; -} - -static inline void device_set_run_wake(struct device *dev, bool enable) -{ - dev->power.run_wake = enable; -} - static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED @@ -163,8 +153,6 @@ static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} -static inline bool device_run_wake(struct device *dev) { return false; } -static inline void device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d9718378a8be..0b1cf32edfd7 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -189,6 +189,8 @@ struct platform_suspend_ops { struct platform_freeze_ops { int (*begin)(void); int (*prepare)(void); + void (*wake)(void); + void (*sync)(void); void (*restore)(void); void (*end)(void); }; @@ -428,7 +430,8 @@ extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -extern void pm_wakeup_clear(void); +extern void pm_system_cancel_wakeup(void); +extern void pm_wakeup_clear(bool reset); extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); @@ -478,7 +481,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} -static inline void pm_wakeup_clear(void) {} +static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a8b978c35a6a..e1914c7b85b1 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1108,7 +1108,7 @@ static struct attribute * g[] = { }; -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = g, }; diff --git a/kernel/power/process.c b/kernel/power/process.c index c7209f060eeb..78672d324a6e 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,7 +132,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(); + pm_wakeup_clear(true); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fa46606f3356..b7708e319941 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -36,13 +36,13 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/io.h> -#ifdef CONFIG_STRICT_KERNEL_RWX +#ifdef CONFIG_ARCH_HAS_SET_MEMORY #include <asm/set_memory.h> #endif #include "power.h" -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY) static bool hibernate_restore_protection; static bool hibernate_restore_protection_active; @@ -77,7 +77,7 @@ static inline void hibernate_restore_protection_begin(void) {} static inline void hibernate_restore_protection_end(void) {} static inline void hibernate_restore_protect_page(void *page_address) {} static inline void hibernate_restore_unprotect_page(void *page_address) {} -#endif /* CONFIG_STRICT_KERNEL_RWX */ +#endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); @@ -1929,8 +1929,7 @@ static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, * also be located in the high memory, because of the way in which * copy_data_pages() works. */ -static int swsusp_alloc(struct memory_bitmap *orig_bm, - struct memory_bitmap *copy_bm, +static int swsusp_alloc(struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { if (nr_highmem > 0) { @@ -1976,7 +1975,7 @@ asmlinkage __visible int swsusp_save(void) return -ENOMEM; } - if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { + if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { printk(KERN_ERR "PM: Memory allocation failed\n"); return -ENOMEM; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 15e6baef5c73..3ecf275d7e44 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -72,6 +72,8 @@ static void freeze_begin(void) static void freeze_enter(void) { + trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true); + spin_lock_irq(&suspend_freeze_lock); if (pm_wakeup_pending()) goto out; @@ -84,11 +86,9 @@ static void freeze_enter(void) /* Push all the CPUs into the idle loop. */ wake_up_all_idle_cpus(); - pr_debug("PM: suspend-to-idle\n"); /* Make the current CPU wait so it can enter the idle loop too. */ wait_event(suspend_freeze_wait_head, suspend_freeze_state == FREEZE_STATE_WAKE); - pr_debug("PM: resume from suspend-to-idle\n"); cpuidle_pause(); put_online_cpus(); @@ -98,6 +98,31 @@ static void freeze_enter(void) out: suspend_freeze_state = FREEZE_STATE_NONE; spin_unlock_irq(&suspend_freeze_lock); + + trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false); +} + +static void s2idle_loop(void) +{ + pr_debug("PM: suspend-to-idle\n"); + + do { + freeze_enter(); + + if (freeze_ops && freeze_ops->wake) + freeze_ops->wake(); + + dpm_resume_noirq(PMSG_RESUME); + if (freeze_ops && freeze_ops->sync) + freeze_ops->sync(); + + if (pm_wakeup_pending()) + break; + + pm_wakeup_clear(false); + } while (!dpm_suspend_noirq(PMSG_SUSPEND)); + + pr_debug("PM: resume from suspend-to-idle\n"); } void freeze_wake(void) @@ -371,10 +396,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) * all the devices are suspended. */ if (state == PM_SUSPEND_FREEZE) { - trace_suspend_resume(TPS("machine_suspend"), state, true); - freeze_enter(); - trace_suspend_resume(TPS("machine_suspend"), state, false); - goto Platform_wake; + s2idle_loop(); + goto Platform_early_resume; } error = disable_nonboot_cpus(); diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 6437ef39aeea..5fd5c5b8c7b8 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -26,6 +26,15 @@ union msr_pstate { unsigned res3:21; unsigned en:1; } bits; + struct { + unsigned fid:8; + unsigned did:6; + unsigned vid:8; + unsigned iddval:8; + unsigned idddiv:2; + unsigned res1:30; + unsigned en:1; + } fam17h_bits; unsigned long long val; }; @@ -35,6 +44,8 @@ static int get_did(int family, union msr_pstate pstate) if (family == 0x12) t = pstate.val & 0xf; + else if (family == 0x17) + t = pstate.fam17h_bits.did; else t = pstate.bits.did; @@ -44,16 +55,20 @@ static int get_did(int family, union msr_pstate pstate) static int get_cof(int family, union msr_pstate pstate) { int t; - int fid, did; + int fid, did, cof; did = get_did(family, pstate); - - t = 0x10; - fid = pstate.bits.fid; - if (family == 0x11) - t = 0x8; - - return (100 * (fid + t)) >> did; + if (family == 0x17) { + fid = pstate.fam17h_bits.fid; + cof = 200 * fid / did; + } else { + t = 0x10; + fid = pstate.bits.fid; + if (family == 0x11) + t = 0x8; + cof = (100 * (fid + t)) >> did; + } + return cof; } /* Needs: diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index afb66f80554e..799a18be60aa 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -70,6 +70,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_IS_SNB 0x00000020 #define CPUPOWER_CAP_INTEL_IDA 0x00000040 +#define CPUPOWER_AMD_CPBDIS 0x02000000 + #define MAX_HW_PSTATES 10 struct cpupower_cpu_info { diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 1609243f5c64..601d719d4e60 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -2,11 +2,14 @@ #include "helpers/helpers.h" +#define MSR_AMD_HWCR 0xc0010015 + int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int *states) { struct cpupower_cpu_info cpu_info; int ret; + unsigned long long val; *support = *active = *states = 0; @@ -16,10 +19,22 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) { *support = 1; - amd_pci_get_num_boost_states(active, states); - if (ret <= 0) - return ret; - *support = 1; + + /* AMD Family 0x17 does not utilize PCI D18F4 like prior + * families and has no fixed discrete boost states but + * has Hardware determined variable increments instead. + */ + + if (cpu_info.family == 0x17) { + if (!read_msr(cpu, MSR_AMD_HWCR, &val)) { + if (!(val & CPUPOWER_AMD_CPBDIS)) + *active = 1; + } + } else { + ret = amd_pci_get_num_boost_states(active, states); + if (ret) + return ret; + } } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) *support = *active = 1; return 0; diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b11294730771..0dafba2c1e7d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -57,7 +57,6 @@ unsigned int list_header_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; -unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; @@ -93,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; +int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -151,6 +151,8 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 16 struct thread_data { + struct timeval tv_begin; + struct timeval tv_end; unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; @@ -301,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { + if (!do_migrate) + return 0; + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -384,8 +389,14 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "Mod%c6" }, { 0x0, "sysfs" }, + { 0x0, "Totl%C0" }, + { 0x0, "Any%C0" }, + { 0x0, "GFX%C0" }, + { 0x0, "CPUGFX%" }, }; + + #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) #define BIC_Package (1ULL << 0) #define BIC_Avg_MHz (1ULL << 1) @@ -426,6 +437,10 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 36) #define BIC_Mod_c6 (1ULL << 37) #define BIC_sysfs (1ULL << 38) +#define BIC_Totl_c0 (1ULL << 39) +#define BIC_Any_c0 (1ULL << 40) +#define BIC_GFX_c0 (1ULL << 41) +#define BIC_CPUGFX (1ULL << 42) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present = BIC_sysfs; @@ -521,6 +536,8 @@ void print_header(char *delim) struct msr_counter *mp; int printed = 0; + if (debug) + outp += sprintf(outp, "usec %s", delim); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -599,12 +616,14 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_Any_c0)) outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFX_c0)) outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPUGFX)) outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); - } if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); @@ -771,6 +790,14 @@ int format_counters(struct thread_data *t, struct core_data *c, (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) return 0; + if (debug) { + /* on each row, print how many usec each timestamp took to gather */ + struct timeval tv; + + timersub(&t->tv_end, &t->tv_begin, &tv); + outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); + } + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -912,12 +939,14 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + if (DO_BIC(BIC_Any_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + if (DO_BIC(BIC_GFX_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + if (DO_BIC(BIC_CPUGFX)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); - } if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); @@ -1038,12 +1067,16 @@ delta_package(struct pkg_data *new, struct pkg_data *old) int i; struct msr_counter *mp; - if (do_skl_residency) { + + if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; - } + old->pc2 = new->pc2 - old->pc2; if (DO_BIC(BIC_Pkgpc3)) old->pc3 = new->pc3 - old->pc3; @@ -1292,12 +1325,14 @@ int sum_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; - } average.packages.pc2 += p->pc2; if (DO_BIC(BIC_Pkgpc3)) @@ -1357,12 +1392,14 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c7 /= topo.num_cores; average.cores.mc6_us /= topo.num_cores; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) average.packages.pkg_wtd_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_Any_c0)) average.packages.pkg_any_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_GFX_c0)) average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + if (DO_BIC(BIC_CPUGFX)) average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; - } average.packages.pc2 /= topo.num_packages; if (DO_BIC(BIC_Pkgpc3)) @@ -1482,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) struct msr_counter *mp; int i; + + gettimeofday(&t->tv_begin, (struct timezone *)NULL); + if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; @@ -1565,7 +1605,7 @@ retry: /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - return 0; + goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) @@ -1601,15 +1641,21 @@ retry: /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) - return 0; + goto done; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) { if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) return -10; + } + if (DO_BIC(BIC_Any_c0)) { if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) return -11; + } + if (DO_BIC(BIC_GFX_c0)) { if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) return -12; + } + if (DO_BIC(BIC_CPUGFX)) { if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) return -13; } @@ -1688,6 +1734,8 @@ retry: if (get_mp(cpu, mp, &p->counter[i])) return -10; } +done: + gettimeofday(&t->tv_end, (struct timezone *)NULL); return 0; } @@ -3895,6 +3943,9 @@ void decode_misc_enable_msr(void) { unsigned long long msr; + if (!genuine_intel) + return; + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", base_cpu, msr, @@ -4198,7 +4249,12 @@ void process_cpuid() BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_hsw_msrs(family, model); - do_skl_residency = has_skl_msrs(family, model); + if (has_skl_msrs(family, model)) { + BIC_PRESENT(BIC_Totl_c0); + BIC_PRESENT(BIC_Any_c0); + BIC_PRESENT(BIC_GFX_c0); + BIC_PRESENT(BIC_CPUGFX); + } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); @@ -4578,7 +4634,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 17.04.12" + fprintf(outf, "turbostat version 17.06.23" " - Len Brown <lenb@kernel.org>\n"); } @@ -4951,6 +5007,7 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, + {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -4962,7 +5019,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5005,6 +5062,9 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; + case 'm': + do_migrate = 1; + break; case 'o': outf = fopen_or_die(optarg, "w"); break; diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 971c9ffdcb50..a711eec0c895 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,10 +1,27 @@ -DESTDIR ?= +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(CURDIR) +PREFIX := /usr +DESTDIR := + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif x86_energy_perf_policy : x86_energy_perf_policy.c +CFLAGS += -Wall +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ +.PHONY : clean clean : - rm -f x86_energy_perf_policy + @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy + +install : x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 -install : - install x86_energy_perf_policy ${DESTDIR}/usr/bin/ - install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 index 8eaaad648cdb..17db1c3af4d0 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -1,104 +1,213 @@ -.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> +.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com> .\" Distributed under the GPL, Copyleft 1994. .TH X86_ENERGY_PERF_POLICY 8 .SH NAME -x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS +x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers .SH SYNOPSIS -.ft B .B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB "\-r" +.RB "[ options ] [ scope ] [field \ value]" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'performance' +.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'normal' +.RB "cpu-list, pkg-list: # | #,# | #-# | all" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'powersave' +.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB n +.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value)" .br +.RB "value: # | default | performance | balance-performance | balance-power | power" .SH DESCRIPTION \fBx86_energy_perf_policy\fP -allows software to convey -its policy for the relative importance of performance -versus energy savings to the processor. +displays and updates energy-performance policy settings specific to +Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR) +updates, no matter if the Linux cpufreq sub-system is enabled or not. -The processor uses this information in model-specific ways -when it must select trade-offs between performance and -energy efficiency. +Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB) +may affect a wide range of hardware decisions, +such as how aggressively the hardware enters and exits CPU idle states (C-states) +and Processor Performance States (P-states). +This policy hint does not replace explicit OS C-state and P-state selection. +Rather, it tells the hardware how aggressively to implement those selections. +Further, it allows the OS to influence energy/performance trade-offs where there +is no software interface, such as in the opportunistic "turbo-mode" P-state range. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU, +but some implementations +share a single MSR among all CPUs in each processor package. +On those systems, a write to EPB on one processor will +be visible, and will have an effect, on all CPUs +in the same processor package. -This policy hint does not supersede Processor Performance states -(P-states) or CPU Idle power states (C-states), but allows -software to have influence where it would otherwise be unable -to express a preference. +Hardware P-States (HWP) are effectively an expansion of hardware +P-state control from the opportunistic turbo-mode P-state range +to include the entire range of available P-states. +On Broadwell Xeon, the initial HWP implementation, EBP influenced HWP. +That influence was removed in subsequent generations, +where it was moved to the +Energy_Performance_Preference (EPP) field in +a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG. -For example, this setting may tell the hardware how -aggressively or conservatively to control frequency -in the "turbo range" above the explicitly OS-controlled -P-state frequency range. It may also tell the hardware -how aggressively is should enter the OS requested C-states. +EPP is the most commonly managed knob in HWP mode, +but MSR_IA32_HWP_REQUEST also allows the user to specify +minimum-frequency for Quality-of-Service, +and maximum-frequency for power-capping. +MSR_IA32_HWP_REQUEST is defined per-CPU. -Support for this feature is indicated by CPUID.06H.ECX.bit3 -per the Intel Architectures Software Developer's Manual. +MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST, +but it can simultaneously set the default policy for all CPUs within a package. +A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is +over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG. -.SS Options -\fB-c\fP limits operation to a single CPU. -The default is to operate on all CPUs. -Note that MSR_IA32_ENERGY_PERF_BIAS is defined per -logical processor, but that the initial implementations -of the MSR were shared among all processors in each package. -.PP -\fB-v\fP increases verbosity. By default -x86_energy_perf_policy is silent. -.PP -\fB-r\fP is for "read-only" mode - the unchanged state -is read and displayed. +MSR_HWP_CAPABILITIES shows the default values for the fields +in MSR_IA32_HWP_REQUEST. It is displayed when no values +are being written. + +.SS SCOPE OPTIONS .PP -.I performance -Set a policy where performance is paramount. -The processor will be unwilling to sacrifice any performance -for the sake of energy saving. This is the hardware default. +\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list. +The CPU-list may be comma-separated CPU numbers, with dash for range +or the string "all". Eg. '--cpu 1,4,6-8' or '--cpu all'. +When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu +MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1), +or exempt from MSR_IA32_HWP_REQUEST_PKG (0). + +\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list. +The list is a string of individual package numbers separated +by commas, and or ranges of package numbers separated by a dash, +or the string "all". +For example '--pkg 1,3' or '--pkg all' + +.SS VALUE OPTIONS .PP -.I normal +.I normal | default Set a policy with a normal balance between performance and energy efficiency. The processor will tolerate minor performance compromise for potentially significant energy savings. -This reasonable default for most desktops and servers. +This is a reasonable default for most desktops and servers. +"default" is a synonym for "normal". .PP -.I powersave +.I performance +Set a policy for maximum performance, +accepting no performance sacrifice for the benefit of energy efficiency. +.PP +.I balance-performance +Set a policy with a high priority on performance, +but allowing some performance loss to benefit energy efficiency. +.PP +.I balance-power +Set a policy where the performance and power are balanced. +This is the default. +.PP +.I power Set a policy where the processor can accept -a measurable performance hit to maximize energy efficiency. +a measurable performance impact to maximize energy efficiency. + .PP -.I n -Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. -The range of valid numbers is 0-15, where 0 is maximum -performance and 15 is maximum energy efficiency. +The following table shows the mapping from the value strings above to actual MSR values. +This mapping is defined in the Linux-kernel header, msr-index.h. +.nf +VALUE STRING EPB EPP +performance 0 0 +balance-performance 4 128 +normal, default 6 128 +balance-power 8 192 +power 15 255 +.fi +.PP +For MSR_IA32_HWP_REQUEST performance fields +(--hwp-min, --hwp-max, --hwp-desired), the value option +is in units of 100 MHz, Eg. 12 signifies 1200 MHz. + +.SS FIELD OPTIONS +\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with +the value-string. In addition, enables turbo-mode and HWP-mode, if they were previous disabled. +Thus "--all normal" will set a system without cpufreq into a well known configuration. +.PP +\fB-B, --epb\fP set EPB per-core or per-package. +See value strings in the table above. +.PP +\fB-d, --debug\fP debug increases verbosity. By default +x86_energy_perf_policy is silent for updates, +and verbose for read-only mode. +.PP +\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package. +See value strings in the table above. +.PP +\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.min. +.PP +\fB-M, --hwp-max\fP request HWP not exceed a the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.max. +.PP +\fB-D, --hwp-desired\fP request HWP 'desired' frequency. +The "normal" setting is 0, which +corresponds to 'full autonomous' HWP control. +Non-zero performance values request a specific performance +level on this processor, specified in multiples of 100 MHz. +.PP +\fB-w, --hwp-window\fP specify integer number of microsec +in the sliding window that HWP uses to maintain average frequency. +This parameter is meaningful only when the "desired" field above is non-zero. +Default is 0, allowing the HW to choose. +.SH OTHER OPTIONS +.PP +\fB-f, --force\fP writes the specified values without bounds checking. +.PP +\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu, +indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1) +or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings. +The default is exempt. +.PP +\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode. +.PP +\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode. +.PP +\fB-v, --version\fP print version and exit. +.PP +If no request to change policy is made, +the default behavior is to read +and display the current system state, +including the default capabilities. +.SH WARNING +.PP +This utility writes directly to Model Specific Registers. +There is no locking or coordination should this utility +be used to modify HWP limit fields at the same time that +intel_pstate's sysfs attributes access the same MSRs. +.PP +Note that --hwp-desired and --hwp-window are considered experimental. +Future versions of Linux reserve the right to access these +fields internally -- potentially conflicting with user-space access. +.SH EXAMPLE +.nf +# sudo x86_energy_perf_policy +cpu0: EPB 6 +cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu1: EPB 6 +cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu2: EPB 6 +cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu3: EPB 6 +cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35 +.fi .SH NOTES -.B "x86_energy_perf_policy " +.B "x86_energy_perf_policy" runs only as root. .SH FILES .ta .nf /dev/cpu/*/msr .fi - .SH "SEE ALSO" +.nf msr(4) +Intel(R) 64 and IA-32 Architectures Software Developer's Manual +.fi .PP .SH AUTHORS .nf -Written by Len Brown <len.brown@intel.com> +Len Brown diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 40b3e5482f8a..65bbe627a425 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -3,322 +3,1424 @@ * policy preference bias on recent X86 processors. */ /* - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2010 - 2017 Intel Corporation. * Len Brown <len.brown@intel.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * This program is released under GPL v2 */ +#define _GNU_SOURCE +#include MSRHEADER #include <stdio.h> #include <unistd.h> #include <sys/types.h> +#include <sched.h> #include <sys/stat.h> #include <sys/resource.h> +#include <getopt.h> +#include <err.h> #include <fcntl.h> #include <signal.h> #include <sys/time.h> +#include <limits.h> #include <stdlib.h> #include <string.h> +#include <cpuid.h> +#include <errno.h> + +#define OPTARG_NORMAL (INT_MAX - 1) +#define OPTARG_POWER (INT_MAX - 2) +#define OPTARG_BALANCE_POWER (INT_MAX - 3) +#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4) +#define OPTARG_PERFORMANCE (INT_MAX - 5) + +struct msr_hwp_cap { + unsigned char highest; + unsigned char guaranteed; + unsigned char efficient; + unsigned char lowest; +}; -unsigned int verbose; /* set with -v */ -unsigned int read_only; /* set with -r */ +struct msr_hwp_request { + unsigned char hwp_min; + unsigned char hwp_max; + unsigned char hwp_desired; + unsigned char hwp_epp; + unsigned int hwp_window; + unsigned char hwp_use_pkg; +} req_update; + +unsigned int debug; +unsigned int verbose; +unsigned int force; char *progname; -unsigned long long new_bias; -int cpu = -1; +int base_cpu; +unsigned char update_epb; +unsigned long long new_epb; +unsigned char turbo_is_enabled; +unsigned char update_turbo; +unsigned char turbo_update_value; +unsigned char update_hwp_epp; +unsigned char update_hwp_min; +unsigned char update_hwp_max; +unsigned char update_hwp_desired; +unsigned char update_hwp_window; +unsigned char update_hwp_use_pkg; +unsigned char update_hwp_enable; +#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg) +int max_cpu_num; +int max_pkg_num; +#define MAX_PACKAGES 64 +unsigned int first_cpu_in_pkg[MAX_PACKAGES]; +unsigned long long pkg_present_set; +unsigned long long pkg_selected_set; +cpu_set_t *cpu_present_set; +cpu_set_t *cpu_selected_set; +int genuine_intel; + +size_t cpu_setsize; + +char *proc_stat = "/proc/stat"; + +unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */ +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */ + +unsigned int bdx_highest_ratio; /* - * Usage: - * - * -c cpu: limit action to a single CPU (default is all CPUs) - * -v: verbose output (can invoke more than once) - * -r: read-only, don't change any settings - * - * performance - * Performance is paramount. - * Unwilling to sacrifice any performance - * for the sake of energy saving. (hardware default) - * - * normal - * Can tolerate minor performance compromise - * for potentially significant energy savings. - * (reasonable default for most desktops and servers) - * - * powersave - * Can tolerate significant performance hit - * to maximize energy savings. - * - * n - * a numerical value to write to the underlying MSR. + * maintain compatibility with original implementation, but don't document it: */ void usage(void) { - printf("%s: [-c cpu] [-v] " - "(-r | 'performance' | 'normal' | 'powersave' | n)\n", - progname); + fprintf(stderr, "%s [options] [scope][field value]\n", progname); + fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n"); + fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n"); + fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n"); + fprintf(stderr, + "value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n"); + fprintf(stderr, "--hwp-window usec\n"); + + fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n"); + fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname); + exit(1); } -#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +/* + * If bdx_highest_ratio is set, + * then we must translate between MSR format and simple ratio + * used on the cmdline. + */ +int ratio_2_msr_perf(int ratio) +{ + int msr_perf; + + if (!bdx_highest_ratio) + return ratio; + + msr_perf = ratio * 255 / bdx_highest_ratio; + + if (debug) + fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio); + + return msr_perf; +} +int msr_perf_2_ratio(int msr_perf) +{ + int ratio; + double d; + + if (!bdx_highest_ratio) + return msr_perf; + + d = (double)msr_perf * (double) bdx_highest_ratio / 255.0; + d = d + 0.5; /* round */ + ratio = (int)d; + + if (debug) + fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d); + + return ratio; +} +int parse_cmdline_epb(int i) +{ + if (!has_epb) + errx(1, "EPB not enabled on this platform"); + + update_epb = 1; + + switch (i) { + case OPTARG_POWER: + return ENERGY_PERF_BIAS_POWERSAVE; + case OPTARG_BALANCE_POWER: + return ENERGY_PERF_BIAS_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + return ENERGY_PERF_BIAS_NORMAL; + case OPTARG_BALANCE_PERFORMANCE: + return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return ENERGY_PERF_BIAS_PERFORMANCE; + } + if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE) + errx(1, "--epb must be from 0 to 15"); + return i; +} + +#define HWP_CAP_LOWEST 0 +#define HWP_CAP_HIGHEST 255 + +/* + * "performance" changes hwp_min to cap.highest + * All others leave it at cap.lowest + */ +int parse_cmdline_hwp_min(int i) +{ + update_hwp_min = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_CAP_LOWEST; + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * "power" changes hwp_max to cap.lowest + * All others leave it at cap.highest + */ +int parse_cmdline_hwp_max(int i) +{ + update_hwp_max = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_CAP_LOWEST; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * for --hwp-des, all strings leave it in autonomous mode + * If you want to change it, you need to explicitly pick a value + */ +int parse_cmdline_hwp_desired(int i) +{ + update_hwp_desired = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_NORMAL: + case OPTARG_PERFORMANCE: + return 0; /* autonomous */ + } + return i; +} + +int parse_cmdline_hwp_window(int i) +{ + unsigned int exponent; + + update_hwp_window = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 0; + } + if (i < 0 || i > 1270000000) { + fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n"); + usage(); + } + for (exponent = 0; ; ++exponent) { + if (debug) + printf("%d 10^%d\n", i, exponent); + + if (i <= 127) + break; + + i = i / 10; + } + if (debug) + fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i); + + return (exponent << 7) | i; +} +int parse_cmdline_hwp_epp(int i) +{ + update_hwp_epp = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_EPP_POWERSAVE; + case OPTARG_BALANCE_POWER: + return HWP_EPP_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_EPP_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return HWP_EPP_PERFORMANCE; + } + if (i < 0 || i > 0xff) { + fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n"); + usage(); + } + return i; +} +int parse_cmdline_turbo(int i) +{ + update_turbo = 1; + + switch (i) { + case OPTARG_POWER: + return 0; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 1; + } + if (i < 0 || i > 1) { + fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n"); + usage(); + } + return i; +} + +int parse_optarg_string(char *s) +{ + int i; + char *endptr; + + if (!strncmp(s, "default", 7)) + return OPTARG_NORMAL; + + if (!strncmp(s, "normal", 6)) + return OPTARG_NORMAL; + + if (!strncmp(s, "power", 9)) + return OPTARG_POWER; + + if (!strncmp(s, "balance-power", 17)) + return OPTARG_BALANCE_POWER; + + if (!strncmp(s, "balance-performance", 19)) + return OPTARG_BALANCE_PERFORMANCE; + + if (!strncmp(s, "performance", 11)) + return OPTARG_PERFORMANCE; + + i = strtol(s, &endptr, 0); + if (s == endptr) { + fprintf(stderr, "no digits in \"%s\"\n", s); + usage(); + } + if (i == LONG_MIN || i == LONG_MAX) + errx(-1, "%s", s); + + if (i > 0xFF) + errx(-1, "%d (0x%x) must be < 256", i, i); + + if (i < 0) + errx(-1, "%d (0x%x) must be >= 0", i, i); + return i; +} + +void parse_cmdline_all(char *s) +{ + force++; + update_hwp_enable = 1; + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s)); + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s)); + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s)); + if (has_epb) + new_epb = parse_cmdline_epb(parse_optarg_string(s)); + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s)); + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s)); + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s)); +} + +void validate_cpu_selected_set(void) +{ + int cpu; + + if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0) + errx(0, "no CPUs requested"); + + for (cpu = 0; cpu <= max_cpu_num; ++cpu) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set)) + if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + errx(1, "Requested cpu% is not present", cpu); + } +} + +void parse_cmdline_cpu(char *s) +{ + char *startp, *endp; + int cpu = 0; + + if (pkg_selected_set) { + usage(); + errx(1, "--cpu | --pkg"); + } + cpu_selected_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_selected_set == NULL) + err(1, "cpu_selected_set"); + CPU_ZERO_S(cpu_setsize, cpu_selected_set); + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_cpu; -#define BIAS_PERFORMANCE 0 -#define BIAS_BALANCE 6 -#define BIAS_POWERSAVE 15 + startp++; + end_cpu = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (cpu <= end_cpu) { + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + cpu++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 1) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + /* "--cpu even" is not documented */ + if (strncmp(startp, "even", 4) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 4; + if (*startp == 0) + break; + } + + /* "--cpu odd" is not documented */ + if (strncmp(startp, "odd", 3) == 0) { + for (cpu = 1; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + + cpu = strtol(startp, &endp, 10); + if (startp == endp) + errx(1, "--cpu cpu-set: confused by '%s'", startp); + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + startp = endp; + } + + validate_cpu_selected_set(); + +} + +void parse_cmdline_pkg(char *s) +{ + char *startp, *endp; + int pkg = 0; + + if (cpu_selected_set) { + usage(); + errx(1, "--pkg | --cpu"); + } + pkg_selected_set = 0; + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_pkg; + + startp++; + end_pkg = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (pkg <= end_pkg) { + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + pkg++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + pkg_selected_set = pkg_present_set; + return; + } + + pkg = strtol(startp, &endp, 10); + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d Exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + startp = endp; + } +} + +void for_packages(unsigned long long pkg_set, int (func)(int)) +{ + int pkg_num; + + for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) { + if (pkg_set & (1UL << pkg_num)) + func(pkg_num); + } +} + +void print_version(void) +{ + printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n"); +} void cmdline(int argc, char **argv) { int opt; + int option_index = 0; + + static struct option long_options[] = { + {"all", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, + {"pkg", required_argument, 0, 'p'}, + {"debug", no_argument, 0, 'd'}, + {"hwp-desired", required_argument, 0, 'D'}, + {"epb", required_argument, 0, 'B'}, + {"force", no_argument, 0, 'f'}, + {"hwp-enable", no_argument, 0, 'e'}, + {"help", no_argument, 0, 'h'}, + {"hwp-epp", required_argument, 0, 'P'}, + {"hwp-min", required_argument, 0, 'm'}, + {"hwp-max", required_argument, 0, 'M'}, + {"read", no_argument, 0, 'r'}, + {"turbo-enable", required_argument, 0, 't'}, + {"hwp-use-pkg", required_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"hwp-window", required_argument, 0, 'w'}, + {0, 0, 0, 0 } + }; progname = argv[0]; - while ((opt = getopt(argc, argv, "+rvc:")) != -1) { + while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw", + long_options, &option_index)) != -1) { switch (opt) { + case 'a': + parse_cmdline_all(optarg); + break; + case 'B': + new_epb = parse_cmdline_epb(parse_optarg_string(optarg)); + break; case 'c': - cpu = atoi(optarg); + parse_cmdline_cpu(optarg); + break; + case 'e': + update_hwp_enable = 1; + break; + case 'h': + usage(); + break; + case 'd': + debug++; + verbose++; + break; + case 'f': + force++; + break; + case 'D': + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg)); + break; + case 'm': + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg)); + break; + case 'M': + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg)); + break; + case 'p': + parse_cmdline_pkg(optarg); + break; + case 'P': + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg)); break; case 'r': - read_only = 1; + /* v1 used -r to specify read-only mode, now the default */ + break; + case 't': + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg)); + break; + case 'u': + update_hwp_use_pkg++; + if (atoi(optarg) == 0) + req_update.hwp_use_pkg = 0; + else + req_update.hwp_use_pkg = 1; break; case 'v': - verbose++; + print_version(); + exit(0); + break; + case 'w': + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg)); break; default: usage(); } } - /* if -r, then should be no additional optind */ - if (read_only && (argc > optind)) - usage(); - /* - * if no -r , then must be one additional optind + * v1 allowed "performance"|"normal"|"power" with no policy specifier + * to update BIAS. Continue to support that, even though no longer documented. */ - if (!read_only) { + if (argc == optind + 1) + new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind])); - if (argc != optind + 1) { - printf("must supply -r or policy param\n"); - usage(); - } + if (argc > optind + 1) { + fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]); + usage(); + } +} - if (!strcmp("performance", argv[optind])) { - new_bias = BIAS_PERFORMANCE; - } else if (!strcmp("normal", argv[optind])) { - new_bias = BIAS_BALANCE; - } else if (!strcmp("powersave", argv[optind])) { - new_bias = BIAS_POWERSAVE; - } else { - char *endptr; - - new_bias = strtoull(argv[optind], &endptr, 0); - if (endptr == argv[optind] || - new_bias > BIAS_POWERSAVE) { - fprintf(stderr, "invalid value: %s\n", - argv[optind]); - usage(); - } - } + +int get_msr(int cpu, int offset, unsigned long long *msr) +{ + int retval; + char pathname[32]; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pread(fd, msr, sizeof(*msr), offset); + if (retval != sizeof(*msr)) + err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + + if (debug > 1) + fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); + + close(fd); + return 0; +} + +int put_msr(int cpu, int offset, unsigned long long new_msr) +{ + char pathname[32]; + int retval; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pwrite(fd, &new_msr, sizeof(new_msr), offset); + if (retval != sizeof(new_msr)) + err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval); + + close(fd); + + if (debug > 1) + fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr); + + return 0; +} + +void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + printf("HWP_CAP: low %d eff %d guar %d high %d\n", + cap->lowest, cap->efficient, cap->guaranteed, cap->highest); +} +void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr)); + cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr)); + cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr)); + cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr)); +} + +void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + if (str) + printf("%s", str); + + printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg); +} +void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str) +{ + printf("pkg%d: ", pkg); + + if (str) + printf("%s", str); + + printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7); +} +void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff)); + hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff)); + hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff)); + hwp_req->hwp_epp = (((msr) >> 24) & 0xff); + hwp_req->hwp_window = (((msr) >> 32) & 0x3ff); + hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1); +} + +void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr = 0; + + if (debug > 1) + printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n", + cpu, hwp_req->hwp_min, hwp_req->hwp_max, + hwp_req->hwp_desired, hwp_req->hwp_epp, + hwp_req->hwp_window, hwp_req->hwp_use_pkg); + + msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min)); + msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max)); + msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired)); + msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp); + msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window); + msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg); + + put_msr(cpu, msr_offset, msr); +} + +int print_cpu_msrs(int cpu) +{ + unsigned long long msr; + struct msr_hwp_request req; + struct msr_hwp_cap cap; + + if (has_epb) { + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr); } + + if (!has_hwp) + return 0; + + read_hwp_request(cpu, &req, MSR_HWP_REQUEST); + print_hwp_request(cpu, &req, ""); + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + print_hwp_cap(cpu, &cap, ""); + + return 0; +} + +int print_pkg_msrs(int pkg) +{ + struct msr_hwp_request req; + unsigned long long msr; + + if (!has_hwp) + return 0; + + read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); + print_hwp_request_pkg(pkg, &req, ""); + + if (has_hwp_notify) { + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n", + pkg, msr, + ((msr) & 0x2) ? "EN" : "Dis", + ((msr) & 0x1) ? "EN" : "Dis"); + } + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n", + pkg, msr, + ((msr) & 0x4) ? "" : "No-", + ((msr) & 0x1) ? "" : "No-"); + + return 0; } /* - * validate_cpuid() - * returns on success, quietly exits on failure (make verbose with -v) + * Assumption: All HWP systems have 100 MHz bus clock */ -void validate_cpuid(void) +int ratio_2_sysfs_khz(int ratio) { - unsigned int eax, ebx, ecx, edx, max_level; - unsigned int fms, family, model, stepping; + int bclk_khz = 100 * 1000; /* 100,000 KHz = 100 MHz */ - eax = ebx = ecx = edx = 0; + return ratio * bclk_khz; +} +/* + * If HWP is enabled and cpufreq sysfs attribtes are present, + * then update sysfs, so that it will not become + * stale when we write to MSRs. + * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq, + * so we don't have to touch that.) + */ +void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio) +{ + char pathname[64]; + FILE *fp; + int retval; + int khz; - asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), - "=d" (edx) : "a" (0)); + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq", + cpu, is_max ? "max" : "min"); - if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { - if (verbose) - fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", - (char *)&ebx, (char *)&edx, (char *)&ecx); - exit(1); + fp = fopen(pathname, "w"); + if (!fp) { + if (debug) + perror(pathname); + return; } - asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); - family = (fms >> 8) & 0xf; - model = (fms >> 4) & 0xf; - stepping = fms & 0xf; - if (family == 6 || family == 0xf) - model += ((fms >> 16) & 0xf) << 4; + khz = ratio_2_sysfs_khz(ratio); + retval = fprintf(fp, "%d", khz); + if (retval < 0) + if (debug) + perror("fprintf"); + if (debug) + printf("echo %d > %s\n", khz, pathname); - if (verbose > 1) - printf("CPUID %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", max_level, - family, model, stepping, family, model, stepping); + fclose(fp); +} - if (!(edx & (1 << 5))) { - if (verbose) - printf("CPUID: no MSR\n"); - exit(1); +/* + * We update all sysfs before updating any MSRs because of + * bugs in cpufreq/intel_pstate where the sysfs writes + * for a CPU may change the min/max values on other CPUS. + */ + +int update_sysfs(int cpu) +{ + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK)) + return 0; + + if (update_hwp_min) + update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min); + + if (update_hwp_max) + update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max); + + return 0; +} + +int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req) +{ + /* fail if min > max requested */ + if (req->hwp_min > req->hwp_max) { + errx(1, "cpu%d: requested hwp-min %d > hwp_max %d", + cpu, req->hwp_min, req->hwp_max); } - /* - * Support for MSR_IA32_ENERGY_PERF_BIAS - * is indicated by CPUID.06H.ECX.bit3 - */ - asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); - if (verbose) - printf("CPUID.06H.ECX: 0x%x\n", ecx); - if (!(ecx & (1 << 3))) { - if (verbose) - printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); - exit(1); + /* fail if desired > max requestd */ + if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) { + errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d", + cpu, req->hwp_desired, req->hwp_max); } - return; /* success */ + /* fail if desired < min requestd */ + if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) { + errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d", + cpu, req->hwp_desired, req->hwp_min); + } + + return 0; } -unsigned long long get_msr(int cpu, int offset) +int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap) { - unsigned long long msr; - char msr_path[32]; - int retval; - int fd; + if (update_hwp_max) { + if (req->hwp_max > cap->highest) + errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?", + cpu, req->hwp_max, cap->highest); + if (req->hwp_max < cap->lowest) + errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?", + cpu, req->hwp_max, cap->lowest); + } - sprintf(msr_path, "/dev/cpu/%d/msr", cpu); - fd = open(msr_path, O_RDONLY); - if (fd < 0) { - printf("Try \"# modprobe msr\"\n"); - perror(msr_path); - exit(1); + if (update_hwp_min) { + if (req->hwp_min > cap->highest) + errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?", + cpu, req->hwp_min, cap->highest); + if (req->hwp_min < cap->lowest) + errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?", + cpu, req->hwp_min, cap->lowest); } - retval = pread(fd, &msr, sizeof msr, offset); + if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max)) + errx(1, "cpu%d: requested min %d > requested max %d", + cpu, req->hwp_min, req->hwp_max); - if (retval != sizeof msr) { - printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (update_hwp_desired && req->hwp_desired) { + if (req->hwp_desired > req->hwp_max) + errx(1, "cpu%d: requested desired %d > requested max %d, use --force?", + cpu, req->hwp_desired, req->hwp_max); + if (req->hwp_desired < req->hwp_min) + errx(1, "cpu%d: requested desired %d < requested min %d, use --force?", + cpu, req->hwp_desired, req->hwp_min); + if (req->hwp_desired < cap->lowest) + errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?", + cpu, req->hwp_desired, cap->lowest); + if (req->hwp_desired > cap->highest) + errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?", + cpu, req->hwp_desired, cap->highest); } - close(fd); - return msr; + + return 0; } -unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset) +int update_hwp_request(int cpu) { - unsigned long long old_msr; - char msr_path[32]; - int retval; - int fd; + struct msr_hwp_request req; + struct msr_hwp_cap cap; + + int msr_offset = MSR_HWP_REQUEST; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request(cpu, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + req.hwp_use_pkg = req_update.hwp_use_pkg; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); - sprintf(msr_path, "/dev/cpu/%d/msr", cpu); - fd = open(msr_path, O_RDWR); - if (fd < 0) { - perror(msr_path); - exit(1); + write_hwp_request(cpu, &req, msr_offset); + + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request(cpu, &req, "new: "); } + return 0; +} +int update_hwp_request_pkg(int pkg) +{ + struct msr_hwp_request req; + struct msr_hwp_cap cap; + int cpu = first_cpu_in_pkg[pkg]; + + int msr_offset = MSR_HWP_REQUEST_PKG; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request_pkg(pkg, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); + + write_hwp_request(cpu, &req, msr_offset); - retval = pread(fd, &old_msr, sizeof old_msr, offset); - if (retval != sizeof old_msr) { - perror("pwrite"); - printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request_pkg(pkg, &req, "new: "); } + return 0; +} + +int enable_hwp_on_cpu(int cpu) +{ + unsigned long long msr; + + get_msr(cpu, MSR_PM_ENABLE, &msr); + put_msr(cpu, MSR_PM_ENABLE, 1); + + if (verbose) + printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1); + + return 0; +} + +int update_cpu_msrs(int cpu) +{ + unsigned long long msr; + - retval = pwrite(fd, &new_msr, sizeof new_msr, offset); - if (retval != sizeof new_msr) { - perror("pwrite"); - printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (update_epb) { + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb); + + if (verbose) + printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", + cpu, (unsigned int) msr, (unsigned int) new_epb); } - close(fd); + if (update_turbo) { + int turbo_is_present_and_disabled; + + get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr); + + turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0); + + if (turbo_update_value == 1) { + if (turbo_is_present_and_disabled) { + msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo ENABLE\n", cpu); + } + } else { + /* + * if "turbo_is_enabled" were known to be describe this cpu + * then we could use it here to skip redundant disable requests. + * but cpu may be in a different package, so we always write. + */ + msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo DISABLE\n", cpu); + } + } + + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + update_hwp_request(cpu); + return 0; +} + +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); - return old_msr; + if (!filep) + err(1, "%s: open failed", path); + return filep; } -void print_msr(int cpu) +unsigned int get_pkg_num(int cpu) { - printf("cpu%d: 0x%016llx\n", - cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); + FILE *fp; + char pathname[128]; + unsigned int pkg; + int retval; + + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); + + fp = fopen_or_die(pathname, "r"); + retval = fscanf(fp, "%d\n", &pkg); + if (retval != 1) + errx(1, "%s: failed to parse", pathname); + return pkg; } -void update_msr(int cpu) +int set_max_cpu_pkg_num(int cpu) { - unsigned long long previous_msr; + unsigned int pkg; - previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); + if (max_cpu_num < cpu) + max_cpu_num = cpu; - if (verbose) - printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n", - cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); + pkg = get_pkg_num(cpu); + + if (pkg >= MAX_PACKAGES) + errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES); + + if (pkg > max_pkg_num) + max_pkg_num = pkg; - return; + if ((pkg_present_set & (1ULL << pkg)) == 0) { + pkg_present_set |= (1ULL << pkg); + first_cpu_in_pkg[pkg] = cpu; + } + + return 0; +} +int mark_cpu_present(int cpu) +{ + CPU_SET_S(cpu, cpu_setsize, cpu_present_set); + return 0; } -char *proc_stat = "/proc/stat"; /* - * run func() on every cpu in /dev/cpu + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number */ -void for_every_cpu(void (func)(int)) +int for_all_proc_cpus(int (func)(int)) { FILE *fp; + int cpu_num; int retval; - fp = fopen(proc_stat, "r"); - if (fp == NULL) { - perror(proc_stat); - exit(1); - } + fp = fopen_or_die(proc_stat, "r"); retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); - if (retval != 0) { - perror("/proc/stat format"); - exit(1); - } + if (retval != 0) + err(1, "%s: failed to parse format", proc_stat); while (1) { - int cpu; - - retval = fscanf(fp, - "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", - &cpu); + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); if (retval != 1) break; - func(cpu); + retval = func(cpu_num); + if (retval) { + fclose(fp); + return retval; + } } fclose(fp); + return 0; +} + +void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) +{ + int cpu_num; + + for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) + if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) + func(cpu_num); +} + +void init_data_structures(void) +{ + for_all_proc_cpus(set_max_cpu_pkg_num); + + cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1)); + + cpu_present_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_present_set == NULL) + err(3, "CPU_ALLOC"); + CPU_ZERO_S(cpu_setsize, cpu_present_set); + for_all_proc_cpus(mark_cpu_present); +} + +/* clear has_hwp if it is not enable (or being enabled) */ + +void verify_hwp_is_enabled(void) +{ + unsigned long long msr; + + if (!has_hwp) /* set in early_cpuid() */ + return; + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + get_msr(base_cpu, MSR_PM_ENABLE, &msr); + if ((msr & 1) == 0) { + fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n"); + has_hwp = 0; + return; + } +} + +int req_update_bounds_check(void) +{ + if (!hwp_update_enabled()) + return 0; + + /* fail if min > max requested */ + if ((update_hwp_max && update_hwp_min) && + (req_update.hwp_min > req_update.hwp_max)) { + printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max); + return -EINVAL; + } + + /* fail if desired > max requestd */ + if (req_update.hwp_desired && update_hwp_max && + (req_update.hwp_desired > req_update.hwp_max)) { + printf("hwp-desired cannot be greater than hwp_max\n"); + return -EINVAL; + } + /* fail if desired < min requestd */ + if (req_update.hwp_desired && update_hwp_min && + (req_update.hwp_desired < req_update.hwp_min)) { + printf("hwp-desired cannot be less than hwp_min\n"); + return -EINVAL; + } + + return 0; +} + +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); +} + + +void probe_dev_msr(void) +{ + struct stat sb; + char pathname[32]; + + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); +} +/* + * early_cpuid() + * initialize turbo_is_enabled, has_hwp, has_epb + * before cmdline is parsed + */ +void early_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model; + + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + + if (max_level < 6) + errx(1, "Processor not supported\n"); + + __get_cpuid(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (model == 0x4F) { + unsigned long long msr; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); + + bdx_highest_ratio = msr & 0xFF; + } + + __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + turbo_is_enabled = (eax >> 1) & 1; + has_hwp = (eax >> 7) & 1; + has_epb = (ecx >> 3) & 1; +} + +/* + * parse_cpuid() + * set + * has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb + */ +void parse_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model, stepping; + + eax = ebx = ecx = edx = 0; + + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) + genuine_intel = 1; + + if (debug) + fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", + (char *)&ebx, (char *)&edx, (char *)&ecx); + + __get_cpuid(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (debug) { + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + max_level, family, model, stepping, family, model, stepping); + fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n", + ecx & (1 << 0) ? "SSE3" : "-", + ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 7) ? "EIST" : "-", + ecx & (1 << 8) ? "TM2" : "-", + edx & (1 << 4) ? "TSC" : "-", + edx & (1 << 5) ? "MSR" : "-", + edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 29) ? "TM" : "-"); + } + + if (!(edx & (1 << 5))) + errx(1, "CPUID: no MSR"); + + + __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + /* turbo_is_enabled already set */ + /* has_hwp already set */ + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_request_pkg = eax & (1 << 11); + + if (!has_hwp_request_pkg && update_hwp_use_pkg) + errx(1, "--hwp-use-pkg is not available on this hardware"); + + /* has_epb already set */ + + if (debug) + fprintf(stderr, + "CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + turbo_is_enabled ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", + has_hwp_request_pkg ? "" : "No-", + has_epb ? "" : "No-"); + + return; /* success */ } int main(int argc, char **argv) { + set_base_cpu(); + probe_dev_msr(); + init_data_structures(); + + early_cpuid(); /* initial cpuid parse before cmdline */ + cmdline(argc, argv); - if (verbose > 1) - printf("x86_energy_perf_policy Nov 24, 2010" - " - Len Brown <lenb@kernel.org>\n"); - if (verbose > 1 && !read_only) - printf("new_bias %lld\n", new_bias); - - validate_cpuid(); - - if (cpu != -1) { - if (read_only) - print_msr(cpu); - else - update_msr(cpu); - } else { - if (read_only) - for_every_cpu(print_msr); - else - for_every_cpu(update_msr); + if (debug) + print_version(); + + parse_cpuid(); + + /* If CPU-set and PKG-set are not initialized, default to all CPUs */ + if ((cpu_selected_set == 0) && (pkg_selected_set == 0)) + cpu_selected_set = cpu_present_set; + + /* + * If HWP is being enabled, do it now, so that subsequent operations + * that access HWP registers can work. + */ + if (update_hwp_enable) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu); + + /* If HWP present, but disabled, warn and ignore from here forward */ + verify_hwp_is_enabled(); + + if (req_update_bounds_check()) + return -EINVAL; + + /* display information only, no updates to settings */ + if (!update_epb && !update_turbo && !hwp_update_enabled()) { + if (cpu_selected_set) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs); + + if (has_hwp_request_pkg) { + if (pkg_selected_set == 0) + pkg_selected_set = pkg_present_set; + + for_packages(pkg_selected_set, print_pkg_msrs); + } + + return 0; } + /* update CPU set */ + if (cpu_selected_set) { + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); + } else if (pkg_selected_set) + for_packages(pkg_selected_set, update_hwp_request_pkg); + return 0; } |