79 files changed, 1240 insertions(+), 390 deletions(-)
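The headline change in this series is the new Intel Energy and Performance Bias Hint (EPB) sysfs interface, documented and implemented in the diffs below. As a companion to the ABI description that follows, here is a minimal userspace sketch (not part of the patch) showing how the attribute can be read and written; the choice of CPU 0 and of the "balance-power" hint is arbitrary, and writing requires root:

    #include <stdio.h>

    /*
     * Illustrative only: read and update the EPB hint for one CPU via the
     * sysfs attribute added by this series.  Assumes CPU 0 is online and
     * supports the EPB feature (X86_FEATURE_EPB).
     */
    int main(void)
    {
            const char *path =
                    "/sys/devices/system/cpu/cpu0/power/energy_perf_bias";
            char buf[32];
            FILE *f;

            f = fopen(path, "r");
            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fgets(buf, sizeof(buf), f))
                    printf("current EPB: %s", buf);  /* 0 (performance) - 15 (power) */
            fclose(f);

            f = fopen(path, "w");
            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /*
             * Either a number in the 0 - 15 scale or one of the named hints
             * ("performance", "balance-performance", "normal",
             * "balance-power", "power") is accepted by the store callback.
             */
            fputs("balance-power", f);
            fclose(f);
            return 0;
    }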
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5eea46fefcb2..4fb76c0e8d30 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -520,3 +520,21 @@ Description: Control Symetric Multi Threading (SMT) If control status is "forceoff" or "notsupported" writes are rejected. + +What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias +Date: March 2019 +Contact: linux-pm@vger.kernel.org +Description: Intel Energy and Performance Bias Hint (EPB) + + EPB for the given CPU in a sliding scale 0 - 15, where a value + of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. + + In order to change the EPB value for the CPU, write either + a number in the 0 - 15 sliding scale above, or one of the + strings: "performance", "balance-performance", "normal", + "balance-power", "power" (that represent values reflected by + their meaning), to this attribute. + + This attribute is present for all online CPUs supporting the + Intel EPB feature. diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 7eca9026a9ed..0c74a7784964 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,3 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>` .. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>` @@ -5,9 +8,10 @@ CPU Performance Scaling ======================= -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> The Concept of CPU Performance Scaling ====================================== @@ -396,8 +400,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn, if it is invoked by the CFS scheduling class, the governor will use the Per-Entity Load Tracking (PELT) metric for the root control group of the -given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_ -LWN.net article for a description of the PELT mechanism). Then, the new +given CPU as the CPU utilization estimate (see the *Per-entity load tracking* +LWN.net article [1]_ for a description of the PELT mechanism). Then, the new CPU frequency to apply is computed in accordance with the formula f = 1.25 * ``f_0`` * ``util`` / ``max`` @@ -698,4 +702,8 @@ hardware feature (e.g. all Intel ones), even if the :c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set. -.. _Per-entity load tracking: https://lwn.net/Articles/531853/ +References +========== + +.. [1] Jonathan Corbet, *Per-entity load tracking*, + https://lwn.net/Articles/531853/ diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index 9c58b35a81cb..e70b365dbc60 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -1,3 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + .. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>` .. 
|cpufreq| replace:: :doc:`CPU Performance Scaling <cpufreq>` @@ -5,9 +8,10 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2018 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2018 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> Concepts ======== diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst index 49237ac73442..39f8f9f81e7a 100644 --- a/Documentation/admin-guide/pm/index.rst +++ b/Documentation/admin-guide/pm/index.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================ Power Management ================ diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst new file mode 100644 index 000000000000..005121167af7 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -0,0 +1,41 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +====================================== +Intel Performance and Energy Bias Hint +====================================== + +:Copyright: |copy| 2019 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + + +.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c + :doc: overview + +Intel Performance and Energy Bias Attribute in ``sysfs`` +======================================================== + +The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU +can be checked or updated through a ``sysfs`` attribute (file) under +:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>`` +is allocated at the system initialization time: + +``energy_perf_bias`` + Shows the current EPB value for the CPU in a sliding scale 0 - 15, where + a value of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. + + In order to update the EPB value for the CPU, this attribute can be + written to, either with a number in the 0 - 15 sliding scale above, or + with one of the strings: "performance", "balance-performance", "normal", + "balance-power", "power" that represent values reflected by their + meaning. + + This attribute is present for all online CPUs supporting the EPB + feature. + +Note that while the EPB interface to the processor is defined at the logical CPU +level, the physical register backing it may be shared by multiple CPUs (for +example, SMT siblings or cores in one package). For this reason, updating the +EPB value for one CPU may cause the EPB values for other CPUs to change. diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index ec0f7c111f65..67e414e34f37 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -1,10 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + =============================================== ``intel_pstate`` CPU Performance Scaling Driver =============================================== -:: +:Copyright: |copy| 2017 Intel Corporation - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> General Information @@ -20,11 +23,10 @@ you have not done that yet.] 
For the processors supported by ``intel_pstate``, the P-state concept is broader than just an operating frequency or an operating performance point (see the -`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more +LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more information about that). For this reason, the representation of P-states used by ``intel_pstate`` internally follows the hardware specification (for details -refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core +refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core uses frequencies for identifying operating performance points of CPUs and frequencies are involved in the user space interface exposed by it, so ``intel_pstate`` maps its internal representation of P-states to frequencies too @@ -561,9 +563,9 @@ or to pin every task potentially sensitive to them to a specific CPU.] On the majority of systems supported by ``intel_pstate``, the ACPI tables provided by the platform firmware contain ``_PSS`` objects returning information -that can be used for CPU performance scaling (refer to the `ACPI specification`_ -for details on the ``_PSS`` objects and the format of the information returned -by them). +that can be used for CPU performance scaling (refer to the ACPI specification +[3]_ for details on the ``_PSS`` objects and the format of the information +returned by them). The information returned by the ACPI ``_PSS`` objects is used by the ``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` @@ -728,6 +730,14 @@ P-state is called, the ``ftrace`` filter can be set to to <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func -.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html -.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +References +========== + +.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*, + http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf + +.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*, + http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html + +.. [3] *Advanced Configuration and Power Interface Specification*, + https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index dbf5acd49f35..cd3a28cb81f4 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -1,10 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + =================== System Sleep States =================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2017 Intel Corp., Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> Sleep states are global low-power states of the entire system in which user space code cannot be executed and the overall system activity is significantly diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst index afe4d3f831fe..dd0362e32fa5 100644 --- a/Documentation/admin-guide/pm/strategies.rst +++ b/Documentation/admin-guide/pm/strategies.rst @@ -1,10 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + =========================== Power Management Strategies =========================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> The Linux kernel supports two major high-level power management strategies. diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst index 0c81e4c5de39..2b1f987b34f0 100644 --- a/Documentation/admin-guide/pm/system-wide.rst +++ b/Documentation/admin-guide/pm/system-wide.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================ System-Wide Power Management ============================ diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961..fc298eb1234b 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================== Working-State Power Management ============================== @@ -8,3 +10,4 @@ Working-State Power Management cpuidle cpufreq intel_pstate + intel_epb diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst index 5842ab621a58..006cf6db40c6 100644 --- a/Documentation/driver-api/pm/cpuidle.rst +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -1,3 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + .. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor <cpuidle_governor>` .. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device <cpuidle_device>` .. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver <cpuidle_driver>` @@ -7,9 +10,9 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2019 Intel Corporation - Copyright (c) 2019 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> CPU Idle Time Management Subsystem diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 090c151aa86b..30835683616a 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -1,3 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>` .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>` .. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>` @@ -12,11 +15,12 @@ Device Power Management Basics ============================== -:: +:Copyright: |copy| 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. +:Copyright: |copy| 2010 Alan Stern <stern@rowland.harvard.edu> +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 
- Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu> - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> Most of the code in Linux is device drivers, so most of the Linux power management (PM) code is also driver-specific. Most drivers will do very diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst index 56975c6bc789..c2a9ef8d115c 100644 --- a/Documentation/driver-api/pm/index.rst +++ b/Documentation/driver-api/pm/index.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============================== CPU and Device Power Management =============================== diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst index 62f860026992..186435c43b77 100644 --- a/Documentation/driver-api/pm/notifiers.rst +++ b/Documentation/driver-api/pm/notifiers.rst @@ -1,10 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + ============================= Suspend/Hibernation Notifiers ============================= -:: +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> There are some operations that subsystems or drivers may want to carry out before hibernation/suspend or after restore/resume, but they require the system diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst index 3ebdecc54104..73a231caf764 100644 --- a/Documentation/driver-api/pm/types.rst +++ b/Documentation/driver-api/pm/types.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================================== Device Power Management Data Types ================================== diff --git a/MAINTAINERS b/MAINTAINERS index d846ccd81235..bd40a852207b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4553,6 +4553,7 @@ S: Maintained F: drivers/devfreq/ F: include/linux/devfreq.h F: Documentation/devicetree/bindings/devfreq/ +F: include/trace/events/devfreq.h DEVICE FREQUENCY EVENT (DEVFREQ-EVENT) M: Chanwoo Choi <cw00.choi@samsung.com> @@ -12416,7 +12417,7 @@ M: Mark Rutland <mark.rutland@arm.com> M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> L: linux-arm-kernel@lists.infradead.org S: Maintained -F: drivers/firmware/psci*.c +F: drivers/firmware/psci/ F: include/linux/psci.h F: include/uapi/linux/psci.h diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfd24f9f7614..1796d2bdcaaa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37f7d438a6ef..37640544e12f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1824,23 +1824,6 @@ void cpu_init(void) } #endif -static void bsp_resume(void) -{ - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); -} - -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - /* * The microcode 
loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5eb946b9a9f3..c0e2407abdd6 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,7 +14,6 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3142fd7a9b32..f17c1a714779 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -596,36 +596,6 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } -static void init_intel_energy_perf(struct cpuinfo_x86 *c) -{ - u64 epb; - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. - * (x86_energy_perf_policy(8) is available to change it at run-time.) - */ - if (!cpu_has(c, X86_FEATURE_EPB)) - return; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) - return; - - pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); -} - -static void intel_bsp_resume(struct cpuinfo_x86 *c) -{ - /* - * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, - * so reinitialize it properly like during bootup: - */ - init_intel_energy_perf(c); -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); - init_intel_energy_perf(c); - init_intel_misc_features(c); } @@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, - .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); - diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c new file mode 100644 index 000000000000..f4dd73396f28 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Performance and Energy Bias Hint support. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Rafael J. Wysocki <rafael.j.wysocki@intel.com> + */ + +#include <linux/cpuhotplug.h> +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/syscore_ops.h> +#include <linux/pm.h> + +#include <asm/cpufeature.h> +#include <asm/msr.h> + +/** + * DOC: overview + * + * The Performance and Energy Bias Hint (EPB) allows software to specify its + * preference with respect to the power-performance tradeoffs present in the + * processor. Generally, the EPB is expected to be set by user space (directly + * via sysfs or with the help of the x86_energy_perf_policy tool), but there are + * two reasons for the kernel to update it. + * + * First, there are systems where the platform firmware resets the EPB during + * system-wide transitions from sleep states back into the working state + * effectively causing the previous EPB updates by user space to be lost. 
+ * Thus the kernel needs to save the current EPB values for all CPUs during + * system-wide transitions to sleep states and restore them on the way back to + * the working state. That can be achieved by saving EPB for secondary CPUs + * when they are taken offline during transitions into system sleep states and + * for the boot CPU in a syscore suspend operation, so that it can be restored + * for the boot CPU in a syscore resume operation and for the other CPUs when + * they are brought back online. However, CPUs that are already offline when + * a system-wide PM transition is started are not taken offline again, but their + * EPB values may still be reset by the platform firmware during the transition, + * so in fact it is necessary to save the EPB of any CPU taken offline and to + * restore it when the given CPU goes back online at all times. + * + * Second, on many systems the initial EPB value coming from the platform + * firmware is 0 ('performance') and at least on some of them that is because + * the platform firmware does not initialize EPB at all with the assumption that + * the OS will do that anyway. That sometimes is problematic, as it may cause + * the system battery to drain too fast, for example, so it is better to adjust + * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the + * kernel changes it to 6 ('normal'). + */ + +static DEFINE_PER_CPU(u8, saved_epb); + +#define EPB_MASK 0x0fULL +#define EPB_SAVED 0x10ULL +#define MAX_EPB EPB_MASK + +static int intel_epb_save(void) +{ + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + /* + * Ensure that saved_epb will always be nonzero after this write even if + * the EPB value read from the MSR is 0. + */ + this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); + + return 0; +} + +static void intel_epb_restore(void) +{ + u64 val = this_cpu_read(saved_epb); + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if (val) { + val &= EPB_MASK; + } else { + /* + * Because intel_epb_save() has not run for the current CPU yet, + * it is going online for the first time, so if its EPB value is + * 0 ('performance') at this point, assume that it has not been + * initialized by the platform firmware and set it to 6 + * ('normal'). 
+ */ + val = epb & EPB_MASK; + if (val == ENERGY_PERF_BIAS_PERFORMANCE) { + val = ENERGY_PERF_BIAS_NORMAL; + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + } + } + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); +} + +static struct syscore_ops intel_epb_syscore_ops = { + .suspend = intel_epb_save, + .resume = intel_epb_restore, +}; + +static const char * const energy_perf_strings[] = { + "performance", + "balance-performance", + "normal", + "balance-power", + "power" +}; +static const u8 energ_perf_values[] = { + ENERGY_PERF_BIAS_PERFORMANCE, + ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, + ENERGY_PERF_BIAS_NORMAL, + ENERGY_PERF_BIAS_BALANCE_POWERSAVE, + ENERGY_PERF_BIAS_POWERSAVE +}; + +static ssize_t energy_perf_bias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int cpu = dev->id; + u64 epb; + int ret; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + return sprintf(buf, "%llu\n", epb); +} + +static ssize_t energy_perf_bias_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int cpu = dev->id; + u64 epb, val; + int ret; + + ret = __sysfs_match_string(energy_perf_strings, + ARRAY_SIZE(energy_perf_strings), buf); + if (ret >= 0) + val = energ_perf_values[ret]; + else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) + return -EINVAL; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, + (epb & ~EPB_MASK) | val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(energy_perf_bias); + +static struct attribute *intel_epb_attrs[] = { + &dev_attr_energy_perf_bias.attr, + NULL +}; + +static const struct attribute_group intel_epb_attr_group = { + .name = power_group_name, + .attrs = intel_epb_attrs +}; + +static int intel_epb_online(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + intel_epb_restore(); + if (!cpuhp_tasks_frozen) + sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpuhp_tasks_frozen) + sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + intel_epb_save(); + return 0; +} + +static __init int intel_epb_init(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_EPB)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, + "x86/intel/epb:online", intel_epb_online, + intel_epb_offline); + if (ret < 0) + goto err_out_online; + + register_syscore_ops(&intel_epb_syscore_ops); + return 0; + +err_out_online: + cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); + return ret; +} +subsys_initcall(intel_epb_init); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index aab0c82e0a0d..15b5e98a86f9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void) /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) + * speed as the bootup CPU. */ static void __init cyc2ns_init_secondary_cpus(void) { @@ -940,12 +939,12 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency +/* + * Frequency scaling support. 
Adjust the TSC based timer when the CPU frequency * changes. * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. + * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC + * as unstable and give up in those cases. * * Should fix up last_tsc too. Currently gettimeofday in the * first tick after the change will be slightly wrong. @@ -959,22 +958,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif + if (num_online_cpus() > 1) { + mark_tsc_unstable("cpufreq changes on SMP"); + return 0; + } if (!ref_freq) { ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; tsc_khz_ref = tsc_khz; } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + boot_cpu_data.loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a303fd0e108c..c73d3a62799a 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -181,7 +181,7 @@ void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag) acpi_processor_ppc_ost(pr->handle, 0); } if (ret >= 0) - cpufreq_update_policy(pr->id); + cpufreq_update_limits(pr->id); } int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c..3d899e8abd58 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include <linux/sched.h> #include <linux/suspend.h> #include <linux/export.h> +#include <linux/cpu.h> #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -391,11 +393,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) if (unlikely(!genpd->set_performance_state)) return -EINVAL; - if (unlikely(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) { - WARN_ON(1); + if (WARN_ON(!dev->power.subsys_data || + !dev->power.subsys_data->domain_data)) return -EINVAL; - } genpd_lock(genpd); @@ -1396,8 +1396,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #endif /* CONFIG_PM_SLEEP */ -static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, - struct gpd_timing_data *td) +static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1412,9 +1411,6 @@ static struct generic_pm_domain_data 
*genpd_alloc_dev_data(struct device *dev, goto err_put; } - if (td) - gpd_data->td = *td; - gpd_data->base.dev = dev; gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; @@ -1454,8 +1450,57 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, true, 0); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, false, 0); +} + +static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return -1; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) + return cpu; + } + + return -1; +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct gpd_timing_data *td) + struct device *base_dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1465,16 +1510,19 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev, td); + gpd_data = genpd_alloc_dev_data(dev); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); + gpd_data->cpu = genpd_get_cpu(genpd, base_dev); + ret = genpd->attach_dev ? 
genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; genpd_lock(genpd); + genpd_set_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1502,7 +1550,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev, dev); mutex_unlock(&gpd_list_lock); return ret; @@ -1532,6 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1686,6 +1735,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1696,7 +1751,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1762,11 +1817,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1812,7 +1874,11 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; @@ -2190,7 +2256,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev, dev); out: mutex_unlock(&gpd_list_lock); @@ -2274,6 +2340,7 @@ EXPORT_SYMBOL_GPL(of_genpd_remove_last); static void genpd_release_dev(struct device *dev) { + of_node_put(dev->of_node); kfree(dev); } @@ -2335,14 +2402,14 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, +static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, unsigned int index, bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; int ret; - ret = of_parse_phandle_with_args(np, "power-domains", + ret = of_parse_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells", index, &pd_args); if (ret < 0) return ret; @@ -2354,12 +2421,12 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - return driver_deferred_probe_check_state(dev); + return driver_deferred_probe_check_state(base_dev); } dev_dbg(dev, "adding to PM 
domain %s\n", pd->name); - ret = genpd_add_device(pd, dev, NULL); + ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { @@ -2410,7 +2477,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, dev->of_node, 0, true); + return __genpd_dev_pm_attach(dev, dev, 0, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2440,10 +2507,10 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, if (!dev->of_node) return NULL; - /* Deal only with devices using multiple PM domains. */ + /* Verify that the index is within a valid range. */ num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells"); - if (num_domains < 2 || index >= num_domains) + if (index >= num_domains) return NULL; /* Allocate and register device on the genpd bus. */ @@ -2454,15 +2521,16 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, dev_set_name(virt_dev, "genpd:%u:%s", index, dev_name(dev)); virt_dev->bus = &genpd_bus_type; virt_dev->release = genpd_release_dev; + virt_dev->of_node = of_node_get(dev->of_node); ret = device_register(virt_dev); if (ret) { - kfree(virt_dev); + put_device(virt_dev); return ERR_PTR(ret); } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, dev->of_node, index, false); + ret = __genpd_dev_pm_attach(virt_dev, dev, index, false); if (ret < 1) { device_unregister(virt_dev); return ret ? ERR_PTR(ret) : NULL; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247..7912bc957244 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include <linux/pm_domain.h> #include <linux/pm_qos.h> #include <linux/hrtimer.h> +#include <linux/cpuidle.h> +#include <linux/cpumask.h> +#include <linux/ktime.h> static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. 
+ */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. + */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f80d298de3fa..43e863cc0c1b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -478,7 +478,7 @@ struct dpm_watchdog { /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. - * @data: Watchdog object address. + * @t: The timer that PM watchdog depends on. * * Called when a driver has timed out suspending or resuming. * There's not much we can do here to recover so panic() to @@ -706,6 +706,19 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + reinit_completion(&dev->power.completion); + + if (is_async(dev)) { + get_device(dev); + async_schedule(func, dev); + return true; + } + + return false; +} + static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = (struct device *)data; @@ -732,13 +745,8 @@ void dpm_noirq_resume_devices(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_noirq, dev); - } - } + list_for_each_entry(dev, &dpm_noirq_list, power.entry) + dpm_async_fn(dev, async_resume_noirq); while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); @@ -889,13 +897,8 @@ void dpm_resume_early(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. 
*/ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_early, dev); - } - } + list_for_each_entry(dev, &dpm_late_early_list, power.entry) + dpm_async_fn(dev, async_resume_early); while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); @@ -1053,13 +1056,8 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume, dev); - } - } + list_for_each_entry(dev, &dpm_suspended_list, power.entry) + dpm_async_fn(dev, async_resume); while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); @@ -1373,13 +1371,9 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie) static int device_suspend_noirq(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_noirq, dev); + if (dpm_async_fn(dev, async_suspend_noirq)) return 0; - } + return __device_suspend_noirq(dev, pm_transition, false); } @@ -1576,13 +1570,8 @@ static void async_suspend_late(void *data, async_cookie_t cookie) static int device_suspend_late(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_late, dev); + if (dpm_async_fn(dev, async_suspend_late)) return 0; - } return __device_suspend_late(dev, pm_transition, false); } @@ -1747,6 +1736,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + /* Avoid direct_complete to let wakeup_path propagate. */ + if (device_may_wakeup(dev) || dev->power.wakeup_path) + dev->power.direct_complete = false; + if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); @@ -1842,13 +1835,8 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend, dev); + if (dpm_async_fn(dev, async_suspend)) return 0; - } return __device_suspend(dev, pm_transition, false); } @@ -2069,8 +2057,8 @@ EXPORT_SYMBOL_GPL(__suspend_report_result); /** * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete. - * @dev: Device to wait for. * @subordinate: Device that needs to wait for @dev. + * @dev: Device to wait for. 
*/ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index bb1ae175fae1..23c243a4c675 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -804,7 +804,7 @@ void pm_print_active_wakeup_sources(void) srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { - pr_debug("active wakeup source: %s\n", ws->name); + pm_pr_dbg("active wakeup source: %s\n", ws->name); active = 1; } else if (!active && (!last_activity_ws || @@ -815,7 +815,7 @@ void pm_print_active_wakeup_sources(void) } if (!active && last_activity_ws) - pr_debug("last active wakeup source: %s\n", + pm_pr_dbg("last active wakeup source: %s\n", last_activity_ws->name); srcu_read_unlock(&wakeup_srcu, srcuidx); } @@ -845,7 +845,7 @@ bool pm_wakeup_pending(void) raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_debug("Wakeup pending, aborting suspend\n"); + pm_pr_dbg("Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b22e6bba71f1..4d2b33a30292 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -26,10 +26,6 @@ config CPU_FREQ_GOV_COMMON select IRQ_WORK bool -config CPU_FREQ_BOOST_SW - bool - depends on THERMAL - config CPU_FREQ_STAT bool "CPU frequency transition statistics" help diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index c72258a44ba4..73bb2aafb1a8 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -366,7 +366,7 @@ static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *dat val = drv_read(data, mask); - pr_debug("get_cur_val = %u\n", val); + pr_debug("%s = %u\n", __func__, val); return val; } @@ -378,7 +378,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) unsigned int freq; unsigned int cached_freq; - pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); + pr_debug("%s (%d)\n", __func__, cpu); policy = cpufreq_cpu_get_raw(cpu); if (unlikely(!policy)) @@ -458,8 +458,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (acpi_pstate_strict) { if (!check_freqs(policy, mask, policy->freq_table[index].frequency)) { - pr_debug("acpi_cpufreq_target failed (%d)\n", - policy->cpu); + pr_debug("%s (%d)\n", __func__, policy->cpu); result = -EAGAIN; } } @@ -573,7 +572,7 @@ static int cpufreq_boost_down_prep(unsigned int cpu) static int __init acpi_cpufreq_early_init(void) { unsigned int i; - pr_debug("acpi_cpufreq_early_init\n"); + pr_debug("%s\n", __func__); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { @@ -657,7 +656,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) static int blacklisted; #endif - pr_debug("acpi_cpufreq_cpu_init\n"); + pr_debug("%s\n", __func__); #ifdef CONFIG_SMP if (blacklisted) @@ -856,7 +855,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_cpu_exit\n"); + pr_debug("%s\n", __func__); policy->fast_switch_possible = false; policy->driver_data = NULL; @@ -881,7 +880,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_resume\n"); + pr_debug("%s\n", __func__); data->resume = 1; @@ -954,7 +953,7 @@ static int __init acpi_cpufreq_init(void) if 
(cpufreq_get_current_driver()) return -EEXIST; - pr_debug("acpi_cpufreq_init\n"); + pr_debug("%s\n", __func__); ret = acpi_cpufreq_early_init(); if (ret) @@ -991,7 +990,7 @@ static int __init acpi_cpufreq_init(void) static void __exit acpi_cpufreq_exit(void) { - pr_debug("acpi_cpufreq_exit\n"); + pr_debug("%s\n", __func__); acpi_cpufreq_boost_exit(); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 4ac7c3cf34be..6927a8c0e748 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -124,7 +124,7 @@ static int __init amd_freq_sensitivity_init(void) PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, NULL); if (!pcidev) { - if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK)) + if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) return -ENODEV; } diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 75491fc841a6..0df16eb1eb3c 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency; + unsigned int cur_frequency, base_frequency; struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; - struct clk *clk; + struct clk *clk, *parent; nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void) return PTR_ERR(clk); } + parent = clk_get_parent(clk); + if (IS_ERR(parent)) { + dev_err(cpu_dev, "Cannot get parent clock for CPU0\n"); + clk_put(clk); + return PTR_ERR(parent); + } + + /* Get parent CPU frequency */ + base_frequency = clk_get_rate(parent); + + if (!base_frequency) { + dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n"); + clk_put(clk); + return -EINVAL; + } + /* Get nominal (current) CPU frequency */ cur_frequency = clk_get_rate(clk); if (!cur_frequency) { @@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void) for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index b3f4bd647e9b..988ebc326bdb 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,6 +132,7 @@ static int __init armada_8k_cpufreq_init(void) of_node_put(node); return -ENODEV; } + of_node_put(node); nb_cpus = num_possible_cpus(); freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e10922709d13..7ea217c88c2e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -34,11 +34,6 @@ static LIST_HEAD(cpufreq_policy_list); -static inline bool policy_is_inactive(struct cpufreq_policy *policy) -{ - return cpumask_empty(policy->cpus); -} - /* Macros to iterate over CPU policies */ #define for_each_suitable_policy(__policy, __active) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ @@ -250,6 +245,51 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); +/** + * cpufreq_cpu_release - Unlock a policy and decrement its usage 
counter. + * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). + */ +void cpufreq_cpu_release(struct cpufreq_policy *policy) +{ + if (WARN_ON(!policy)) + return; + + lockdep_assert_held(&policy->rwsem); + + up_write(&policy->rwsem); + + cpufreq_cpu_put(policy); +} + +/** + * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it. + * @cpu: CPU to find the policy for. + * + * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and + * if the policy returned by it is not NULL, acquire its rwsem for writing. + * Return the policy if it is active or release it and return NULL otherwise. + * + * The policy returned by this function has to be released with the help of + * cpufreq_cpu_release() in order to release its rwsem and balance its usage + * counter properly. + */ +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + + if (!policy) + return NULL; + + down_write(&policy->rwsem); + + if (policy_is_inactive(policy)) { + cpufreq_cpu_release(policy); + return NULL; + } + + return policy; +} + /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -669,9 +709,6 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); - /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ @@ -857,11 +894,9 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) { unsigned int limit; int ret; - if (cpufreq_driver->bios_limit) { - ret = cpufreq_driver->bios_limit(policy->cpu, &limit); - if (!ret) - return sprintf(buf, "%u\n", limit); - } + ret = cpufreq_driver->bios_limit(policy->cpu, &limit); + if (!ret) + return sprintf(buf, "%u\n", limit); return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); } @@ -1098,6 +1133,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + kobject_put(&policy->kobj); goto err_free_real_cpus; } @@ -1550,7 +1586,7 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { unsigned int ret_freq = 0; - if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get) + if (unlikely(policy_is_inactive(policy))) return ret_freq; ret_freq = cpufreq_driver->get(policy->cpu); @@ -1588,7 +1624,8 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - ret_freq = __cpufreq_get(policy); + if (cpufreq_driver->get) + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); @@ -2229,8 +2266,8 @@ EXPORT_SYMBOL(cpufreq_get_policy); * * The cpuinfo part of @policy is not updated by this function. 
*/ -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; int ret; @@ -2337,17 +2374,12 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, */ void cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); struct cpufreq_policy new_policy; if (!policy) return; - down_write(&policy->rwsem); - - if (policy_is_inactive(policy)) - goto unlock; - /* * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change @@ -2364,12 +2396,26 @@ void cpufreq_update_policy(unsigned int cpu) cpufreq_set_policy(policy, &new_policy); unlock: - up_write(&policy->rwsem); - - cpufreq_cpu_put(policy); + cpufreq_cpu_release(policy); } EXPORT_SYMBOL(cpufreq_update_policy); +/** + * cpufreq_update_limits - Update policy limits for a given CPU. + * @cpu: CPU to update the policy limits for. + * + * Invoke the driver's ->update_limits callback if present or call + * cpufreq_update_policy() for @cpu. + */ +void cpufreq_update_limits(unsigned int cpu) +{ + if (cpufreq_driver->update_limits) + cpufreq_driver->update_limits(cpu); + else + cpufreq_update_policy(cpu); +} +EXPORT_SYMBOL_GPL(cpufreq_update_limits); + /********************************************************************* * BOOST * *********************************************************************/ @@ -2426,7 +2472,7 @@ int cpufreq_boost_trigger_state(int state) static bool cpufreq_boost_supported(void) { - return likely(cpufreq_driver) && cpufreq_driver->set_boost; + return cpufreq_driver->set_boost; } static int create_boost_sysfs_file(void) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ffa9adeaba31..9d1d9bf02710 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -459,6 +459,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) /* Failure, so roll back. 
*/ pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); + kobject_put(&dbs_data->attr_set.kobj); + policy->governor_data = NULL; if (!have_governor_per_policy()) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index e2db5581489a..08b192eb22c6 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/slab.h> -static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -23,6 +22,7 @@ struct cpufreq_stats { unsigned int state_num; unsigned int last_index; u64 *time_in_state; + spinlock_t lock; unsigned int *freq_table; unsigned int *trans_table; }; @@ -39,12 +39,12 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) { unsigned int count = stats->max_state; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); memset(stats->time_in_state, 0, count * sizeof(u64)); memset(stats->trans_table, 0, count * count * sizeof(int)); stats->last_time = get_jiffies_64(); stats->total_trans = 0; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) @@ -62,9 +62,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) if (policy->fast_switch_enabled) return 0; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -211,6 +211,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->state_num = i; stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); + spin_lock_init(&stats->lock); policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); @@ -242,11 +243,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, if (old_index == -1 || new_index == -1 || old_index == new_index) return; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); stats->last_index = new_index; stats->trans_table[old_index * stats->max_state + new_index]++; stats->total_trans++; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 3a8cc99e6815..e7be0af3199f 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -290,9 +290,6 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_boost_freqs); struct freq_attr *cpufreq_generic_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, -#ifdef CONFIG_CPU_FREQ_BOOST_SW - &cpufreq_freq_attr_scaling_boost_freqs, -#endif NULL, }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index a4ff09f91c8f..3e17560b1efe 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -388,11 +388,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = imx6ul_opp_check_speed_grading(cpu_dev); if (ret) { if (ret == -EPROBE_DEFER) - return ret; + goto put_node; dev_err(cpu_dev, "failed to read ocotp: %d\n", ret); - return ret; + goto put_node; } } else { imx6q_opp_check_speed_grading(cpu_dev); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2986119dd31f..34b54df41aaa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ 
b/drivers/cpufreq/intel_pstate.c @@ -179,6 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. + * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -187,6 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; + bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -525,7 +527,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -539,7 +541,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) { s16 epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* * When hwp_req_data is 0, means that caller didn't read * MSR_HWP_REQUEST, so need to read and get EPP. @@ -564,7 +566,7 @@ static int intel_pstate_set_epb(int cpu, s16 pref) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -612,7 +614,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (epp < 0) return epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; if (epp <= HWP_EPP_BALANCE_PERFORMANCE) @@ -621,7 +623,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) return 3; else return 4; - } else if (static_cpu_has(X86_FEATURE_EPB)) { + } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: * 0x00-0x03 : Performance @@ -649,7 +651,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, mutex_lock(&intel_pstate_limits_lock); - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { u64 value; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); @@ -824,7 +826,7 @@ static void intel_pstate_hwp_set(unsigned int cpu) epp = cpu_data->epp_powersave; } update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; } else { @@ -849,7 +851,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu) value |= HWP_MIN_PERF(min_perf); /* Set EPP/EPB to min */ - if (static_cpu_has(X86_FEATURE_HWP_EPP)) + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); else intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE); @@ -897,6 +899,48 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_max_freq(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpufreq_policy new_policy; + struct cpudata *cpudata; + + if (!policy) + return; + + cpudata = all_cpu_data[cpu]; + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? 
+ cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + + memcpy(&new_policy, policy, sizeof(*policy)); + new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq); + new_policy.min = min(policy->user_policy.min, new_policy.max); + + cpufreq_set_policy(policy, &new_policy); + + cpufreq_cpu_release(policy); +} + +static void intel_pstate_update_limits(unsigned int cpu) +{ + mutex_lock(&intel_pstate_driver_lock); + + update_turbo_state(); + /* + * If turbo has been turned on or off globally, policy limits for + * all CPUs need to be updated to reflect that. + */ + if (global.turbo_disabled_mf != global.turbo_disabled) { + global.turbo_disabled_mf = global.turbo_disabled; + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(cpu); + } else { + cpufreq_update_policy(cpu); + } + + mutex_unlock(&intel_pstate_driver_lock); +} + /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ @@ -1197,7 +1241,7 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); @@ -2138,6 +2182,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); + global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2182,6 +2227,7 @@ static struct cpufreq_driver intel_pstate = { .init = intel_pstate_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_pstate", }; @@ -2316,6 +2362,7 @@ static struct cpufreq_driver intel_cpufreq = { .init = intel_cpufreq_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_cpufreq_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", }; diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index c2dd43f3f5d8..8d63a6dc8383 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -124,13 +124,14 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { dev_err(priv.dev, "Unable to get cpuclk\n"); - return PTR_ERR(priv.cpu_clk); + err = PTR_ERR(priv.cpu_clk); + goto out_node; } err = clk_prepare_enable(priv.cpu_clk); if (err) { dev_err(priv.dev, "Unable to prepare cpuclk\n"); - return err; + goto out_node; } kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; @@ -161,20 +162,22 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_ddr; } - of_node_put(np); - np = NULL; - err = cpufreq_register_driver(&kirkwood_cpufreq_driver); - if (!err) - return 0; + if (err) { + dev_err(priv.dev, "Failed to register cpufreq driver\n"); + goto out_powersave; + } - dev_err(priv.dev, "Failed to register cpufreq driver\n"); + of_node_put(np); + return 0; +out_powersave: clk_disable_unprepare(priv.powersave_clk); out_ddr: clk_disable_unprepare(priv.ddr_clk); out_cpu: 
clk_disable_unprepare(priv.cpu_clk); +out_node: of_node_put(np); return err; diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index d9df89392b84..a94355723ef8 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -210,7 +210,7 @@ static int __init maple_cpufreq_init(void) */ valp = of_get_property(cpunode, "clock-frequency", NULL); if (!valp) - return -ENODEV; + goto bail_noprops; max_freq = (*valp)/1000; maple_cpu_freqs[0].frequency = max_freq; maple_cpu_freqs[1].frequency = max_freq/2; @@ -231,10 +231,6 @@ static int __init maple_cpufreq_init(void) rc = cpufreq_register_driver(&maple_cpufreq_driver); - of_node_put(cpunode); - - return rc; - bail_noprops: of_node_put(cpunode); diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 75dfbd2a58ea..c7710c149de8 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -146,6 +146,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu = of_get_cpu_node(policy->cpu, NULL); + of_node_put(cpu); if (!cpu) goto out; diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 52f0d91d30c1..9b4ce2eb8222 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -552,6 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); if (!voltage_gpio){ pr_err("missing cpu-vcore-select gpio\n"); return 1; @@ -588,6 +589,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); pvr = mfspr(SPRN_PVR); has_cpu_l2lve = !((pvr & 0xf00) == 0x100); diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index fb77b39a4ce3..3c12e03fa343 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1178,7 +1178,7 @@ static int powernowk8_init(void) unsigned int i, supported_cpus = 0; int ret; - if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { + if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); return -ENODEV; } diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 41a0f0be3f9f..8414c3a4ea08 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -86,6 +86,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!cbe_get_cpu_pmd_regs(policy->cpu) || !cbe_get_cpu_mic_tm_regs(policy->cpu)) { pr_info("invalid CBE regs pointers for cpufreq\n"); + of_node_put(cpu); return -EINVAL; } diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 4295e5476264..71b640c8c1a5 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -280,10 +280,12 @@ static const struct of_device_id node_matches[] __initconst = { { .compatible = "fsl,ls1012a-clockgen", }, { .compatible = "fsl,ls1021a-clockgen", }, + { .compatible = "fsl,ls1028a-clockgen", }, { .compatible = "fsl,ls1043a-clockgen", }, { .compatible = "fsl,ls1046a-clockgen", }, { .compatible = "fsl,ls1088a-clockgen", }, { .compatible = "fsl,ls2080a-clockgen", }, + { .compatible = "fsl,lx2160a-clockgen", }, { .compatible = "fsl,p4080-clockgen", }, { .compatible = "fsl,qoriq-clockgen-1.0", }, { .compatible = "fsl,qoriq-clockgen-2.0", }, diff --git a/drivers/cpufreq/speedstep-centrino.c 
b/drivers/cpufreq/speedstep-centrino.c index a1fb735685db..e086b2dd4072 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -412,7 +412,7 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) } /** - * centrino_setpolicy - set a new CPUFreq policy + * centrino_target - set a new CPUFreq policy * @policy: new policy * @index: index of target frequency * diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c index 0171a6e190d7..f7199a35cbb6 100644 --- a/drivers/cpuidle/cpuidle-exynos.c +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = { [1] = { .enter = exynos_enter_lowpower, .exit_latency = 300, - .target_residency = 100000, + .target_residency = 10000, .name = "C1", .desc = "ARM power down", }, diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either the next tick or the + * next timer event, whichever expires first. Additionally, to make + * this data useful for consumers outside cpuidle, we rely on the + * governor's ->select() callback having decided whether or not to + * stop the tick. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index d67242d87744..87e93406d7cd 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -240,7 +240,7 @@ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev, } list_for_each_entry(edev, &devfreq_event_list, node) { - if (!strcmp(edev->desc->name, node->name)) + if (of_node_name_eq(node, edev->desc->name)) goto out; } edev = NULL; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0ae3de76833b..6b6991f0e873 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -29,6 +29,9 @@ #include <linux/of.h> #include "governor.h" +#define CREATE_TRACE_POINTS +#include <trace/events/devfreq.h> + static struct class *devfreq_class; /* @@ -228,7 +231,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) * if is not found. This can happen when both drivers (the governor driver * and the driver that call devfreq_add_device) are built as modules. * devfreq_list_lock should be held by the caller. Returns the matched - * governor's pointer. + * governor's pointer or an error pointer.
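The dev->next_hrtimer value stored above is explicitly meant for consumers outside cpuidle. A minimal consumer-side sketch follows; the helper name is hypothetical, and a valid struct cpuidle_device pointer is assumed to be available:

/*
 * Hypothetical consumer sketch: estimate how long the CPU behind @dev is
 * expected to stay idle. A stored value of 0 means the CPU is not currently
 * inside cpuidle_enter(), so no estimate is available.
 */
static s64 estimated_sleep_ns(struct cpuidle_device *dev)
{
	ktime_t next = READ_ONCE(dev->next_hrtimer);

	return next ? ktime_to_ns(ktime_sub(next, ktime_get())) : 0;
}

READ_ONCE() pairs with the WRITE_ONCE() stores in cpuidle_enter() above, since the field is updated without locking on the idle path.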
*/ static struct devfreq_governor *try_then_request_governor(const char *name) { @@ -254,7 +257,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name) /* Restore previous state before return */ mutex_lock(&devfreq_list_lock); if (err) - return NULL; + return ERR_PTR(err); governor = find_devfreq_governor(name); } @@ -394,6 +397,8 @@ static void devfreq_monitor(struct work_struct *work) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); mutex_unlock(&devfreq->lock); + + trace_devfreq_monitor(devfreq); } /** @@ -528,7 +533,7 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay) mutex_lock(&devfreq->lock); if (!devfreq->stop_polling) queue_delayed_work(devfreq_wq, &devfreq->work, - msecs_to_jiffies(devfreq->profile->polling_ms)); + msecs_to_jiffies(devfreq->profile->polling_ms)); } out: mutex_unlock(&devfreq->lock); @@ -537,7 +542,7 @@ EXPORT_SYMBOL(devfreq_interval_update); /** * devfreq_notifier_call() - Notify that the device frequency requirements - * has been changed out of devfreq framework. + * has been changed out of devfreq framework. * @nb: the notifier_block (supposed to be devfreq->nb) * @type: not used * @devp: not used @@ -651,7 +656,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_unlock(&devfreq->lock); err = set_freq_table(devfreq); if (err < 0) - goto err_out; + goto err_dev; mutex_lock(&devfreq->lock); } @@ -683,16 +688,27 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_out; } - devfreq->trans_table = - devm_kzalloc(&devfreq->dev, - array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), - GFP_KERNEL); + devfreq->trans_table = devm_kzalloc(&devfreq->dev, + array3_size(sizeof(unsigned int), + devfreq->profile->max_state, + devfreq->profile->max_state), + GFP_KERNEL); + if (!devfreq->trans_table) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, - sizeof(unsigned long), - GFP_KERNEL); + devfreq->profile->max_state, + sizeof(unsigned long), + GFP_KERNEL); + if (!devfreq->time_in_state) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->last_stat_updated = jiffies; srcu_init_notifier_head(&devfreq->transition_notifier_list); @@ -726,7 +742,7 @@ struct devfreq *devfreq_add_device(struct device *dev, err_init: mutex_unlock(&devfreq_list_lock); - +err_devfreq: devfreq_remove_device(devfreq); devfreq = NULL; err_dev: @@ -1113,7 +1129,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, struct devfreq *df = to_devfreq(dev); int ret; char str_governor[DEVFREQ_NAME_LEN + 1]; - struct devfreq_governor *governor; + const struct devfreq_governor *governor, *prev_governor; ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor); if (ret != 1) @@ -1142,12 +1158,24 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, goto out; } } + prev_governor = df->governor; df->governor = governor; strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); - if (ret) + if (ret) { dev_warn(dev, "%s: Governor %s not started(%d)\n", __func__, df->governor->name, ret); + df->governor = prev_governor; + strncpy(df->governor_name, prev_governor->name, + DEVFREQ_NAME_LEN); + ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); + if (ret) { + 
dev_err(dev, + "%s: reverting to Governor %s failed (%d)\n", + __func__, df->governor_name, ret); + df->governor = NULL; + } + } out: mutex_unlock(&devfreq_list_lock); @@ -1172,7 +1200,7 @@ static ssize_t available_governors_show(struct device *d, */ if (df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, - "%s ", df->governor_name); + "%s ", df->governor_name); /* * The devfreq device shows the registered governor except for * immutable governors such as passive governor . @@ -1485,8 +1513,8 @@ EXPORT_SYMBOL(devfreq_recommended_opp); /** * devfreq_register_opp_notifier() - Helper function to get devfreq notified - * for any changes in the OPP availability - * changes + * for any changes in the OPP availability + * changes * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1498,8 +1526,8 @@ EXPORT_SYMBOL(devfreq_register_opp_notifier); /** * devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq - * notified for any changes in the OPP - * availability changes anymore. + * notified for any changes in the OPP + * availability changes anymore. * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. * @@ -1518,8 +1546,8 @@ static void devm_devfreq_opp_release(struct device *dev, void *res) } /** - * devm_ devfreq_register_opp_notifier() - * - Resource-managed devfreq_register_opp_notifier() + * devm_devfreq_register_opp_notifier() - Resource-managed + * devfreq_register_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1547,8 +1575,8 @@ int devm_devfreq_register_opp_notifier(struct device *dev, EXPORT_SYMBOL(devm_devfreq_register_opp_notifier); /** - * devm_devfreq_unregister_opp_notifier() - * - Resource-managed devfreq_unregister_opp_notifier() + * devm_devfreq_unregister_opp_notifier() - Resource-managed + * devfreq_unregister_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1567,8 +1595,8 @@ EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. */ int devfreq_register_notifier(struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct notifier_block *nb, + unsigned int list) { int ret = 0; @@ -1674,9 +1702,9 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. 
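As a usage note for the devfreq notifier API whose declarations are reflowed above: a sketch of registering a DEVFREQ_TRANSITION_NOTIFIER, assuming "df" is a struct devfreq pointer obtained from devfreq_add_device(). The callback and variable names are illustrative:

static int foo_transition_notifier(struct notifier_block *nb,
				   unsigned long event, void *data)
{
	struct devfreq_freqs *freqs = data;

	/* Log completed transitions; DEVFREQ_PRECHANGE fires before them. */
	if (event == DEVFREQ_POSTCHANGE)
		pr_info("devfreq transition: %lu Hz -> %lu Hz\n",
			freqs->old, freqs->new);

	return NOTIFY_OK;
}

static struct notifier_block foo_nb = {
	.notifier_call = foo_transition_notifier,
};

/* ... ret = devfreq_register_notifier(df, &foo_nb, DEVFREQ_TRANSITION_NOTIFIER); */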
*/ void devm_devfreq_unregister_notifier(struct device *dev, - struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) { WARN_ON(devres_release(dev, devm_devfreq_notifier_release, devm_devfreq_dev_match, devfreq)); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index c61de0bdf053..c2ea94957501 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -529,7 +529,7 @@ static int of_get_devfreq_events(struct device_node *np, if (!ppmu_events[i].name) continue; - if (!of_node_cmp(node->name, ppmu_events[i].name)) + if (of_node_name_eq(node, ppmu_events[i].name)) break; } diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 22b113363ffc..a436ec4901bb 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -26,6 +26,8 @@ #include <linux/list.h> #include <linux/of.h> +#include <soc/rockchip/rk3399_grf.h> + #define RK3399_DMC_NUM_CH 2 /* DDRMON_CTRL */ @@ -43,18 +45,6 @@ #define DDRMON_CH1_COUNT_NUM 0x3c #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 -/* pmu grf */ -#define PMUGRF_OS_REG2 0x308 -#define DDRTYPE_SHIFT 13 -#define DDRTYPE_MASK 7 - -enum { - DDR3 = 3, - LPDDR3 = 6, - LPDDR4 = 7, - UNUSED = 0xFF -}; - struct dmc_usage { u32 access; u32 total; @@ -83,16 +73,17 @@ static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) u32 ddr_type; /* get ddr type */ - regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val); - ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK; + regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; /* clear DDRMON_CTRL setting */ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); /* set ddr type to dfi */ - if (ddr_type == LPDDR3) + if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); - else if (ddr_type == LPDDR4) + else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); /* enable count, use software mode */ @@ -211,7 +202,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev) if (IS_ERR(data->clk)) { dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->clk); - }; + } /* try to find the optional reference to the pmu syscon */ node = of_parse_phandle(np, "rockchip,pmu", 0); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index c25658b26598..486cc5b422f1 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -514,6 +514,13 @@ err: return ret; } +static void exynos_bus_shutdown(struct platform_device *pdev) +{ + struct exynos_bus *bus = dev_get_drvdata(&pdev->dev); + + devfreq_suspend_device(bus->devfreq); +} + #ifdef CONFIG_PM_SLEEP static int exynos_bus_resume(struct device *dev) { @@ -556,6 +563,7 @@ MODULE_DEVICE_TABLE(of, exynos_bus_of_match); static struct platform_driver exynos_bus_platdrv = { .probe = exynos_bus_probe, + .shutdown = exynos_bus_shutdown, .driver = { .name = "exynos-bus", .pm = &exynos_bus_pm, diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index e795ad2b3f6b..567c034d0301 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -18,14 +18,17 @@ #include <linux/devfreq.h> #include <linux/devfreq-event.h> #include <linux/interrupt.h> +#include <linux/mfd/syscon.h> #include 
<linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> +#include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/rwsem.h> #include <linux/suspend.h> +#include <soc/rockchip/rk3399_grf.h> #include <soc/rockchip/rockchip_sip.h> struct dram_timing { @@ -69,8 +72,11 @@ struct rk3399_dmcfreq { struct mutex lock; struct dram_timing timing; struct regulator *vdd_center; + struct regmap *regmap_pmu; unsigned long rate, target_rate; unsigned long volt, target_volt; + unsigned int odt_dis_freq; + int odt_pd_arg0, odt_pd_arg1; }; static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, @@ -80,6 +86,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, struct dev_pm_opp *opp; unsigned long old_clk_rate = dmcfreq->rate; unsigned long target_volt, target_rate; + struct arm_smccc_res res; + bool odt_enable = false; int err; opp = devfreq_recommended_opp(dev, freq, flags); @@ -95,6 +103,19 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); + if (target_rate >= dmcfreq->odt_dis_freq) + odt_enable = true; + + /* + * This makes an SMC call to the TF-A to set the DDR PD (power-down) + * timings and to enable or disable the ODT (on-die termination) + * resistors. + */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, + dmcfreq->odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, + odt_enable, 0, 0, 0, &res); + /* * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. @@ -294,11 +315,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) { struct arm_smccc_res res; struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node, *node; struct rk3399_dmcfreq *data; int ret, index, size; uint32_t *timing; struct dev_pm_opp *opp; + u32 ddr_type; + u32 val; data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); if (!data) @@ -322,7 +345,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->dmc_clk); - }; + } data->edev = devfreq_event_get_edev_by_phandle(dev, 0); if (IS_ERR(data->edev)) @@ -354,11 +377,57 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) } } + node = of_parse_phandle(np, "rockchip,pmu", 0); + if (node) { + data->regmap_pmu = syscon_node_to_regmap(node); + if (IS_ERR(data->regmap_pmu)) + return PTR_ERR(data->regmap_pmu); + } + + regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; + + switch (ddr_type) { + case RK3399_PMUGRF_DDRTYPE_DDR3: + data->odt_dis_freq = data->timing.ddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR3: + data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR4: + data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq; + break; + default: + return -EINVAL; + } + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT, 0, 0, 0, 0, &res); /* + * In TF-A there is a platform SIP call to set the PD (power-down) + * timings and to enable or disable the ODT (on-die termination).
+ * This call needs three arguments as follows: + * + * arg0: + * bit[0-7] : sr_idle + * bit[8-15] : sr_mc_gate_idle + * bit[16-31] : standby idle + * arg1: + * bit[0-11] : pd_idle + * bit[16-27] : srpd_lite_idle + * arg2: + * bit[0] : odt enable + */ + data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) | + ((data->timing.sr_mc_gate_idle & 0xff) << 8) | + ((data->timing.standby_idle & 0xffff) << 16); + data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) | + ((data->timing.srpd_lite_idle & 0xfff) << 16); + + /* * We add a devfreq driver to our parent since it has a device tree node * with operating points. */ diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index c59d2eee5d30..c89ba7b834ff 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -573,10 +573,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq, static int tegra_governor_event_handler(struct devfreq *devfreq, unsigned int event, void *data) { - struct tegra_devfreq *tegra; - int ret = 0; - - tegra = dev_get_drvdata(devfreq->dev.parent); + struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent); switch (event) { case DEVFREQ_GOV_START: @@ -600,7 +597,7 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, break; } - return ret; + return 0; } static struct devfreq_governor tegra_devfreq_governor = { diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index cac16c4b0df3..7b655f6156fb 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -5,20 +5,6 @@ menu "Firmware Drivers" -config ARM_PSCI_FW - bool - -config ARM_PSCI_CHECKER - bool "ARM PSCI checker" - depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST - help - Run the PSCI checker during startup. This checks that hotplug and - suspend operations work correctly when using PSCI. - - The torture tests may interfere with the PSCI checker by turning CPUs - on and off through hotplug, so for now torture tests and PSCI checker - are mutually exclusive. - config ARM_SCMI_PROTOCOL bool "ARM System Control and Management Interface (SCMI) Message Protocol" depends on ARM || ARM64 || COMPILE_TEST @@ -270,6 +256,7 @@ config TI_SCI_PROTOCOL config HAVE_ARM_SMCCC bool +source "drivers/firmware/psci/Kconfig" source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 80feb635120f..9a3909a22682 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -2,8 +2,6 @@ # # Makefile for the linux kernel. # -obj-$(CONFIG_ARM_PSCI_FW) += psci.o -obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o @@ -25,6 +23,7 @@ CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQU obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/ +obj-y += psci/ obj-y += broadcom/ obj-y += meson/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ diff --git a/drivers/firmware/psci/Kconfig b/drivers/firmware/psci/Kconfig new file mode 100644 index 000000000000..26a3b32bf7ab --- /dev/null +++ b/drivers/firmware/psci/Kconfig @@ -0,0 +1,13 @@ +config ARM_PSCI_FW + bool + +config ARM_PSCI_CHECKER + bool "ARM PSCI checker" + depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST + help + Run the PSCI checker during startup. 
This checks that hotplug and + suspend operations work correctly when using PSCI. + + The torture tests may interfere with the PSCI checker by turning CPUs + on and off through hotplug, so for now torture tests and PSCI checker + are mutually exclusive. diff --git a/drivers/firmware/psci/Makefile b/drivers/firmware/psci/Makefile new file mode 100644 index 000000000000..1956b882470f --- /dev/null +++ b/drivers/firmware/psci/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_ARM_PSCI_FW) += psci.o +obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci/psci.c index c80ec1d03274..fe090ef43d28 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci/psci.c @@ -88,6 +88,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; PSCI_1_0_EXT_POWER_STATE_TYPE_MASK) static u32 psci_cpu_suspend_feature; +static bool psci_system_reset2_supported; static inline bool psci_has_ext_power_state(void) { @@ -95,6 +96,11 @@ static inline bool psci_has_ext_power_state(void) PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK; } +static inline bool psci_has_osi_support(void) +{ + return psci_cpu_suspend_feature & PSCI_1_0_OS_INITIATED; +} + static inline bool psci_power_state_loses_context(u32 state) { const u32 mask = psci_has_ext_power_state() ? @@ -253,7 +259,17 @@ static int get_set_conduit_method(struct device_node *np) static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) { - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + if ((reboot_mode == REBOOT_WARM || reboot_mode == REBOOT_SOFT) && + psci_system_reset2_supported) { + /* + * reset_type[31] = 0 (architectural) + * reset_type[30:0] = 0 (SYSTEM_WARM_RESET) + * cookie = 0 (ignored by the implementation) + */ + invoke_psci_fn(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2), 0, 0, 0); + } else { + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + } } static void psci_sys_poweroff(void) @@ -270,9 +286,26 @@ static int __init psci_features(u32 psci_func_id) #ifdef CONFIG_CPU_IDLE static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); +static int psci_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "arm,psci-suspend-param", state); + + if (err) { + pr_warn("%pOF missing arm,psci-suspend-param property\n", np); + return err; + } + + if (!psci_power_state_is_valid(*state)) { + pr_warn("Invalid PSCI power state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) { - int i, ret, count = 0; + int i, ret = 0, count = 0; u32 *psci_states; struct device_node *state_node; @@ -291,29 +324,16 @@ static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) return -ENOMEM; for (i = 0; i < count; i++) { - u32 state; - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + ret = psci_dt_parse_state_node(state_node, &psci_states[i]); + of_node_put(state_node); - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %pOF missing arm,psci-suspend-param property\n", - state_node); - of_node_put(state_node); + if (ret) goto free_mem; - } - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; + pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); } + /* Idle states parsed correctly, initialize per-cpu 
pointer */ per_cpu(psci_power_state, cpu) = psci_states; return 0; @@ -451,6 +471,16 @@ static const struct platform_suspend_ops psci_suspend_ops = { .enter = psci_system_suspend_enter, }; +static void __init psci_init_system_reset2(void) +{ + int ret; + + ret = psci_features(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2)); + + if (ret != PSCI_RET_NOT_SUPPORTED) + psci_system_reset2_supported = true; +} + static void __init psci_init_system_suspend(void) { int ret; @@ -588,6 +618,7 @@ static int __init psci_probe(void) psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); + psci_init_system_reset2(); } return 0; @@ -605,9 +636,9 @@ static int __init psci_0_2_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; + /* * Starting with v0.2, the PSCI specification introduced a call * (PSCI_VERSION) that allows probing the firmware version, so @@ -615,11 +646,7 @@ static int __init psci_0_2_init(struct device_node *np) * can be carried out according to the specific version reported * by firmware */ - err = psci_probe(); - -out_put_node: - of_node_put(np); - return err; + return psci_probe(); } /* @@ -631,9 +658,8 @@ static int __init psci_0_1_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; pr_info("Using PSCI v0.1 Function IDs from DT\n"); @@ -657,15 +683,27 @@ static int __init psci_0_1_init(struct device_node *np) psci_ops.migrate = psci_migrate; } -out_put_node: - of_node_put(np); - return err; + return 0; +} + +static int __init psci_1_0_init(struct device_node *np) +{ + int err; + + err = psci_0_2_init(np); + if (err) + return err; + + if (psci_has_osi_support()) + pr_info("OSI mode supported.\n"); + + return 0; } static const struct of_device_id psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, + { .compatible = "arm,psci-1.0", .data = psci_1_0_init}, {}, }; @@ -674,6 +712,7 @@ int __init psci_dt_init(void) struct device_node *np; const struct of_device_id *matched_np; psci_initcall_t init_fn; + int ret; np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); @@ -681,7 +720,10 @@ int __init psci_dt_init(void) return -ENODEV; init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); + ret = init_fn(np); + + of_node_put(np); + return ret; } #ifdef CONFIG_ACPI diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci/psci_checker.c index 346943657962..346943657962 100644 --- a/drivers/firmware/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0420f7e8ad5b..0e7703fe733f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -526,6 +526,60 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/** + * dev_pm_opp_find_freq_ceil_by_volt() - Find OPP with highest frequency for + * target voltage. + * @dev: Device for which we do this operation. + * @u_volt: Target voltage. + * + * Search for OPP with highest (ceil) frequency and has voltage <= u_volt. + * + * Return: matching *opp, else returns ERR_PTR in case of error which should be + * handled using IS_ERR. + * + * Error return values can be: + * EINVAL: bad parameters + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. 
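Before the implementation that follows, a caller-side sketch of the new OPP helper documented above: pick the fastest OPP that fits a given voltage budget, then drop the reference the lookup took. The wrapper function is hypothetical:

static int foo_pick_freq_for_uv(struct device *dev, unsigned long u_volt,
				unsigned long *freq)
{
	struct dev_pm_opp *opp;

	opp = dev_pm_opp_find_freq_ceil_by_volt(dev, u_volt);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	*freq = dev_pm_opp_get_freq(opp);
	dev_pm_opp_put(opp);	/* drop the reference taken by the lookup */

	return 0;
}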
+ */ +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !u_volt) { + dev_err(dev, "%s: Invalid argument volt=%lu\n", __func__, + u_volt); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available) { + if (temp_opp->supplies[0].u_volt > u_volt) + break; + opp = temp_opp; + } + } + + /* Increment the reference count of OPP */ + if (!IS_ERR(opp)) + dev_pm_opp_get(opp); + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt); + static int _set_opp_voltage(struct device *dev, struct regulator *reg, struct dev_pm_opp_supply *supply) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b160e98076e3..684caf067003 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -178,6 +178,11 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +static inline bool policy_is_inactive(struct cpufreq_policy *policy) +{ + return cpumask_empty(policy->cpus); +} + static inline bool policy_is_shared(struct cpufreq_policy *policy) { return cpumask_weight(policy->cpus) > 1; @@ -193,8 +198,14 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu); void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); + +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); +void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy); void cpufreq_update_policy(unsigned int cpu); +void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); @@ -322,6 +333,9 @@ struct cpufreq_driver { /* should be defined, if possible */ unsigned int (*get)(unsigned int cpu); + /* Called to update policy limits on firmware notifications. 
*/ + void (*update_limits)(unsigned int cpu); + /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e78281d07b70..dbfdd0fadbef 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, + CPUHP_AP_X86_INTEL_EPB_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee0..0e8e356bed6a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include <linux/of.h> #include <linux/notifier.h> #include <linux/spinlock.h> +#include <linux/cpumask.h> /* * Flags to control the behaviour of a genpd. @@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that the backend + * driver must then comply with the so-called + * last-man-standing algorithm for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -69,6 +81,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -93,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ @@ -104,15 +118,17 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time.
*/ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; @@ -159,6 +175,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + int cpu; unsigned int performance_state; void *data; }; @@ -187,6 +204,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 24c757a32a7b..b150fe97ce5a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); @@ -207,6 +209,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2..6b3ea9ea6a9e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -425,6 +425,7 @@ void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); +extern void ksys_sync_helper(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void ksys_sync_helper(void) {} + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } diff --git a/include/linux/tick.h b/include/linux/tick.h index 76acb48acdb7..f92a10b5e112 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -128,6 +128,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -151,7 +152,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void 
tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/include/soc/rockchip/rk3399_grf.h b/include/soc/rockchip/rk3399_grf.h new file mode 100644 index 000000000000..3eebabcb2812 --- /dev/null +++ b/include/soc/rockchip/rk3399_grf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Rockchip General Register Files definitions + * + * Copyright (c) 2018, Collabora Ltd. + * Author: Enric Balletbo i Serra <enric.balletbo@collabora.com> + */ + +#ifndef __SOC_RK3399_GRF_H +#define __SOC_RK3399_GRF_H + +/* PMU GRF Registers */ +#define RK3399_PMUGRF_OS_REG2 0x308 +#define RK3399_PMUGRF_DDRTYPE_SHIFT 13 +#define RK3399_PMUGRF_DDRTYPE_MASK 7 +#define RK3399_PMUGRF_DDRTYPE_DDR3 3 +#define RK3399_PMUGRF_DDRTYPE_LPDDR2 5 +#define RK3399_PMUGRF_DDRTYPE_LPDDR3 6 +#define RK3399_PMUGRF_DDRTYPE_LPDDR4 7 + +#endif diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h index 7e28092c4d3d..ad9482c56797 100644 --- a/include/soc/rockchip/rockchip_sip.h +++ b/include/soc/rockchip/rockchip_sip.h @@ -23,5 +23,6 @@ #define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE 0x05 #define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ 0x06 #define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM 0x07 +#define ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD 0x08 #endif diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h new file mode 100644 index 000000000000..cf5b8772175d --- /dev/null +++ b/include/trace/events/devfreq.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devfreq + +#if !defined(_TRACE_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVFREQ_H + +#include <linux/devfreq.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(devfreq_monitor, + TP_PROTO(struct devfreq *devfreq), + + TP_ARGS(devfreq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, busy_time) + __field(unsigned long, total_time) + __field(unsigned int, polling_ms) + __string(dev_name, dev_name(&devfreq->dev)) + ), + + TP_fast_assign( + __entry->freq = devfreq->previous_freq; + __entry->busy_time = devfreq->last_status.busy_time; + __entry->total_time = devfreq->last_status.total_time; + __entry->polling_ms = devfreq->profile->polling_ms; + __assign_str(dev_name, dev_name(&devfreq->dev)); + ), + + TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu", + __get_str(dev_name), __entry->freq, __entry->polling_ms, + __entry->total_time == 0 ? 
0 : + (100 * __entry->busy_time) / __entry->total_time) +); +#endif /* _TRACE_DEVFREQ_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index b3bcabe380da..2fcad1dd0b0e 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -49,8 +49,11 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) +#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff @@ -97,6 +100,10 @@ #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) +#define PSCI_1_0_OS_INITIATED BIT(0) +#define PSCI_1_0_SUSPEND_MODE_PC 0 +#define PSCI_1_0_SUSPEND_MODE_OSI 1 + /* PSCI return values (inclusive of all PSCI versions) */ #define PSCI_RET_SUCCESS 0 #define PSCI_RET_NOT_SUPPORTED -1 diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index cfc7a57049e4..c8c272df7154 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,7 +14,6 @@ #include <linux/export.h> #include <linux/suspend.h> -#include <linux/syscalls.h> #include <linux/reboot.h> #include <linux/string.h> #include <linux/device.h> @@ -709,9 +708,7 @@ int hibernate(void) goto Exit; } - pr_info("Syncing filesystems ... \n"); - ksys_sync(); - pr_info("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 98e76cad128b..4f43e724f6eb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/suspend.h> +#include <linux/syscalls.h> #include "power.h" @@ -51,6 +52,19 @@ void unlock_system_sleep(void) } EXPORT_SYMBOL_GPL(unlock_system_sleep); +void ksys_sync_helper(void) +{ + ktime_t start; + long elapsed_msecs; + + start = ktime_get(); + ksys_sync(); + elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); + pr_info("Filesystems sync: %ld.%03ld seconds\n", + elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); +} +EXPORT_SYMBOL_GPL(ksys_sync_helper); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 59b6def23046..ef908c134b34 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -17,7 +17,6 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/cpuidle.h> -#include <linux/syscalls.h> #include <linux/gfp.h> #include <linux/io.h> #include <linux/kernel.h> @@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) s2idle_begin(); -#ifndef CONFIG_SUSPEND_SKIP_SYNC - trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("Syncing filesystems ... 
"); - ksys_sync(); - pr_cont("done.\n"); - trace_suspend_resume(TPS("sync_filesystems"), 0, false); -#endif + if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) { + trace_suspend_resume(TPS("sync_filesystems"), 0, true); + ksys_sync_helper(); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); + } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2d8b60a3c86b..cb24e840a3e6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -10,7 +10,6 @@ */ #include <linux/suspend.h> -#include <linux/syscalls.h> #include <linux/reboot.h> #include <linux/string.h> #include <linux/device.h> @@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (data->frozen) break; - printk("Syncing filesystems ... "); - ksys_sync(); - printk("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3638d2377e3c..5403479073b0 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,8 @@ #include <linux/sched/cpufreq.h> #include <trace/events/power.h> +#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8) + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -51,7 +53,6 @@ struct sugov_cpu { u64 last_update; unsigned long bw_dl; - unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -291,8 +292,8 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) * * The IO wait boost of a task is disabled after a tick since the last update * of a CPU. If a new IO wait boost is requested after more then a tick, then - * we enable the boost starting from the minimum frequency, which improves - * energy efficiency by ignoring sporadic wakeups from IO. + * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy + * efficiency by ignoring sporadic wakeups from IO. */ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, bool set_iowait_boost) @@ -303,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -317,8 +318,9 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, * * Each time a task wakes up after an IO operation, the CPU utilization can be * boosted to a certain utilization which doubles at each "frequent and - * successive" wakeup from IO, ranging from the utilization of the minimum - * OPP to the utilization of the maximum OPP. + * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization + * of the maximum OPP. + * * To keep doubling, an IO boost has to be requested at least once per tick, * otherwise we restart from the utilization of the minimum OPP. */ @@ -349,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->min; + sg_cpu->iowait_boost = IOWAIT_BOOST_MIN; } /** @@ -389,7 +391,7 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, * No boost pending; reduce the boost value. 
*/ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->min) { + if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { sg_cpu->iowait_boost = 0; return util; } @@ -827,9 +829,6 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->min = - (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / - policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index bdf00c763ee3..f4ee1a3428ae 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1038,6 +1038,18 @@ bool tick_nohz_idle_got_tick(void) } /** + * tick_nohz_get_next_hrtimer - return the next expiration time of the hrtimer + * or the tick, whichever expires first. Note that if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + +/** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped *
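To make the IOWAIT_BOOST_MIN arithmetic in the schedutil changes above concrete, here is a standalone sketch in plain userspace C (not scheduler code; the loop structure is purely illustrative). It reproduces the two movements of the boost value: doubling on each "frequent and successive" IO wakeup, capped at SCHED_CAPACITY_SCALE, and halving once wakeups stop, with a cutoff below IOWAIT_BOOST_MIN:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL
#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)

int main(void)
{
	unsigned long boost = 0;
	int i;

	/* Back-to-back IO wakeups: start at IOWAIT_BOOST_MIN, then double. */
	for (i = 0; i < 4; i++) {
		boost = boost ? boost << 1 : IOWAIT_BOOST_MIN;
		if (boost > SCHED_CAPACITY_SCALE)
			boost = SCHED_CAPACITY_SCALE;
		printf("wakeup %d: boost=%lu\n", i + 1, boost);
	}

	/* No boost pending: decay by halving, cut off below the minimum. */
	while (boost) {
		boost >>= 1;
		if (boost < IOWAIT_BOOST_MIN)
			boost = 0;
		printf("decay: boost=%lu\n", boost);
	}

	return 0;
}

With the default SCHED_CAPACITY_SCALE of 1024 this prints the boost climbing 128, 256, 512, 1024 and then decaying 512, 256, 128, 0, which is exactly why the patch can drop the per-CPU sg_cpu->min in favor of the fixed IOWAIT_BOOST_MIN floor.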