diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-13 07:24:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-13 07:24:18 -0700 |
commit | d09fcecb0c797b884ce65daa37c121a2786bb17b (patch) | |
tree | e9f055a54ebc2e3ab536872e55cbc4badc4f68a3 /drivers | |
parent | f5b7769eb0400ec5217a47e41148a9f816ca1f9f (diff) | |
parent | 6a900f884e3e864d13501e63357990bc472f940c (diff) | |
download | linux-d09fcecb0c797b884ce65daa37c121a2786bb17b.tar.bz2 |
Merge tag 'pm-4.18-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:
"These revert a recent PM core change that introduced a regression, fix
the build when the recently added Kryo cpufreq driver is selected, add
support for devices attached to multiple power domains to the generic
power domains (genpd) framework, add support for iowait boosting on
systens with hardware-managed P-states (HWP) enabled to the
intel_pstate driver, modify the behavior of the wakeup_count device
attribute in sysfs, fix a few issues and clean up some ugliness,
mostly in cpufreq (core and drivers) and in the cpupower utility.
Specifics:
- Revert a recent PM core change that attempted to fix an issue
related to device links, but introduced a regression (Rafael
Wysocki)
- Fix build when the recently added cpufreq driver for Kryo
processors is selected by making it possible to build that driver
as a module (Arnd Bergmann)
- Fix the long idle detection mechanism in the out-of-band (ondemand
and conservative) cpufreq governors (Chen Yu)
- Add support for devices in multiple power domains to the generic
power domains (genpd) framework (Ulf Hansson)
- Add support for iowait boosting on systems with hardware-managed
P-states (HWP) enabled to the intel_pstate driver and make it use
that feature on systems with Skylake Xeon processors as it is
reported to improve performance significantly on those systems
(Srinivas Pandruvada)
- Fix and update the acpi_cpufreq, ti-cpufreq and imx6q cpufreq
drivers (Colin Ian King, Suman Anna, Sébastien Szymanski)
- Change the behavior of the wakeup_count device attribute in sysfs
to expose the number of events when the device might have aborted
system suspend in progress (Ravi Chandra Sadineni)
- Fix two minor issues in the cpupower utility (Abhishek Goel, Colin
Ian King)"
* tag 'pm-4.18-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
Revert "PM / runtime: Fixup reference counting of device link suppliers at probe"
cpufreq: imx6q: check speed grades for i.MX6ULL
cpufreq: governors: Fix long idle detection logic in load calculation
cpufreq: intel_pstate: enable boost for Skylake Xeon
PM / wakeup: Export wakeup_count instead of event_count via sysfs
PM / Domains: Add dev_pm_domain_attach_by_id() to manage multi PM domains
PM / Domains: Add support for multi PM domains per device to genpd
PM / Domains: Split genpd_dev_pm_attach()
PM / Domains: Don't attach devices in genpd with multi PM domains
PM / Domains: dt: Allow power-domain property to be a list of specifiers
cpufreq: intel_pstate: New sysfs entry to control HWP boost
cpufreq: intel_pstate: HWP boost performance on IO wakeup
cpufreq: intel_pstate: Add HWP boost utility and sched util hooks
cpufreq: ti-cpufreq: Use devres managed API in probe()
cpufreq: ti-cpufreq: Fix an incorrect error return value
cpufreq: ACPI: make function acpi_cpufreq_fast_switch() static
cpufreq: kryo: allow building as a loadable module
cpupower : Fix header name to read idle state name
cpupower: fix spelling mistake: "logilename" -> "logfilename"
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/base/dd.c | 3 | ||||
-rw-r--r-- | drivers/base/power/common.c | 43 | ||||
-rw-r--r-- | drivers/base/power/domain.c | 134 | ||||
-rw-r--r-- | drivers/base/power/runtime.c | 27 | ||||
-rw-r--r-- | drivers/base/power/sysfs.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.arm | 2 | ||||
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 12 | ||||
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c | 29 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 179 | ||||
-rw-r--r-- | drivers/cpufreq/ti-cpufreq.c | 7 |
11 files changed, 390 insertions, 52 deletions
diff --git a/drivers/base/dd.c b/drivers/base/dd.c index fb4e2df68d95..1435d7281c66 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -580,7 +580,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); - pm_runtime_resume_suppliers(dev); + pm_runtime_get_suppliers(dev); if (dev->parent) pm_runtime_get_sync(dev->parent); @@ -591,6 +591,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) if (dev->parent) pm_runtime_put(dev->parent); + pm_runtime_put_suppliers(dev); return ret; } diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 7ae62b6355b8..df41b4780b3b 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -117,13 +117,50 @@ int dev_pm_domain_attach(struct device *dev, bool power_on) EXPORT_SYMBOL_GPL(dev_pm_domain_attach); /** + * dev_pm_domain_attach_by_id - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @index: The index of the PM domain. + * + * As @dev may only be attached to a single PM domain, the backend PM domain + * provider creates a virtual device to attach instead. If attachment succeeds, + * the ->detach() callback in the struct dev_pm_domain are assigned by the + * corresponding backend attach function, as to deal with detaching of the + * created virtual device. + * + * This function should typically be invoked by a driver during the probe phase, + * in case its device requires power management through multiple PM domains. The + * driver may benefit from using the received device, to configure device-links + * towards its original device. Depending on the use-case and if needed, the + * links may be dynamically changed by the driver, which allows it to control + * the power to the PM domains independently from each other. + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. + * + * Returns the virtual created device when successfully attached to its PM + * domain, NULL in case @dev don't need a PM domain, else an ERR_PTR(). + * Note that, to detach the returned virtual device, the driver shall call + * dev_pm_domain_detach() on it, typically during the remove phase. + */ +struct device *dev_pm_domain_attach_by_id(struct device *dev, + unsigned int index) +{ + if (dev->pm_domain) + return ERR_PTR(-EEXIST); + + return genpd_dev_pm_attach_by_id(dev, index); +} +EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id); + +/** * dev_pm_domain_detach - Detach a device from its PM domain. * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * - * This functions will reverse the actions from dev_pm_domain_attach() and thus - * try to detach the @dev from its PM domain. Typically it should be invoked - * from subsystem level code during the remove phase. + * This functions will reverse the actions from dev_pm_domain_attach() and + * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain. + * Typically it should be invoked during the remove phase, either from + * subsystem level code or from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks. diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6f403d6fccb2..4925af5c4cf0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2171,6 +2171,15 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) } EXPORT_SYMBOL_GPL(of_genpd_remove_last); +static void genpd_release_dev(struct device *dev) +{ + kfree(dev); +} + +static struct bus_type genpd_bus_type = { + .name = "genpd", +}; + /** * genpd_dev_pm_detach - Detach a device from its PM domain. * @dev: Device to detach. @@ -2208,6 +2217,10 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) /* Check if PM domain can be powered off after removing this device. */ genpd_queue_power_off_work(pd); + + /* Unregister the device if it was created by genpd. */ + if (dev->bus == &genpd_bus_type) + device_unregister(dev); } static void genpd_dev_pm_sync(struct device *dev) @@ -2221,32 +2234,17 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -/** - * genpd_dev_pm_attach - Attach a device to its PM domain using DT. - * @dev: Device to attach. - * - * Parse device's OF node to find a PM domain specifier. If such is found, - * attaches the device to retrieved pm_domain ops. - * - * Returns 1 on successfully attached PM domain, 0 when the device don't need a - * PM domain or a negative error code in case of failures. Note that if a - * power-domain exists for the device, but it cannot be found or turned on, - * then return -EPROBE_DEFER to ensure that the device is not probed and to - * re-try again later. - */ -int genpd_dev_pm_attach(struct device *dev) +static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, + unsigned int index) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; int ret; - if (!dev->of_node) - return 0; - - ret = of_parse_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells", 0, &pd_args); + ret = of_parse_phandle_with_args(np, "power-domains", + "#power-domain-cells", index, &pd_args); if (ret < 0) - return 0; + return ret; mutex_lock(&gpd_list_lock); pd = genpd_get_from_provider(&pd_args); @@ -2282,8 +2280,98 @@ int genpd_dev_pm_attach(struct device *dev) return ret ? -EPROBE_DEFER : 1; } + +/** + * genpd_dev_pm_attach - Attach a device to its PM domain using DT. + * @dev: Device to attach. + * + * Parse device's OF node to find a PM domain specifier. If such is found, + * attaches the device to retrieved pm_domain ops. + * + * Returns 1 on successfully attached PM domain, 0 when the device don't need a + * PM domain or when multiple power-domains exists for it, else a negative error + * code. Note that if a power-domain exists for the device, but it cannot be + * found or turned on, then return -EPROBE_DEFER to ensure that the device is + * not probed and to re-try again later. + */ +int genpd_dev_pm_attach(struct device *dev) +{ + if (!dev->of_node) + return 0; + + /* + * Devices with multiple PM domains must be attached separately, as we + * can only attach one PM domain per device. + */ + if (of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells") != 1) + return 0; + + return __genpd_dev_pm_attach(dev, dev->of_node, 0); +} EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); +/** + * genpd_dev_pm_attach_by_id - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @index: The index of the PM domain. + * + * Parse device's OF node to find a PM domain specifier at the provided @index. + * If such is found, creates a virtual device and attaches it to the retrieved + * pm_domain ops. To deal with detaching of the virtual device, the ->detach() + * callback in the struct dev_pm_domain are assigned to genpd_dev_pm_detach(). + * + * Returns the created virtual device if successfully attached PM domain, NULL + * when the device don't need a PM domain, else an ERR_PTR() in case of + * failures. If a power-domain exists for the device, but cannot be found or + * turned on, then ERR_PTR(-EPROBE_DEFER) is returned to ensure that the device + * is not probed and to re-try again later. + */ +struct device *genpd_dev_pm_attach_by_id(struct device *dev, + unsigned int index) +{ + struct device *genpd_dev; + int num_domains; + int ret; + + if (!dev->of_node) + return NULL; + + /* Deal only with devices using multiple PM domains. */ + num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (num_domains < 2 || index >= num_domains) + return NULL; + + /* Allocate and register device on the genpd bus. */ + genpd_dev = kzalloc(sizeof(*genpd_dev), GFP_KERNEL); + if (!genpd_dev) + return ERR_PTR(-ENOMEM); + + dev_set_name(genpd_dev, "genpd:%u:%s", index, dev_name(dev)); + genpd_dev->bus = &genpd_bus_type; + genpd_dev->release = genpd_release_dev; + + ret = device_register(genpd_dev); + if (ret) { + kfree(genpd_dev); + return ERR_PTR(ret); + } + + /* Try to attach the device to the PM domain at the specified index. */ + ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index); + if (ret < 1) { + device_unregister(genpd_dev); + return ret ? ERR_PTR(ret) : NULL; + } + + pm_runtime_set_active(genpd_dev); + pm_runtime_enable(genpd_dev); + + return genpd_dev; +} +EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id); + static const struct of_device_id idle_state_match[] = { { .compatible = "domain-idle-state", }, { } @@ -2443,6 +2531,12 @@ unlock: } EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state); +static int __init genpd_bus_init(void) +{ + return bus_register(&genpd_bus_type); +} +core_initcall(genpd_bus_init); + #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index c6030f100c08..beb85c31f3fa 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1563,16 +1563,37 @@ void pm_runtime_clean_up_links(struct device *dev) } /** - * pm_runtime_resume_suppliers - Resume supplier devices. + * pm_runtime_get_suppliers - Resume and reference-count supplier devices. * @dev: Consumer device. */ -void pm_runtime_resume_suppliers(struct device *dev) +void pm_runtime_get_suppliers(struct device *dev) { + struct device_link *link; int idx; idx = device_links_read_lock(); - rpm_get_suppliers(dev); + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_get_sync(link->supplier); + + device_links_read_unlock(idx); +} + +/** + * pm_runtime_put_suppliers - Drop references to supplier devices. + * @dev: Consumer device. + */ +void pm_runtime_put_suppliers(struct device *dev) +{ + struct device_link *link; + int idx; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_put(link->supplier); device_links_read_unlock(idx); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 0f651efc58a1..d713738ce796 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -353,7 +353,7 @@ static ssize_t wakeup_count_show(struct device *dev, spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->event_count; + count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index c7ce928fbf1f..52f5f1a2040c 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ default ARCH_OMAP2PLUS config ARM_QCOM_CPUFREQ_KRYO - bool "Qualcomm Kryo based CPUFreq" + tristate "Qualcomm Kryo based CPUFreq" depends on ARM64 depends on QCOM_QFPROM depends on QCOM_SMEM diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 8ff1c9123834..b61f4ec43e06 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -465,8 +465,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return result; } -unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, - unsigned int target_freq) +static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 871bf9cf55cf..1d50e97d49f1 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * calls, so the previous load value can be used then. */ load = j_cdbs->prev_load; - } else if (unlikely(time_elapsed > 2 * sampling_rate && + } else if (unlikely((int)idle_time > 2 * sampling_rate && j_cdbs->prev_load)) { /* * If the CPU had gone completely idle and a task has @@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * clear prev_load to guarantee that the load will be * computed again next time. * - * Detecting this situation is easy: the governor's - * utilization update handler would not have run during - * CPU-idle periods. Hence, an unusually large - * 'time_elapsed' (as compared to the sampling rate) + * Detecting this situation is easy: an unusually large + * 'idle_time' (as compared to the sampling rate) * indicates this scenario. */ load = j_cdbs->prev_load; @@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) j_cdbs->prev_load = load; } - if (time_elapsed > 2 * sampling_rate) { - unsigned int periods = time_elapsed / sampling_rate; + if (unlikely((int)idle_time > 2 * sampling_rate)) { + unsigned int periods = idle_time / sampling_rate; if (periods < idle_periods) idle_periods = periods; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 70912104a199..8b3c2a79ad6c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -266,6 +266,8 @@ put_node: } #define OCOTP_CFG3_6UL_SPEED_696MHZ 0x2 +#define OCOTP_CFG3_6ULL_SPEED_792MHZ 0x2 +#define OCOTP_CFG3_6ULL_SPEED_900MHZ 0x3 static void imx6ul_opp_check_speed_grading(struct device *dev) { @@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev) * Speed GRADING[1:0] defines the max speed of ARM: * 2b'00: Reserved; * 2b'01: 528000000Hz; - * 2b'10: 696000000Hz; - * 2b'11: Reserved; + * 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL; + * 2b'11: 900000000Hz on i.MX6ULL only; * We need to set the max speed of ARM according to fuse map. */ val = readl_relaxed(base + OCOTP_CFG3); val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; - if (val != OCOTP_CFG3_6UL_SPEED_696MHZ) - if (dev_pm_opp_disable(dev, 696000000)) - dev_warn(dev, "failed to disable 696MHz OPP\n"); + + if (of_machine_is_compatible("fsl,imx6ul")) { + if (val != OCOTP_CFG3_6UL_SPEED_696MHZ) + if (dev_pm_opp_disable(dev, 696000000)) + dev_warn(dev, "failed to disable 696MHz OPP\n"); + } + + if (of_machine_is_compatible("fsl,imx6ull")) { + if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ) + if (dev_pm_opp_disable(dev, 792000000)) + dev_warn(dev, "failed to disable 792MHz OPP\n"); + + if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ) + if (dev_pm_opp_disable(dev, 900000000)) + dev_warn(dev, "failed to disable 900MHz OPP\n"); + } + iounmap(base); put_node: of_node_put(np); @@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_reg; } - if (of_machine_is_compatible("fsl,imx6ul")) + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) imx6ul_opp_check_speed_grading(cpu_dev); else imx6q_opp_check_speed_grading(cpu_dev); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b6575408f279..1de5ec8d5ea3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -221,6 +221,11 @@ struct global_params { * preference/bias * @epp_saved: Saved EPP/EPB during system suspend or CPU offline * operation + * @hwp_req_cached: Cached value of the last HWP Request MSR + * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR + * @last_io_update: Last time when IO wake flag was set + * @sched_flags: Store scheduler flags for possible cross CPU update + * @hwp_boost_min: Last HWP boosted min performance * * This structure stores per CPU instance data for all CPUs. */ @@ -253,6 +258,11 @@ struct cpudata { s16 epp_policy; s16 epp_default; s16 epp_saved; + u64 hwp_req_cached; + u64 hwp_cap_cached; + u64 last_io_update; + unsigned int sched_flags; + u32 hwp_boost_min; }; static struct cpudata **all_cpu_data; @@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly; static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; +static bool hwp_boost __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, u64 cap; rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap); if (global.no_turbo) *current_max = HWP_GUARANTEED_PERF(cap); else @@ -763,6 +775,7 @@ update_epp: intel_pstate_set_epb(cpu, epp); } skip_epp: + WRITE_ONCE(cpu_data->hwp_req_cached, value); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } @@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, return count; } +static ssize_t show_hwp_dynamic_boost(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", hwp_boost); +} + +static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = kstrtouint(buf, 10, &input); + if (ret) + return ret; + + mutex_lock(&intel_pstate_driver_lock); + hwp_boost = !!input; + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); + + return count; +} + show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct); define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); define_one_global_ro(num_pstates); +define_one_global_rw(hwp_dynamic_boost); static struct attribute *intel_pstate_attributes[] = { &status.attr, @@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void) rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr); WARN_ON(rc); + if (hwp_active) { + rc = sysfs_create_file(intel_pstate_kobject, + &hwp_dynamic_boost.attr); + WARN_ON(rc); + } } /************************** sysfs end ************************/ @@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) intel_pstate_set_min_pstate(cpu); } +/* + * Long hold time will keep high perf limits for long time, + * which negatively impacts perf/watt for some workloads, + * like specpower. 3ms is based on experiements on some + * workoads. + */ +static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC; + +static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) +{ + u64 hwp_req = READ_ONCE(cpu->hwp_req_cached); + u32 max_limit = (hwp_req & 0xff00) >> 8; + u32 min_limit = (hwp_req & 0xff); + u32 boost_level1; + + /* + * Cases to consider (User changes via sysfs or boot time): + * If, P0 (Turbo max) = P1 (Guaranteed max) = min: + * No boost, return. + * If, P0 (Turbo max) > P1 (Guaranteed max) = min: + * Should result in one level boost only for P0. + * If, P0 (Turbo max) = P1 (Guaranteed max) > min: + * Should result in two level boost: + * (min + p1)/2 and P1. + * If, P0 (Turbo max) > P1 (Guaranteed max) > min: + * Should result in three level boost: + * (min + p1)/2, P1 and P0. + */ + + /* If max and min are equal or already at max, nothing to boost */ + if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit) + return; + + if (!cpu->hwp_boost_min) + cpu->hwp_boost_min = min_limit; + + /* level at half way mark between min and guranteed */ + boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1; + + if (cpu->hwp_boost_min < boost_level1) + cpu->hwp_boost_min = boost_level1; + else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached); + else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) && + max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + cpu->hwp_boost_min = max_limit; + else + return; + + hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min; + wrmsrl(MSR_HWP_REQUEST, hwp_req); + cpu->last_update = cpu->sample.time; +} + +static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu) +{ + if (cpu->hwp_boost_min) { + bool expired; + + /* Check if we are idle for hold time to boost down */ + expired = time_after64(cpu->sample.time, cpu->last_update + + hwp_boost_hold_time_ns); + if (expired) { + wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached); + cpu->hwp_boost_min = 0; + } + } + cpu->last_update = cpu->sample.time; +} + +static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu, + u64 time) +{ + cpu->sample.time = time; + + if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) { + bool do_io = false; + + cpu->sched_flags = 0; + /* + * Set iowait_boost flag and update time. Since IO WAIT flag + * is set all the time, we can't just conclude that there is + * some IO bound activity is scheduled on this CPU with just + * one occurrence. If we receive at least two in two + * consecutive ticks, then we treat as boost candidate. + */ + if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC)) + do_io = true; + + cpu->last_io_update = time; + + if (do_io) + intel_pstate_hwp_boost_up(cpu); + + } else { + intel_pstate_hwp_boost_down(cpu); + } +} + +static inline void intel_pstate_update_util_hwp(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + + cpu->sched_flags |= flags; + + if (smp_processor_id() == cpu->cpu) + intel_pstate_update_util_hwp_local(cpu, time); +} + static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { {} }; +static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs), + {} +}; + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); + + id = x86_match_cpu(intel_pstate_hwp_boost_ids); + if (id) + hwp_boost = true; } intel_pstate_get_cpu_pstates(cpu); @@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; - if (hwp_active) + if (hwp_active && !hwp_boost) return; if (cpu->update_util_set) @@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, - intel_pstate_update_util); + (hwp_active ? + intel_pstate_update_util_hwp : + intel_pstate_update_util)); cpu->update_util_set = true; } @@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_set_update_util_hook(policy->cpu); } - if (hwp_active) + if (hwp_active) { + /* + * When hwp_boost was active before and dynamically it + * was turned off, in that case we need to clear the + * update util hook. + */ + if (!hwp_boost) + intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_hwp_set(policy->cpu); + } mutex_unlock(&intel_pstate_limits_lock); diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 6ba709b6f095..3f0e2a14895a 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -217,7 +217,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev) if (!match) return -ENODEV; - opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL); + opp_data = devm_kzalloc(&pdev->dev, sizeof(*opp_data), GFP_KERNEL); if (!opp_data) return -ENOMEM; @@ -226,8 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev) opp_data->cpu_dev = get_cpu_device(0); if (!opp_data->cpu_dev) { pr_err("%s: Failed to get device for CPU0\n", __func__); - ret = ENODEV; - goto free_opp_data; + return -ENODEV; } opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev); @@ -285,8 +284,6 @@ register_cpufreq_dt: fail_put_node: of_node_put(opp_data->opp_node); -free_opp_data: - kfree(opp_data); return ret; } |