diff options
Diffstat (limited to 'drivers')
47 files changed, 914 insertions, 554 deletions
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 75534c5b5433..0c65a09aec1b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -655,8 +655,8 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return index; } -static void acpi_idle_enter_s2idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static int acpi_idle_enter_s2idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); @@ -664,16 +664,18 @@ static void acpi_idle_enter_s2idle(struct cpuidle_device *dev, struct acpi_processor *pr = __this_cpu_read(processors); if (unlikely(!pr)) - return; + return 0; if (pr->flags.bm_check) { acpi_idle_enter_bm(pr, cx, false); - return; + return 0; } else { ACPI_FLUSH_CPU_CACHE(); } } acpi_idle_do_entry(cx); + + return 0; } static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 0a01df608849..2cb5e04cf86c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -263,18 +263,18 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, /* * Traverse all sub-domains within the domain. This can be * done without any additional locking as the link->performance_state - * field is protected by the master genpd->lock, which is already taken. + * field is protected by the parent genpd->lock, which is already taken. * * Also note that link->performance_state (subdomain's performance state - * requirement to master domain) is different from - * link->slave->performance_state (current performance state requirement + * requirement to parent domain) is different from + * link->child->performance_state (current performance state requirement * of the devices/sub-domains of the subdomain) and so can have a * different value. * * Note that we also take vote from powered-off sub-domains into account * as the same is done for devices right now. */ - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry(link, &genpd->parent_links, parent_node) { if (link->performance_state > state) state = link->performance_state; } @@ -285,40 +285,40 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, static int _genpd_set_performance_state(struct generic_pm_domain *genpd, unsigned int state, int depth) { - struct generic_pm_domain *master; + struct generic_pm_domain *parent; struct gpd_link *link; - int master_state, ret; + int parent_state, ret; if (state == genpd->performance_state) return 0; - /* Propagate to masters of genpd */ - list_for_each_entry(link, &genpd->slave_links, slave_node) { - master = link->master; + /* Propagate to parents of genpd */ + list_for_each_entry(link, &genpd->child_links, child_node) { + parent = link->parent; - if (!master->set_performance_state) + if (!parent->set_performance_state) continue; - /* Find master's performance state */ + /* Find parent's performance state */ ret = dev_pm_opp_xlate_performance_state(genpd->opp_table, - master->opp_table, + parent->opp_table, state); if (unlikely(ret < 0)) goto err; - master_state = ret; + parent_state = ret; - genpd_lock_nested(master, depth + 1); + genpd_lock_nested(parent, depth + 1); link->prev_performance_state = link->performance_state; - link->performance_state = master_state; - master_state = _genpd_reeval_performance_state(master, - master_state); - ret = _genpd_set_performance_state(master, master_state, depth + 1); + link->performance_state = parent_state; + parent_state = _genpd_reeval_performance_state(parent, + parent_state); + ret = _genpd_set_performance_state(parent, parent_state, depth + 1); if (ret) link->performance_state = link->prev_performance_state; - genpd_unlock(master); + genpd_unlock(parent); if (ret) goto err; @@ -333,26 +333,26 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, err: /* Encountered an error, lets rollback */ - list_for_each_entry_continue_reverse(link, &genpd->slave_links, - slave_node) { - master = link->master; + list_for_each_entry_continue_reverse(link, &genpd->child_links, + child_node) { + parent = link->parent; - if (!master->set_performance_state) + if (!parent->set_performance_state) continue; - genpd_lock_nested(master, depth + 1); + genpd_lock_nested(parent, depth + 1); - master_state = link->prev_performance_state; - link->performance_state = master_state; + parent_state = link->prev_performance_state; + link->performance_state = parent_state; - master_state = _genpd_reeval_performance_state(master, - master_state); - if (_genpd_set_performance_state(master, master_state, depth + 1)) { + parent_state = _genpd_reeval_performance_state(parent, + parent_state); + if (_genpd_set_performance_state(parent, parent_state, depth + 1)) { pr_err("%s: Failed to roll back to %d performance state\n", - master->name, master_state); + parent->name, parent_state); } - genpd_unlock(master); + genpd_unlock(parent); } return ret; @@ -552,7 +552,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, /* * If sd_count > 0 at this point, one of the subdomains hasn't - * managed to call genpd_power_on() for the master yet after + * managed to call genpd_power_on() for the parent yet after * incrementing it. In that case genpd_power_on() will wait * for us to drop the lock, so we can call .power_off() and let * the genpd_power_on() restore power for us (this shouldn't @@ -566,22 +566,22 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, genpd->status = GPD_STATE_POWER_OFF; genpd_update_accounting(genpd); - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); - genpd_lock_nested(link->master, depth + 1); - genpd_power_off(link->master, false, depth + 1); - genpd_unlock(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_dec(link->parent); + genpd_lock_nested(link->parent, depth + 1); + genpd_power_off(link->parent, false, depth + 1); + genpd_unlock(link->parent); } return 0; } /** - * genpd_power_on - Restore power to a given PM domain and its masters. + * genpd_power_on - Restore power to a given PM domain and its parents. * @genpd: PM domain to power up. * @depth: nesting count for lockdep. * - * Restore power to @genpd and all of its masters so that it is possible to + * Restore power to @genpd and all of its parents so that it is possible to * resume a device belonging to it. */ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) @@ -594,20 +594,20 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) /* * The list is guaranteed not to change while the loop below is being - * executed, unless one of the masters' .power_on() callbacks fiddles + * executed, unless one of the parents' .power_on() callbacks fiddles * with it. */ - list_for_each_entry(link, &genpd->slave_links, slave_node) { - struct generic_pm_domain *master = link->master; + list_for_each_entry(link, &genpd->child_links, child_node) { + struct generic_pm_domain *parent = link->parent; - genpd_sd_counter_inc(master); + genpd_sd_counter_inc(parent); - genpd_lock_nested(master, depth + 1); - ret = genpd_power_on(master, depth + 1); - genpd_unlock(master); + genpd_lock_nested(parent, depth + 1); + ret = genpd_power_on(parent, depth + 1); + genpd_unlock(parent); if (ret) { - genpd_sd_counter_dec(master); + genpd_sd_counter_dec(parent); goto err; } } @@ -623,12 +623,12 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) err: list_for_each_entry_continue_reverse(link, - &genpd->slave_links, - slave_node) { - genpd_sd_counter_dec(link->master); - genpd_lock_nested(link->master, depth + 1); - genpd_power_off(link->master, false, depth + 1); - genpd_unlock(link->master); + &genpd->child_links, + child_node) { + genpd_sd_counter_dec(link->parent); + genpd_lock_nested(link->parent, depth + 1); + genpd_power_off(link->parent, false, depth + 1); + genpd_unlock(link->parent); } return ret; @@ -932,13 +932,13 @@ late_initcall(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP /** - * genpd_sync_power_off - Synchronously power off a PM domain and its masters. + * genpd_sync_power_off - Synchronously power off a PM domain and its parents. * @genpd: PM domain to power off, if possible. * @use_lock: use the lock. * @depth: nesting count for lockdep. * * Check if the given PM domain can be powered off (during system suspend or - * hibernation) and do that if so. Also, in that case propagate to its masters. + * hibernation) and do that if so. Also, in that case propagate to its parents. * * This function is only called in "noirq" and "syscore" stages of system power * transitions. The "noirq" callbacks may be executed asynchronously, thus in @@ -963,21 +963,21 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, genpd->status = GPD_STATE_POWER_OFF; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_dec(link->parent); if (use_lock) - genpd_lock_nested(link->master, depth + 1); + genpd_lock_nested(link->parent, depth + 1); - genpd_sync_power_off(link->master, use_lock, depth + 1); + genpd_sync_power_off(link->parent, use_lock, depth + 1); if (use_lock) - genpd_unlock(link->master); + genpd_unlock(link->parent); } } /** - * genpd_sync_power_on - Synchronously power on a PM domain and its masters. + * genpd_sync_power_on - Synchronously power on a PM domain and its parents. * @genpd: PM domain to power on. * @use_lock: use the lock. * @depth: nesting count for lockdep. @@ -994,16 +994,16 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, if (genpd_status_on(genpd)) return; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_inc(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_inc(link->parent); if (use_lock) - genpd_lock_nested(link->master, depth + 1); + genpd_lock_nested(link->parent, depth + 1); - genpd_sync_power_on(link->master, use_lock, depth + 1); + genpd_sync_power_on(link->parent, use_lock, depth + 1); if (use_lock) - genpd_unlock(link->master); + genpd_unlock(link->parent); } _genpd_power_on(genpd, false); @@ -1443,12 +1443,12 @@ static void genpd_update_cpumask(struct generic_pm_domain *genpd, if (!genpd_is_cpu_domain(genpd)) return; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - struct generic_pm_domain *master = link->master; + list_for_each_entry(link, &genpd->child_links, child_node) { + struct generic_pm_domain *parent = link->parent; - genpd_lock_nested(master, depth + 1); - genpd_update_cpumask(master, cpu, set, depth + 1); - genpd_unlock(master); + genpd_lock_nested(parent, depth + 1); + genpd_update_cpumask(parent, cpu, set, depth + 1); + genpd_unlock(parent); } if (set) @@ -1636,17 +1636,17 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(itr, &genpd->master_links, master_node) { - if (itr->slave == subdomain && itr->master == genpd) { + list_for_each_entry(itr, &genpd->parent_links, parent_node) { + if (itr->child == subdomain && itr->parent == genpd) { ret = -EINVAL; goto out; } } - link->master = genpd; - list_add_tail(&link->master_node, &genpd->master_links); - link->slave = subdomain; - list_add_tail(&link->slave_node, &subdomain->slave_links); + link->parent = genpd; + list_add_tail(&link->parent_node, &genpd->parent_links); + link->child = subdomain; + list_add_tail(&link->child_node, &subdomain->child_links); if (genpd_status_on(subdomain)) genpd_sd_counter_inc(genpd); @@ -1660,7 +1660,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, /** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. - * @genpd: Master PM domain to add the subdomain to. + * @genpd: Leader PM domain to add the subdomain to. * @subdomain: Subdomain to be added. */ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, @@ -1678,7 +1678,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); /** * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. - * @genpd: Master PM domain to remove the subdomain from. + * @genpd: Leader PM domain to remove the subdomain from. * @subdomain: Subdomain to be removed. */ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, @@ -1693,19 +1693,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, genpd_lock(subdomain); genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); - if (!list_empty(&subdomain->master_links) || subdomain->device_count) { + if (!list_empty(&subdomain->parent_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, subdomain->name); ret = -EBUSY; goto out; } - list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { - if (link->slave != subdomain) + list_for_each_entry_safe(link, l, &genpd->parent_links, parent_node) { + if (link->child != subdomain) continue; - list_del(&link->master_node); - list_del(&link->slave_node); + list_del(&link->parent_node); + list_del(&link->child_node); kfree(link); if (genpd_status_on(subdomain)) genpd_sd_counter_dec(genpd); @@ -1770,8 +1770,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd)) return -EINVAL; - INIT_LIST_HEAD(&genpd->master_links); - INIT_LIST_HEAD(&genpd->slave_links); + INIT_LIST_HEAD(&genpd->parent_links); + INIT_LIST_HEAD(&genpd->child_links); INIT_LIST_HEAD(&genpd->dev_list); genpd_lock_init(genpd); genpd->gov = gov; @@ -1848,15 +1848,15 @@ static int genpd_remove(struct generic_pm_domain *genpd) return -EBUSY; } - if (!list_empty(&genpd->master_links) || genpd->device_count) { + if (!list_empty(&genpd->parent_links) || genpd->device_count) { genpd_unlock(genpd); pr_err("%s: unable to remove %s\n", __func__, genpd->name); return -EBUSY; } - list_for_each_entry_safe(link, l, &genpd->slave_links, slave_node) { - list_del(&link->master_node); - list_del(&link->slave_node); + list_for_each_entry_safe(link, l, &genpd->child_links, child_node) { + list_del(&link->parent_node); + list_del(&link->child_node); kfree(link); } @@ -2827,12 +2827,12 @@ static int genpd_summary_one(struct seq_file *s, /* * Modifications on the list require holding locks on both - * master and slave, so we are safe. + * parent and child, so we are safe. * Also genpd->name is immutable. */ - list_for_each_entry(link, &genpd->master_links, master_node) { - seq_printf(s, "%s", link->slave->name); - if (!list_is_last(&link->master_node, &genpd->master_links)) + list_for_each_entry(link, &genpd->parent_links, parent_node) { + seq_printf(s, "%s", link->child->name); + if (!list_is_last(&link->parent_node, &genpd->parent_links)) seq_puts(s, ", "); } @@ -2860,7 +2860,7 @@ static int summary_show(struct seq_file *s, void *data) struct generic_pm_domain *genpd; int ret = 0; - seq_puts(s, "domain status slaves\n"); + seq_puts(s, "domain status children\n"); seq_puts(s, " /device runtime status\n"); seq_puts(s, "----------------------------------------------------------------------\n"); @@ -2915,8 +2915,8 @@ static int sub_domains_show(struct seq_file *s, void *data) if (ret) return -ERESTARTSYS; - list_for_each_entry(link, &genpd->master_links, master_node) - seq_printf(s, "%s\n", link->slave->name); + list_for_each_entry(link, &genpd->parent_links, parent_node) + seq_printf(s, "%s\n", link->child->name); genpd_unlock(genpd); return ret; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index daa8c7689f7e..490ed7deb99a 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -135,8 +135,8 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, * * All subdomains have been powered off already at this point. */ - list_for_each_entry(link, &genpd->master_links, master_node) { - struct generic_pm_domain *sd = link->slave; + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *sd = link->child; s64 sd_max_off_ns = sd->max_off_time_ns; if (sd_max_off_ns < 0) @@ -217,13 +217,13 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) } /* - * We have to invalidate the cached results for the masters, so + * We have to invalidate the cached results for the parents, so * use the observation that default_power_down_ok() is not - * going to be called for any master until this instance + * going to be called for any parent until this instance * returns. */ - list_for_each_entry(link, &genpd->slave_links, slave_node) - link->master->max_off_time_changed = true; + list_for_each_entry(link, &genpd->child_links, child_node) + link->parent->max_off_time_changed = true; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = false; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 24d25cf8ab14..c7b24812523c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include <linux/device.h> +#include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/pm_qos.h> @@ -739,12 +740,18 @@ int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) int wakeup_sysfs_add(struct device *dev) { - return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); + int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); + + if (!ret) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); + + return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 429e5a36c08a..e4ff681faaaa 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -244,7 +244,7 @@ static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_IA32_PERF_CTL, val, dummy); return val; @@ -261,7 +261,7 @@ static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_AMD_PERF_CTL, val, dummy); return val; @@ -612,7 +612,7 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { /* Intel Xeon Processor 7100 Series Specification Update - * http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * https://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause * Both Processor Cores to Lock Up. */ @@ -993,14 +993,14 @@ MODULE_PARM_DESC(acpi_pstate_strict, late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); -static const struct x86_cpu_id acpi_cpufreq_ids[] = { +static const struct x86_cpu_id __maybe_unused acpi_cpufreq_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL), X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f7c4206d4c90..d0b10baf039a 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -144,7 +144,7 @@ static void __exit amd_freq_sensitivity_exit(void) } module_exit(amd_freq_sensitivity_exit); -static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { +static const struct x86_cpu_id __maybe_unused amd_freq_sensitivity_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_PROC_FEEDBACK, NULL), {} }; diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 79742bbd221f..944d7b45afe9 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -279,7 +279,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0128de3603df..17c1c3becd92 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -50,7 +50,9 @@ static LIST_HEAD(cpufreq_governor_list); #define for_each_governor(__governor) \ list_for_each_entry(__governor, &cpufreq_governor_list, governor_list) -/** +static char default_governor[CPUFREQ_NAME_LEN]; + +/* * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock * also protects the cpufreq_cpu_data array. @@ -78,7 +80,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); -/** +/* * Two notifier lists: the "policy" list is involved in the * validation process for a new CPU frequency policy; the * "transition" list for kernel code that needs to handle @@ -298,7 +300,7 @@ struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ -/** +/* * adjust_jiffies - adjust the system "loops_per_jiffy" * * This function alters the system "loops_per_jiffy" for the clock @@ -524,6 +526,7 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); /** * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported * one. + * @policy: associated policy to interrogate * @target_freq: target frequency to resolve. * * The target to driver frequency mapping is cached in the policy. @@ -621,6 +624,24 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } +static struct cpufreq_governor *get_governor(const char *str_governor) +{ + struct cpufreq_governor *t; + + mutex_lock(&cpufreq_governor_mutex); + t = find_governor(str_governor); + if (!t) + goto unlock; + + if (!try_module_get(t->owner)) + t = NULL; + +unlock: + mutex_unlock(&cpufreq_governor_mutex); + + return t; +} + static unsigned int cpufreq_parse_policy(char *str_governor) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) @@ -640,31 +661,17 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) { struct cpufreq_governor *t; - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - if (!t) { - int ret; - - mutex_unlock(&cpufreq_governor_mutex); - - ret = request_module("cpufreq_%s", str_governor); - if (ret) - return NULL; + t = get_governor(str_governor); + if (t) + return t; - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - } - if (t && !try_module_get(t->owner)) - t = NULL; - - mutex_unlock(&cpufreq_governor_mutex); + if (request_module("cpufreq_%s", str_governor)) + return NULL; - return t; + return get_governor(str_governor); } -/** +/* * cpufreq_per_cpu_attr_read() / show_##file_name() - * print out cpufreq information * @@ -706,7 +713,7 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -/** +/* * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ #define store_one(file_name, object) \ @@ -727,7 +734,7 @@ static ssize_t store_##file_name \ store_one(scaling_min_freq, min); store_one(scaling_max_freq, max); -/** +/* * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware */ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, @@ -741,7 +748,7 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, return sprintf(buf, "<unknown>\n"); } -/** +/* * show_scaling_governor - show the current policy for the specified CPU */ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) @@ -756,7 +763,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) return -EINVAL; } -/** +/* * store_scaling_governor - store policy for the specified CPU */ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, @@ -793,7 +800,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return ret ? ret : count; } -/** +/* * show_scaling_driver - show the cpufreq driver currently loaded */ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) @@ -801,7 +808,7 @@ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name); } -/** +/* * show_scaling_available_governors - show the available CPUfreq governors */ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, @@ -815,12 +822,14 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, goto out; } + mutex_lock(&cpufreq_governor_mutex); for_each_governor(t) { if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) - goto out; + break; i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name); } + mutex_unlock(&cpufreq_governor_mutex); out: i += sprintf(&buf[i], "\n"); return i; @@ -843,7 +852,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) } EXPORT_SYMBOL_GPL(cpufreq_show_cpus); -/** +/* * show_related_cpus - show the CPUs affected by each transition even if * hw coordination is in use */ @@ -852,7 +861,7 @@ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) return cpufreq_show_cpus(policy->related_cpus, buf); } -/** +/* * show_affected_cpus - show the CPUs affected by each transition */ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) @@ -886,7 +895,7 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) return policy->governor->show_setspeed(policy, buf); } -/** +/* * show_bios_limit - show the current cpufreq HW/BIOS limitation */ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) @@ -1048,36 +1057,36 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return 0; } -__weak struct cpufreq_governor *cpufreq_default_governor(void) -{ - return NULL; -} - static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *def_gov = cpufreq_default_governor(); struct cpufreq_governor *gov = NULL; unsigned int pol = CPUFREQ_POLICY_UNKNOWN; + int ret; if (has_target()) { /* Update policy governor to the one used before hotplug. */ - gov = find_governor(policy->last_governor); + gov = get_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, policy->cpu); - } else if (def_gov) { - gov = def_gov; + gov->name, policy->cpu); } else { - return -ENODATA; + gov = get_governor(default_governor); + } + + if (!gov) { + gov = cpufreq_default_governor(); + __module_get(gov->owner); } + } else { + /* Use the default policy if there is no last_policy. */ if (policy->last_policy) { pol = policy->last_policy; - } else if (def_gov) { - pol = cpufreq_parse_policy(def_gov->name); + } else { + pol = cpufreq_parse_policy(default_governor); /* - * In case the default governor is neiter "performance" + * In case the default governor is neither "performance" * nor "powersave", fall back to the initial policy * value set by the driver. */ @@ -1089,7 +1098,11 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) return -ENODATA; } - return cpufreq_set_policy(policy, gov, pol); + ret = cpufreq_set_policy(policy, gov, pol); + if (gov) + module_put(gov->owner); + + return ret; } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) @@ -1604,7 +1617,7 @@ unlock: return 0; } -/** +/* * cpufreq_remove_dev - remove a CPU device * * Removes the cpufreq interface for a CPU device. @@ -2361,6 +2374,7 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); * cpufreq_get_policy - get the current cpufreq_policy * @policy: struct cpufreq_policy into which the current cpufreq_policy * is written + * @cpu: CPU to find the policy for * * Reads the current cpufreq policy. */ @@ -2747,7 +2761,7 @@ out: } EXPORT_SYMBOL_GPL(cpufreq_register_driver); -/** +/* * cpufreq_unregister_driver - unregister the current CPUFreq driver * * Unregister the current CPUFreq driver. Only call this if you have @@ -2783,13 +2797,19 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); static int __init cpufreq_core_init(void) { + struct cpufreq_governor *gov = cpufreq_default_governor(); + if (cpufreq_disabled()) return -ENODEV; cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); + if (!strlen(default_governor)) + strncpy(default_governor, gov->name, CPUFREQ_NAME_LEN); + return 0; } module_param(off, int, 0444); +module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444); core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 737ff3b9c2c0..aa39ff31ec9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -322,17 +322,7 @@ static struct dbs_governor cs_governor = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) - -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); -} +#define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " @@ -343,11 +333,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_CONSERVATIVE; + return &CPU_FREQ_GOV_CONSERVATIVE; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_CONSERVATIVE); +cpufreq_governor_exit(CPU_FREQ_GOV_CONSERVATIVE); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f99ae45efaea..63f7c219062b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -26,7 +26,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ -/** +/* * store_sampling_rate - update sampling rate effective immediately if needed. * * If new rate is smaller than the old, simply updating diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 82a4d37ddecb..ac361a8b1d3b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -408,7 +408,7 @@ static struct dbs_governor od_dbs_gov = { .start = od_start, }; -#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) +#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov) static void od_set_powersave_bias(unsigned int powersave_bias) { @@ -429,7 +429,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) continue; policy = cpufreq_cpu_get_raw(cpu); - if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) continue; policy_dbs = policy->governor_data; @@ -461,16 +461,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); -} - MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " @@ -480,11 +470,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_ONDEMAND; + return &CPU_FREQ_GOV_ONDEMAND; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND); +cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index def9afe0f5b8..71c1d9aba772 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_performance = { .limits = cpufreq_gov_performance_limits, }; -static int __init cpufreq_gov_performance_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_performance); -} - -static void __exit cpufreq_gov_performance_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_performance); -} - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE struct cpufreq_governor *cpufreq_default_governor(void) { @@ -50,5 +40,5 @@ MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); -core_initcall(cpufreq_gov_performance_init); -module_exit(cpufreq_gov_performance_exit); +cpufreq_governor_init(cpufreq_gov_performance); +cpufreq_governor_exit(cpufreq_gov_performance); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 1ae66019eb83..7749522355b5 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_powersave = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_powersave_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_powersave); -} - -static void __exit cpufreq_gov_powersave_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_powersave); -} - MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); @@ -42,9 +32,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_powersave; } - -core_initcall(cpufreq_gov_powersave_init); -#else -module_init(cpufreq_gov_powersave_init); #endif -module_exit(cpufreq_gov_powersave_exit); + +cpufreq_governor_init(cpufreq_gov_powersave); +cpufreq_governor_exit(cpufreq_gov_powersave); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index b43e7cd502c5..50a4d7846580 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -126,16 +126,6 @@ static struct cpufreq_governor cpufreq_gov_userspace = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_userspace_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_userspace); -} - -static void __exit cpufreq_gov_userspace_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_userspace); -} - MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>, " "Russell King <rmk@arm.linux.org.uk>"); MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); @@ -146,9 +136,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_userspace; } - -core_initcall(cpufreq_gov_userspace_init); -#else -module_init(cpufreq_gov_userspace_init); #endif -module_exit(cpufreq_gov_userspace_exit); + +cpufreq_governor_init(cpufreq_gov_userspace); +cpufreq_governor_exit(cpufreq_gov_userspace); diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 297d23cad8b5..91f477a6cbc4 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -2,7 +2,7 @@ /* * CPU frequency scaling for DaVinci * - * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments Incorporated - https://www.ti.com/ * * Based on linux/arch/arm/plat-omap/cpu-omap.c. Original Copyright follows: * diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index e117b0059123..f839dc9852c0 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -221,7 +221,7 @@ int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_index); -/** +/* * show_available_freqs - show available frequencies for the specified CPU */ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, @@ -260,7 +260,7 @@ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, struct freq_attr cpufreq_freq_attr_##_name##_freqs = \ __ATTR_RO(_name##_frequencies) -/** +/* * show_scaling_available_frequencies - show available normal frequencies for * the specified CPU */ @@ -272,7 +272,7 @@ static ssize_t scaling_available_frequencies_show(struct cpufreq_policy *policy, cpufreq_attr_available_freq(scaling_available); EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); -/** +/* * show_available_boost_freqs - show available boost frequencies for * the specified CPU */ diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index fdb2ffffbd15..ef7b34c1fd2b 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -193,7 +193,7 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy) policy->clk = clks[ARM].clk; cpufreq_generic_init(policy, freq_table, transition_latency); policy->suspend_freq = max_freq; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7e0f7880b21a..7f5d81931483 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -201,9 +201,7 @@ struct global_params { * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @last_sample_time: Last Sample time - * @aperf_mperf_shift: Number of clock cycles after aperf, merf is incremented - * This shift is a multiplier to mperf delta to - * calculate CPU busy. + * @aperf_mperf_shift: APERF vs MPERF counting frequency difference * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value @@ -275,6 +273,7 @@ static struct cpudata **all_cpu_data; * @get_min: Callback to get minimum P state * @get_turbo: Callback to get turbo P state * @get_scaling: Callback to get frequency scaling factor + * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms * @@ -602,11 +601,12 @@ static const unsigned int epp_values[] = { HWP_EPP_POWERSAVE }; -static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) +static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp) { s16 epp; int index = -EINVAL; + *raw_epp = 0; epp = intel_pstate_get_epp(cpu_data, 0); if (epp < 0) return epp; @@ -614,12 +614,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; - if (epp <= HWP_EPP_BALANCE_PERFORMANCE) + if (epp == HWP_EPP_BALANCE_PERFORMANCE) return 2; - if (epp <= HWP_EPP_BALANCE_POWERSAVE) + if (epp == HWP_EPP_BALANCE_POWERSAVE) return 3; - else + if (epp == HWP_EPP_POWERSAVE) return 4; + *raw_epp = epp; + return 0; } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: @@ -638,7 +640,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) } static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, - int pref_index) + int pref_index, bool use_raw, + u32 raw_epp) { int epp = -EINVAL; int ret; @@ -646,29 +649,34 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, if (!pref_index) epp = cpu_data->epp_default; - mutex_lock(&intel_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { - u64 value; - - ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); - if (ret) - goto return_pref; + /* + * Use the cached HWP Request MSR value, because the register + * itself may be updated by intel_pstate_hwp_boost_up() or + * intel_pstate_hwp_boost_down() at any time. + */ + u64 value = READ_ONCE(cpu_data->hwp_req_cached); value &= ~GENMASK_ULL(31, 24); - if (epp == -EINVAL) + if (use_raw) + epp = raw_epp; + else if (epp == -EINVAL) epp = epp_values[pref_index - 1]; value |= (u64)epp << 24; + /* + * The only other updater of hwp_req_cached in the active mode, + * intel_pstate_hwp_set(), is called under the same lock as this + * function, so it cannot run in parallel with the update below. + */ + WRITE_ONCE(cpu_data->hwp_req_cached, value); ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); } else { if (epp == -EINVAL) epp = (pref_index - 1) << 2; ret = intel_pstate_set_epb(cpu_data->cpu, epp); } -return_pref: - mutex_unlock(&intel_pstate_limits_lock); return ret; } @@ -694,31 +702,54 @@ static ssize_t store_energy_performance_preference( { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; char str_preference[21]; - int ret; + bool raw = false; + ssize_t ret; + u32 epp = 0; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) return -EINVAL; ret = match_string(energy_perf_strings, -1, str_preference); - if (ret < 0) - return ret; + if (ret < 0) { + if (!boot_cpu_has(X86_FEATURE_HWP_EPP)) + return ret; - intel_pstate_set_energy_pref_index(cpu_data, ret); - return count; + ret = kstrtouint(buf, 10, &epp); + if (ret) + return ret; + + if (epp > 255) + return -EINVAL; + + raw = true; + } + + mutex_lock(&intel_pstate_limits_lock); + + ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp); + if (!ret) + ret = count; + + mutex_unlock(&intel_pstate_limits_lock); + + return ret; } static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; - int preference; + int preference, raw_epp; - preference = intel_pstate_get_energy_pref_index(cpu_data); + preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp); if (preference < 0) return preference; - return sprintf(buf, "%s\n", energy_perf_strings[preference]); + if (raw_epp) + return sprintf(buf, "%d\n", raw_epp); + else + return sprintf(buf, "%s\n", energy_perf_strings[preference]); } cpufreq_freq_attr_rw(energy_performance_preference); @@ -866,10 +897,39 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) return 0; } +#define POWER_CTL_EE_ENABLE 1 +#define POWER_CTL_EE_DISABLE 2 + +static int power_ctl_ee_state; + +static void set_power_ctl_ee_state(bool input) +{ + u64 power_ctl; + + mutex_lock(&intel_pstate_driver_lock); + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + if (input) { + power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_ENABLE; + } else { + power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_DISABLE; + } + wrmsrl(MSR_IA32_POWER_CTL, power_ctl); + mutex_unlock(&intel_pstate_driver_lock); +} + static void intel_pstate_hwp_enable(struct cpudata *cpudata); static int intel_pstate_resume(struct cpufreq_policy *policy) { + + /* Only restore if the system default is changed */ + if (power_ctl_ee_state == POWER_CTL_EE_ENABLE) + set_power_ctl_ee_state(true); + else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE) + set_power_ctl_ee_state(false); + if (!hwp_active) return 0; @@ -1218,6 +1278,32 @@ static ssize_t store_hwp_dynamic_boost(struct kobject *a, return count; } +static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 power_ctl; + int enable; + + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE)); + return sprintf(buf, "%d\n", !enable); +} + +static ssize_t store_energy_efficiency(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + bool input; + int ret; + + ret = kstrtobool(buf, &input); + if (ret) + return ret; + + set_power_ctl_ee_state(input); + + return count; +} + show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -1228,6 +1314,7 @@ define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); define_one_global_ro(num_pstates); define_one_global_rw(hwp_dynamic_boost); +define_one_global_rw(energy_efficiency); static struct attribute *intel_pstate_attributes[] = { &status.attr, @@ -1241,6 +1328,8 @@ static const struct attribute_group intel_pstate_attr_group = { .attrs = intel_pstate_attributes, }; +static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[]; + static void __init intel_pstate_sysfs_expose_params(void) { struct kobject *intel_pstate_kobject; @@ -1273,6 +1362,11 @@ static void __init intel_pstate_sysfs_expose_params(void) &hwp_dynamic_boost.attr); WARN_ON(rc); } + + if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) { + rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr); + WARN_ON(rc); + } } /************************** sysfs end ************************/ @@ -1288,25 +1382,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); } -#define MSR_IA32_POWER_CTL_BIT_EE 19 - -/* Disable energy efficiency optimization */ -static void intel_pstate_disable_ee(int cpu) -{ - u64 power_ctl; - int ret; - - ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl); - if (ret) - return; - - if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) { - pr_info("Disabling energy efficiency optimization\n"); - power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); - wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl); - } -} - static int atom_get_min_pstate(void) { u64 value; @@ -1982,10 +2057,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) if (hwp_active) { const struct x86_cpu_id *id; - id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); - if (id) - intel_pstate_disable_ee(cpunum); - intel_pstate_hwp_enable(cpu); id = x86_match_cpu(intel_pstate_hwp_boost_ids); @@ -2754,7 +2825,12 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { copy_cpu_funcs(&core_funcs); - if (!no_hwp) { + /* + * Avoid enabling HWP for processors without EPP support, + * because that means incomplete HWP implementation which is a + * corner case and supporting it is generally problematic. + */ + if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; @@ -2808,8 +2884,17 @@ hwp_cpu_matched: if (rc) return rc; - if (hwp_active) + if (hwp_active) { + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); + if (id) { + set_power_ctl_ee_state(false); + pr_info("Disabling energy efficiency optimization\n"); + } + pr_info("HWP enabled\n"); + } return 0; } diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 0c98dd08273d..7d1212c9b7c8 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -448,7 +448,7 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) policy->driver_data = info; policy->clk = info->cpu_clk; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(info->cpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 8d14b42a8c6f..3694bb030df3 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -131,7 +131,7 @@ static int omap_cpu_init(struct cpufreq_policy *policy) /* FIXME: what's the actual transition time? */ cpufreq_generic_init(policy, freq_table, 300 * 1000); - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(mpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index c66f566a854c..815645170c4d 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -22,6 +22,8 @@ #include <asm/time.h> #include <asm/smp.h> +#include <platforms/pasemi/pasemi.h> + #define SDCASR_REG 0x0100 #define SDCASR_REG_STRIDE 0x1000 #define SDCPWR_CFGA0_REG 0x0100 diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 5789fe7a94bd..9f3fc7a073d0 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -616,7 +616,7 @@ static void __exit pcc_cpufreq_exit(void) free_percpu(pcc_cpu_info); } -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 3984959eed1d..0acc9e241cd7 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -86,7 +86,7 @@ static u32 convert_fid_to_vco_fid(u32 fid) */ static int pending_bit_stuck(void) { - u32 lo, hi; + u32 lo, hi __always_unused; rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; @@ -282,7 +282,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, { u32 rvosteps = data->rvo; u32 savefid = data->currfid; - u32 maxvid, lo, rvomult = 1; + u32 maxvid, lo __always_unused, rvomult = 1; pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 8646eb197cd9..a9af15e994cc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,13 +64,14 @@ * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate_idx, Last set value of local pstate and global - * last_gpstate_idx pstate in terms of cpufreq table index + * @last_lpstate_idx: Last set value of local pstate and global + * @last_gpstate_idx: pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between * routines called by the timer handler and * governer's target_index calls + * @policy: Associated CPUFreq policy */ struct global_pstate_info { int highest_lpstate_idx; @@ -85,7 +86,7 @@ struct global_pstate_info { static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); +static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); /** * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap * indexed by a function of pstate id. @@ -170,7 +171,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift) /* Use following functions for conversions between pstate_id and index */ -/** +/* * idx_to_pstate : Returns the pstate id corresponding to the * frequency in the cpufreq frequency table * powernv_freqs indexed by @i. @@ -188,7 +189,7 @@ static inline u8 idx_to_pstate(unsigned int i) return powernv_freqs[i].driver_data; } -/** +/* * pstate_to_idx : Returns the index in the cpufreq frequencytable * powernv_freqs for the frequency whose corresponding * pstate id is @pstate. @@ -380,7 +381,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, powernv_freqs[powernv_pstate_info.nominal].frequency); } -struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = +static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = __ATTR_RO(cpuinfo_nominal_freq); #define SCALING_BOOST_FREQS_ATTR_INDEX 2 @@ -660,13 +661,13 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) /** * gpstate_timer_handler * - * @data: pointer to cpufreq_policy on which timer was queued + * @t: Timer context used to fetch global pstate info struct * * This handler brings down the global pstate closer to the local pstate * according quadratic equation. Queues a new timer if it is still not equal * to local pstate */ -void gpstate_timer_handler(struct timer_list *t) +static void gpstate_timer_handler(struct timer_list *t) { struct global_pstate_info *gpstates = from_timer(gpstates, t, timer); struct cpufreq_policy *policy = gpstates->policy; @@ -899,7 +900,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; -void powernv_cpufreq_work_fn(struct work_struct *work) +static void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); struct cpufreq_policy *policy; diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index fc92a8842e25..0a04b6f03b9a 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -238,7 +238,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) goto error; } - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); policy->fast_switch_possible = true; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 1cf688fcb56b..fb42e3390377 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -103,17 +103,12 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) } static int __maybe_unused -scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, int cpu) +scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, + struct device *cpu_dev) { - struct device *cpu_dev = get_cpu_device(cpu); unsigned long Hz; int ret, domain; - if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); - return -ENODEV; - } - domain = handle->perf_ops->device_domain_id(cpu_dev); if (domain < 0) return domain; @@ -201,7 +196,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = handle->perf_ops->fast_switch_possible(handle, cpu_dev); - em_register_perf_domain(policy->cpus, nr_opp, &em_cb); + em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); return 0; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 20d1f85d5f5a..b0f5388b8854 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -167,7 +167,7 @@ static int scpi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = false; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 83c85d3d67e3..4e8b1dee7c9a 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -450,7 +450,7 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy) policy->freq_table = freq_table[cur_cluster]; policy->cpuinfo.transition_latency = 1000000; /* 1 ms */ - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); if (is_bL_switching_enabled()) per_cpu(cpu_last_req_freq, policy->cpu) = diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 51a7e89085c0..0844fadc4be8 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -23,6 +23,16 @@ config ARM_PSCI_CPUIDLE It provides an idle driver that is capable of detecting and managing idle states through the PSCI firmware interface. +config ARM_PSCI_CPUIDLE_DOMAIN + bool "PSCI CPU idle Domain" + depends on ARM_PSCI_CPUIDLE + depends on PM_GENERIC_DOMAINS_OF + default y + help + Select this to enable the PSCI based CPUidle driver to use PM domains, + which is needed to support the hierarchical DT based layout of the + idle states. + config ARM_BIG_LITTLE_CPUIDLE bool "Support for ARM big.LITTLE processors" depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS || COMPILE_TEST diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index f07800cbb43f..26bbc5e74123 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -21,9 +21,8 @@ obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o obj-$(CONFIG_ARM_CPUIDLE) += cpuidle-arm.o -obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle_psci.o -cpuidle_psci-y := cpuidle-psci.o -cpuidle_psci-$(CONFIG_PM_GENERIC_DOMAINS_OF) += cpuidle-psci-domain.o +obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle-psci.o +obj-$(CONFIG_ARM_PSCI_CPUIDLE_DOMAIN) += cpuidle-psci-domain.o obj-$(CONFIG_ARM_TEGRA_CPUIDLE) += cpuidle-tegra.o obj-$(CONFIG_ARM_QCOM_SPM_CPUIDLE) += cpuidle-qcom-spm.o diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 423f03bbeb74..b6e9649ab0da 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -12,6 +12,7 @@ #include <linux/cpu.h> #include <linux/device.h> #include <linux/kernel.h> +#include <linux/platform_device.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/psci.h> @@ -26,7 +27,7 @@ struct psci_pd_provider { }; static LIST_HEAD(psci_pd_providers); -static bool osi_mode_enabled __initdata; +static bool psci_pd_allow_domain_state; static int psci_pd_power_off(struct generic_pm_domain *pd) { @@ -36,6 +37,9 @@ static int psci_pd_power_off(struct generic_pm_domain *pd) if (!state->data) return 0; + if (!psci_pd_allow_domain_state) + return -EBUSY; + /* OSI mode is enabled, set the corresponding domain state. */ pd_state = state->data; psci_set_domain_state(*pd_state); @@ -43,8 +47,8 @@ static int psci_pd_power_off(struct generic_pm_domain *pd) return 0; } -static int __init psci_pd_parse_state_nodes(struct genpd_power_state *states, - int state_count) +static int psci_pd_parse_state_nodes(struct genpd_power_state *states, + int state_count) { int i, ret; u32 psci_state, *psci_state_buf; @@ -73,7 +77,7 @@ free_state: return ret; } -static int __init psci_pd_parse_states(struct device_node *np, +static int psci_pd_parse_states(struct device_node *np, struct genpd_power_state **states, int *state_count) { int ret; @@ -101,7 +105,7 @@ static void psci_pd_free_states(struct genpd_power_state *states, kfree(states); } -static int __init psci_pd_init(struct device_node *np) +static int psci_pd_init(struct device_node *np) { struct generic_pm_domain *pd; struct psci_pd_provider *pd_provider; @@ -168,7 +172,7 @@ out: return ret; } -static void __init psci_pd_remove(void) +static void psci_pd_remove(void) { struct psci_pd_provider *pd_provider, *it; struct generic_pm_domain *genpd; @@ -186,7 +190,7 @@ static void __init psci_pd_remove(void) } } -static int __init psci_pd_init_topology(struct device_node *np, bool add) +static int psci_pd_init_topology(struct device_node *np, bool add) { struct device_node *node; struct of_phandle_args child, parent; @@ -212,24 +216,33 @@ static int __init psci_pd_init_topology(struct device_node *np, bool add) return 0; } -static int __init psci_pd_add_topology(struct device_node *np) +static int psci_pd_add_topology(struct device_node *np) { return psci_pd_init_topology(np, true); } -static void __init psci_pd_remove_topology(struct device_node *np) +static void psci_pd_remove_topology(struct device_node *np) { psci_pd_init_topology(np, false); } -static const struct of_device_id psci_of_match[] __initconst = { +static void psci_cpuidle_domain_sync_state(struct device *dev) +{ + /* + * All devices have now been attached/probed to the PM domain topology, + * hence it's fine to allow domain states to be picked. + */ + psci_pd_allow_domain_state = true; +} + +static const struct of_device_id psci_of_match[] = { { .compatible = "arm,psci-1.0" }, {} }; -static int __init psci_idle_init_domains(void) +static int psci_cpuidle_domain_probe(struct platform_device *pdev) { - struct device_node *np = of_find_matching_node(NULL, psci_of_match); + struct device_node *np = pdev->dev.of_node; struct device_node *node; int ret = 0, pd_count = 0; @@ -238,7 +251,7 @@ static int __init psci_idle_init_domains(void) /* Currently limit the hierarchical topology to be used in OSI mode. */ if (!psci_has_osi_support()) - goto out; + return 0; /* * Parse child nodes for the "#power-domain-cells" property and @@ -257,7 +270,7 @@ static int __init psci_idle_init_domains(void) /* Bail out if not using the hierarchical CPU topology. */ if (!pd_count) - goto out; + return 0; /* Link genpd masters/subdomains to model the CPU topology. */ ret = psci_pd_add_topology(np); @@ -272,10 +285,8 @@ static int __init psci_idle_init_domains(void) goto remove_pd; } - osi_mode_enabled = true; - of_node_put(np); pr_info("Initialized CPU PM domain topology\n"); - return pd_count; + return 0; put_node: of_node_put(node); @@ -283,19 +294,28 @@ remove_pd: if (pd_count) psci_pd_remove(); pr_err("failed to create CPU PM domains ret=%d\n", ret); -out: - of_node_put(np); return ret; } + +static struct platform_driver psci_cpuidle_domain_driver = { + .probe = psci_cpuidle_domain_probe, + .driver = { + .name = "psci-cpuidle-domain", + .of_match_table = psci_of_match, + .sync_state = psci_cpuidle_domain_sync_state, + }, +}; + +static int __init psci_idle_init_domains(void) +{ + return platform_driver_register(&psci_cpuidle_domain_driver); +} subsys_initcall(psci_idle_init_domains); -struct device __init *psci_dt_attach_cpu(int cpu) +struct device *psci_dt_attach_cpu(int cpu) { struct device *dev; - if (!osi_mode_enabled) - return NULL; - dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), "psci"); if (IS_ERR_OR_NULL(dev)) return dev; @@ -306,3 +326,11 @@ struct device __init *psci_dt_attach_cpu(int cpu) return dev; } + +void psci_dt_detach_cpu(struct device *dev) +{ + if (IS_ERR_OR_NULL(dev)) + return; + + dev_pm_domain_detach(dev, false); +} diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 3806f911b61c..74463841805f 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -17,9 +17,11 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/of_device.h> +#include <linux/platform_device.h> #include <linux/psci.h> #include <linux/pm_runtime.h> #include <linux/slab.h> +#include <linux/string.h> #include <asm/cpuidle.h> @@ -33,7 +35,7 @@ struct psci_cpuidle_data { static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data); static DEFINE_PER_CPU(u32, domain_state); -static bool psci_cpuidle_use_cpuhp __initdata; +static bool psci_cpuidle_use_cpuhp; void psci_set_domain_state(u32 state) { @@ -104,7 +106,7 @@ static int psci_idle_cpuhp_down(unsigned int cpu) return 0; } -static void __init psci_idle_init_cpuhp(void) +static void psci_idle_init_cpuhp(void) { int err; @@ -127,30 +129,13 @@ static int psci_enter_idle_state(struct cpuidle_device *dev, return psci_enter_state(idx, state[idx]); } -static struct cpuidle_driver psci_idle_driver __initdata = { - .name = "psci_idle", - .owner = THIS_MODULE, - /* - * PSCI idle states relies on architectural WFI to - * be represented as state index 0. - */ - .states[0] = { - .enter = psci_enter_idle_state, - .exit_latency = 1, - .target_residency = 1, - .power_usage = UINT_MAX, - .name = "WFI", - .desc = "ARM WFI", - } -}; - -static const struct of_device_id psci_idle_state_match[] __initconst = { +static const struct of_device_id psci_idle_state_match[] = { { .compatible = "arm,idle-state", .data = psci_enter_idle_state }, { }, }; -int __init psci_dt_parse_state_node(struct device_node *np, u32 *state) +int psci_dt_parse_state_node(struct device_node *np, u32 *state) { int err = of_property_read_u32(np, "arm,psci-suspend-param", state); @@ -167,9 +152,9 @@ int __init psci_dt_parse_state_node(struct device_node *np, u32 *state) return 0; } -static int __init psci_dt_cpu_init_topology(struct cpuidle_driver *drv, - struct psci_cpuidle_data *data, - unsigned int state_count, int cpu) +static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv, + struct psci_cpuidle_data *data, + unsigned int state_count, int cpu) { /* Currently limit the hierarchical topology to be used in OSI mode. */ if (!psci_has_osi_support()) @@ -190,9 +175,9 @@ static int __init psci_dt_cpu_init_topology(struct cpuidle_driver *drv, return 0; } -static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv, - struct device_node *cpu_node, - unsigned int state_count, int cpu) +static int psci_dt_cpu_init_idle(struct device *dev, struct cpuidle_driver *drv, + struct device_node *cpu_node, + unsigned int state_count, int cpu) { int i, ret = 0; u32 *psci_states; @@ -200,7 +185,8 @@ static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv, struct psci_cpuidle_data *data = per_cpu_ptr(&psci_cpuidle_data, cpu); state_count++; /* Add WFI state too */ - psci_states = kcalloc(state_count, sizeof(*psci_states), GFP_KERNEL); + psci_states = devm_kcalloc(dev, state_count, sizeof(*psci_states), + GFP_KERNEL); if (!psci_states) return -ENOMEM; @@ -213,32 +199,26 @@ static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv, of_node_put(state_node); if (ret) - goto free_mem; + return ret; pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); } - if (i != state_count) { - ret = -ENODEV; - goto free_mem; - } + if (i != state_count) + return -ENODEV; /* Initialize optional data, used for the hierarchical topology. */ ret = psci_dt_cpu_init_topology(drv, data, state_count, cpu); if (ret < 0) - goto free_mem; + return ret; /* Idle states parsed correctly, store them in the per-cpu struct. */ data->psci_states = psci_states; return 0; - -free_mem: - kfree(psci_states); - return ret; } -static __init int psci_cpu_init_idle(struct cpuidle_driver *drv, - unsigned int cpu, unsigned int state_count) +static int psci_cpu_init_idle(struct device *dev, struct cpuidle_driver *drv, + unsigned int cpu, unsigned int state_count) { struct device_node *cpu_node; int ret; @@ -254,14 +234,22 @@ static __init int psci_cpu_init_idle(struct cpuidle_driver *drv, if (!cpu_node) return -ENODEV; - ret = psci_dt_cpu_init_idle(drv, cpu_node, state_count, cpu); + ret = psci_dt_cpu_init_idle(dev, drv, cpu_node, state_count, cpu); of_node_put(cpu_node); return ret; } -static int __init psci_idle_init_cpu(int cpu) +static void psci_cpu_deinit_idle(int cpu) +{ + struct psci_cpuidle_data *data = per_cpu_ptr(&psci_cpuidle_data, cpu); + + psci_dt_detach_cpu(data->dev); + psci_cpuidle_use_cpuhp = false; +} + +static int psci_idle_init_cpu(struct device *dev, int cpu) { struct cpuidle_driver *drv; struct device_node *cpu_node; @@ -284,17 +272,26 @@ static int __init psci_idle_init_cpu(int cpu) if (ret) return ret; - drv = kmemdup(&psci_idle_driver, sizeof(*drv), GFP_KERNEL); + drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL); if (!drv) return -ENOMEM; + drv->name = "psci_idle"; + drv->owner = THIS_MODULE; drv->cpumask = (struct cpumask *)cpumask_of(cpu); /* - * Initialize idle states data, starting at index 1, since - * by default idle state 0 is the quiescent state reached - * by the cpu by executing the wfi instruction. - * + * PSCI idle states relies on architectural WFI to be represented as + * state index 0. + */ + drv->states[0].enter = psci_enter_idle_state; + drv->states[0].exit_latency = 1; + drv->states[0].target_residency = 1; + drv->states[0].power_usage = UINT_MAX; + strcpy(drv->states[0].name, "WFI"); + strcpy(drv->states[0].desc, "ARM WFI"); + + /* * If no DT idle states are detected (ret == 0) let the driver * initialization fail accordingly since there is no reason to * initialize the idle driver if only wfi is supported, the @@ -302,48 +299,45 @@ static int __init psci_idle_init_cpu(int cpu) * on idle entry. */ ret = dt_init_idle_driver(drv, psci_idle_state_match, 1); - if (ret <= 0) { - ret = ret ? : -ENODEV; - goto out_kfree_drv; - } + if (ret <= 0) + return ret ? : -ENODEV; /* * Initialize PSCI idle states. */ - ret = psci_cpu_init_idle(drv, cpu, ret); + ret = psci_cpu_init_idle(dev, drv, cpu, ret); if (ret) { pr_err("CPU %d failed to PSCI idle\n", cpu); - goto out_kfree_drv; + return ret; } ret = cpuidle_register(drv, NULL); if (ret) - goto out_kfree_drv; + goto deinit; cpuidle_cooling_register(drv); return 0; - -out_kfree_drv: - kfree(drv); +deinit: + psci_cpu_deinit_idle(cpu); return ret; } /* - * psci_idle_init - Initializes PSCI cpuidle driver + * psci_idle_probe - Initializes PSCI cpuidle driver * * Initializes PSCI cpuidle driver for all CPUs, if any CPU fails * to register cpuidle driver then rollback to cancel all CPUs * registration. */ -static int __init psci_idle_init(void) +static int psci_cpuidle_probe(struct platform_device *pdev) { int cpu, ret; struct cpuidle_driver *drv; struct cpuidle_device *dev; for_each_possible_cpu(cpu) { - ret = psci_idle_init_cpu(cpu); + ret = psci_idle_init_cpu(&pdev->dev, cpu); if (ret) goto out_fail; } @@ -356,9 +350,34 @@ out_fail: dev = per_cpu(cpuidle_devices, cpu); drv = cpuidle_get_cpu_driver(dev); cpuidle_unregister(drv); - kfree(drv); + psci_cpu_deinit_idle(cpu); } return ret; } + +static struct platform_driver psci_cpuidle_driver = { + .probe = psci_cpuidle_probe, + .driver = { + .name = "psci-cpuidle", + }, +}; + +static int __init psci_idle_init(void) +{ + struct platform_device *pdev; + int ret; + + ret = platform_driver_register(&psci_cpuidle_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&psci_cpuidle_driver); + return PTR_ERR(pdev); + } + + return 0; +} device_initcall(psci_idle_init); diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h index 7299a04dd467..d8e925e84c27 100644 --- a/drivers/cpuidle/cpuidle-psci.h +++ b/drivers/cpuidle/cpuidle-psci.h @@ -3,15 +3,18 @@ #ifndef __CPUIDLE_PSCI_H #define __CPUIDLE_PSCI_H +struct device; struct device_node; void psci_set_domain_state(u32 state); -int __init psci_dt_parse_state_node(struct device_node *np, u32 *state); +int psci_dt_parse_state_node(struct device_node *np, u32 *state); -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF -struct device __init *psci_dt_attach_cpu(int cpu); +#ifdef CONFIG_ARM_PSCI_CPUIDLE_DOMAIN +struct device *psci_dt_attach_cpu(int cpu); +void psci_dt_detach_cpu(struct device *dev); #else -static inline struct device __init *psci_dt_attach_cpu(int cpu) { return NULL; } +static inline struct device *psci_dt_attach_cpu(int cpu) { return NULL; } +static inline void psci_dt_detach_cpu(struct device *dev) { } #endif #endif /* __CPUIDLE_PSCI_H */ diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index 150045849d78..a12fb141875a 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -253,11 +253,13 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev, return err ? -1 : index; } -static void tegra114_enter_s2idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) +static int tegra114_enter_s2idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) { tegra_cpuidle_enter(dev, drv, index); + + return 0; } /* diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index 8c31b0f2e28f..56efbeb7851e 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -293,7 +293,7 @@ static void devfreq_event_release_edev(struct device *dev) /** * devfreq_event_add_edev() - Add new devfreq-event device. * @dev : the device owning the devfreq-event device being created - * @desc : the devfreq-event device's decriptor which include essential + * @desc : the devfreq-event device's descriptor which include essential * data for devfreq-event device. * * Note that this function add new devfreq-event device to devfreq-event class @@ -385,7 +385,7 @@ static void devm_devfreq_event_release(struct device *dev, void *res) /** * devm_devfreq_event_add_edev() - Resource-managed devfreq_event_add_edev() * @dev : the device owning the devfreq-event device being created - * @desc : the devfreq-event device's decriptor which include essential + * @desc : the devfreq-event device's descriptor which include essential * data for devfreq-event device. * * Note that this function manages automatically the memory of devfreq-event diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 52b9c3e141f3..561d91b2d3bf 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -49,6 +49,11 @@ static LIST_HEAD(devfreq_governor_list); static LIST_HEAD(devfreq_list); static DEFINE_MUTEX(devfreq_list_lock); +static const char timer_name[][DEVFREQ_NAME_LEN] = { + [DEVFREQ_TIMER_DEFERRABLE] = { "deferrable" }, + [DEVFREQ_TIMER_DELAYED] = { "delayed" }, +}; + /** * find_device_devfreq() - find devfreq struct using device pointer * @dev: device pointer used to lookup device devfreq. @@ -454,7 +459,17 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (devfreq->governor->interrupt_driven) return; - INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); + switch (devfreq->profile->timer) { + case DEVFREQ_TIMER_DEFERRABLE: + INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); + break; + case DEVFREQ_TIMER_DELAYED: + INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); + break; + default: + return; + } + if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); @@ -771,6 +786,11 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; + if (devfreq->profile->timer < 0 + || devfreq->profile->timer >= DEVFREQ_TIMER_NUM) { + goto err_out; + } + if (!devfreq->profile->max_state && !devfreq->profile->freq_table) { mutex_unlock(&devfreq->lock); err = set_freq_table(devfreq); @@ -1260,18 +1280,20 @@ EXPORT_SYMBOL(devfreq_remove_governor); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct devfreq *devfreq = to_devfreq(dev); - return sprintf(buf, "%s\n", dev_name(devfreq->dev.parent)); + struct devfreq *df = to_devfreq(dev); + return sprintf(buf, "%s\n", dev_name(df->dev.parent)); } static DEVICE_ATTR_RO(name); static ssize_t governor_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!to_devfreq(dev)->governor) + struct devfreq *df = to_devfreq(dev); + + if (!df->governor) return -EINVAL; - return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name); + return sprintf(buf, "%s\n", df->governor->name); } static ssize_t governor_store(struct device *dev, struct device_attribute *attr, @@ -1282,6 +1304,9 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, char str_governor[DEVFREQ_NAME_LEN + 1]; const struct devfreq_governor *governor, *prev_governor; + if (!df->governor) + return -EINVAL; + ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor); if (ret != 1) return -EINVAL; @@ -1295,20 +1320,18 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (df->governor == governor) { ret = 0; goto out; - } else if ((df->governor && df->governor->immutable) || - governor->immutable) { + } else if (df->governor->immutable || governor->immutable) { ret = -EINVAL; goto out; } - if (df->governor) { - ret = df->governor->event_handler(df, DEVFREQ_GOV_STOP, NULL); - if (ret) { - dev_warn(dev, "%s: Governor %s not stopped(%d)\n", - __func__, df->governor->name, ret); - goto out; - } + ret = df->governor->event_handler(df, DEVFREQ_GOV_STOP, NULL); + if (ret) { + dev_warn(dev, "%s: Governor %s not stopped(%d)\n", + __func__, df->governor->name, ret); + goto out; } + prev_governor = df->governor; df->governor = governor; strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); @@ -1343,13 +1366,16 @@ static ssize_t available_governors_show(struct device *d, struct devfreq *df = to_devfreq(d); ssize_t count = 0; + if (!df->governor) + return -EINVAL; + mutex_lock(&devfreq_list_lock); /* * The devfreq with immutable governor (e.g., passive) shows * only own governor. */ - if (df->governor && df->governor->immutable) { + if (df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, "%s ", df->governor_name); /* @@ -1383,27 +1409,37 @@ static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long freq; - struct devfreq *devfreq = to_devfreq(dev); + struct devfreq *df = to_devfreq(dev); - if (devfreq->profile->get_cur_freq && - !devfreq->profile->get_cur_freq(devfreq->dev.parent, &freq)) + if (!df->profile) + return -EINVAL; + + if (df->profile->get_cur_freq && + !df->profile->get_cur_freq(df->dev.parent, &freq)) return sprintf(buf, "%lu\n", freq); - return sprintf(buf, "%lu\n", devfreq->previous_freq); + return sprintf(buf, "%lu\n", df->previous_freq); } static DEVICE_ATTR_RO(cur_freq); static ssize_t target_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq); + struct devfreq *df = to_devfreq(dev); + + return sprintf(buf, "%lu\n", df->previous_freq); } static DEVICE_ATTR_RO(target_freq); static ssize_t polling_interval_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_devfreq(dev)->profile->polling_ms); + struct devfreq *df = to_devfreq(dev); + + if (!df->profile) + return -EINVAL; + + return sprintf(buf, "%d\n", df->profile->polling_ms); } static ssize_t polling_interval_store(struct device *dev, @@ -1531,6 +1567,9 @@ static ssize_t available_frequencies_show(struct device *d, ssize_t count = 0; int i; + if (!df->profile) + return -EINVAL; + mutex_lock(&df->lock); for (i = 0; i < df->profile->max_state; i++) @@ -1551,49 +1590,53 @@ static DEVICE_ATTR_RO(available_frequencies); static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct devfreq *devfreq = to_devfreq(dev); + struct devfreq *df = to_devfreq(dev); ssize_t len; int i, j; - unsigned int max_state = devfreq->profile->max_state; + unsigned int max_state; + + if (!df->profile) + return -EINVAL; + max_state = df->profile->max_state; if (max_state == 0) return sprintf(buf, "Not Supported.\n"); - mutex_lock(&devfreq->lock); - if (!devfreq->stop_polling && - devfreq_update_status(devfreq, devfreq->previous_freq)) { - mutex_unlock(&devfreq->lock); + mutex_lock(&df->lock); + if (!df->stop_polling && + devfreq_update_status(df, df->previous_freq)) { + mutex_unlock(&df->lock); return 0; } - mutex_unlock(&devfreq->lock); + mutex_unlock(&df->lock); len = sprintf(buf, " From : To\n"); len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu", - devfreq->profile->freq_table[i]); + df->profile->freq_table[i]); len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) { - if (devfreq->profile->freq_table[i] - == devfreq->previous_freq) { + if (df->profile->freq_table[i] + == df->previous_freq) { len += sprintf(buf + len, "*"); } else { len += sprintf(buf + len, " "); } len += sprintf(buf + len, "%10lu:", - devfreq->profile->freq_table[i]); + df->profile->freq_table[i]); for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", - devfreq->stats.trans_table[(i * max_state) + j]); + df->stats.trans_table[(i * max_state) + j]); len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(devfreq->stats.time_in_state[i])); + jiffies64_to_msecs(df->stats.time_in_state[i])); } len += sprintf(buf + len, "Total transition : %u\n", - devfreq->stats.total_trans); + df->stats.total_trans); return len; } @@ -1604,6 +1647,9 @@ static ssize_t trans_stat_store(struct device *dev, struct devfreq *df = to_devfreq(dev); int err, value; + if (!df->profile) + return -EINVAL; + if (df->profile->max_state == 0) return count; @@ -1625,6 +1671,69 @@ static ssize_t trans_stat_store(struct device *dev, } static DEVICE_ATTR_RW(trans_stat); +static ssize_t timer_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *df = to_devfreq(dev); + + if (!df->profile) + return -EINVAL; + + return sprintf(buf, "%s\n", timer_name[df->profile->timer]); +} + +static ssize_t timer_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *df = to_devfreq(dev); + char str_timer[DEVFREQ_NAME_LEN + 1]; + int timer = -1; + int ret = 0, i; + + if (!df->governor || !df->profile) + return -EINVAL; + + ret = sscanf(buf, "%16s", str_timer); + if (ret != 1) + return -EINVAL; + + for (i = 0; i < DEVFREQ_TIMER_NUM; i++) { + if (!strncmp(timer_name[i], str_timer, DEVFREQ_NAME_LEN)) { + timer = i; + break; + } + } + + if (timer < 0) { + ret = -EINVAL; + goto out; + } + + if (df->profile->timer == timer) { + ret = 0; + goto out; + } + + mutex_lock(&df->lock); + df->profile->timer = timer; + mutex_unlock(&df->lock); + + ret = df->governor->event_handler(df, DEVFREQ_GOV_STOP, NULL); + if (ret) { + dev_warn(dev, "%s: Governor %s not stopped(%d)\n", + __func__, df->governor->name, ret); + goto out; + } + + ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); + if (ret) + dev_warn(dev, "%s: Governor %s not started(%d)\n", + __func__, df->governor->name, ret); +out: + return ret ? ret : count; +} +static DEVICE_ATTR_RW(timer); + static struct attribute *devfreq_attrs[] = { &dev_attr_name.attr, &dev_attr_governor.attr, @@ -1636,6 +1745,7 @@ static struct attribute *devfreq_attrs[] = { &dev_attr_min_freq.attr, &dev_attr_max_freq.attr, &dev_attr_trans_stat.attr, + &dev_attr_timer.attr, NULL, }; ATTRIBUTE_GROUPS(devfreq); @@ -1657,8 +1767,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data) unsigned long cur_freq, min_freq, max_freq; unsigned int polling_ms; - seq_printf(s, "%-30s %-10s %-10s %-15s %10s %12s %12s %12s\n", - "dev_name", + seq_printf(s, "%-30s %-30s %-15s %10s %12s %12s %12s\n", "dev", "parent_dev", "governor", @@ -1666,10 +1775,9 @@ static int devfreq_summary_show(struct seq_file *s, void *data) "cur_freq_Hz", "min_freq_Hz", "max_freq_Hz"); - seq_printf(s, "%30s %10s %10s %15s %10s %12s %12s %12s\n", + seq_printf(s, "%30s %30s %15s %10s %12s %12s %12s\n", + "------------------------------", "------------------------------", - "----------", - "----------", "---------------", "----------", "------------", @@ -1692,14 +1800,13 @@ static int devfreq_summary_show(struct seq_file *s, void *data) #endif mutex_lock(&devfreq->lock); - cur_freq = devfreq->previous_freq, + cur_freq = devfreq->previous_freq; get_freq_range(devfreq, &min_freq, &max_freq); - polling_ms = devfreq->profile->polling_ms, + polling_ms = devfreq->profile->polling_ms; mutex_unlock(&devfreq->lock); seq_printf(s, - "%-30s %-10s %-10s %-15s %10d %12ld %12ld %12ld\n", - dev_name(devfreq->dev.parent), + "%-30s %-30s %-15s %10d %12ld %12ld %12ld\n", dev_name(&devfreq->dev), p_devfreq ? dev_name(&p_devfreq->dev) : "null", devfreq->governor_name, diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 24f04f78285b..027769e39f9b 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -95,18 +95,20 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); - if (target_rate >= dmcfreq->odt_dis_freq) - odt_enable = true; - - /* - * This makes a SMC call to the TF-A to set the DDR PD (power-down) - * timings and to enable or disable the ODT (on-die termination) - * resistors. - */ - arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, - dmcfreq->odt_pd_arg1, - ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, - odt_enable, 0, 0, 0, &res); + if (dmcfreq->regmap_pmu) { + if (target_rate >= dmcfreq->odt_dis_freq) + odt_enable = true; + + /* + * This makes a SMC call to the TF-A to set the DDR PD + * (power-down) timings and to enable or disable the + * ODT (on-die termination) resistors. + */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, + dmcfreq->odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, + odt_enable, 0, 0, 0, &res); + } /* * If frequency scaling from low to high, adjust voltage first. @@ -371,13 +373,14 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) } node = of_parse_phandle(np, "rockchip,pmu", 0); - if (node) { - data->regmap_pmu = syscon_node_to_regmap(node); - of_node_put(node); - if (IS_ERR(data->regmap_pmu)) { - ret = PTR_ERR(data->regmap_pmu); - goto err_edev; - } + if (!node) + goto no_pmu; + + data->regmap_pmu = syscon_node_to_regmap(node); + of_node_put(node); + if (IS_ERR(data->regmap_pmu)) { + ret = PTR_ERR(data->regmap_pmu); + goto err_edev; } regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); @@ -399,6 +402,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) goto err_edev; }; +no_pmu: arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT, 0, 0, 0, 0, &res); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index f4495841bf68..fd0fa9e7900b 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -66,8 +66,6 @@ static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; static bool disable_promotion_to_c1e; -static bool lapic_timer_always_reliable; - struct idle_cpu { struct cpuidle_state *state_table; @@ -142,7 +140,7 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) leave_mm(cpu); - if (!static_cpu_has(X86_FEATURE_ARAT) && !lapic_timer_always_reliable) { + if (!static_cpu_has(X86_FEATURE_ARAT)) { /* * Switch over to one-shot tick broadcast if the target C-state * is deeper than C1. @@ -175,13 +173,15 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, * Invoked as a suspend-to-idle callback routine with frozen user space, frozen * scheduler tick and suspended scheduler clock on the target CPU. */ -static __cpuidle void intel_idle_s2idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { unsigned long eax = flg2MWAIT(drv->states[index].flags); unsigned long ecx = 1; /* break on interrupt flag */ mwait_idle_with_hints(eax, ecx); + + return 0; } /* @@ -752,6 +752,35 @@ static struct cpuidle_state skx_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state icx_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 4, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 128, + .target_residency = 384, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state atom_cstates[] __initdata = { { .name = "C1E", @@ -1056,6 +1085,12 @@ static const struct idle_cpu idle_cpu_skx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_icx __initconst = { + .state_table = icx_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1110,6 +1145,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), @@ -1562,7 +1598,7 @@ static int intel_idle_cpu_online(unsigned int cpu) { struct cpuidle_device *dev; - if (!lapic_timer_always_reliable) + if (!boot_cpu_has(X86_FEATURE_ARAT)) tick_broadcast_enable(); /* @@ -1655,16 +1691,13 @@ static int __init intel_idle_init(void) goto init_driver_fail; } - if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_always_reliable = true; - retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", intel_idle_cpu_online, NULL); if (retval < 0) goto hp_setup_fail; pr_debug("Local APIC timer is reliable in %s\n", - lapic_timer_always_reliable ? "all C-states" : "C1"); + boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); return 0; diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c index 4312233e8e0a..b9c7956e5031 100644 --- a/drivers/memory/samsung/exynos5422-dmc.c +++ b/drivers/memory/samsung/exynos5422-dmc.c @@ -12,6 +12,7 @@ #include <linux/io.h> #include <linux/mfd/syscon.h> #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/of_device.h> #include <linux/pm_opp.h> #include <linux/platform_device.h> @@ -21,6 +22,10 @@ #include "../jedec_ddr.h" #include "../of_memory.h" +static int irqmode; +module_param(irqmode, int, 0644); +MODULE_PARM_DESC(irqmode, "Enable IRQ mode (0=off [default], 1=on)"); + #define EXYNOS5_DREXI_TIMINGAREF (0x0030) #define EXYNOS5_DREXI_TIMINGROW0 (0x0034) #define EXYNOS5_DREXI_TIMINGDATA0 (0x0038) @@ -951,6 +956,7 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq) * It provides to the devfreq framework needed functions and polling period. */ static struct devfreq_dev_profile exynos5_dmc_df_profile = { + .timer = DEVFREQ_TIMER_DELAYED, .target = exynos5_dmc_target, .get_dev_status = exynos5_dmc_get_status, .get_cur_freq = exynos5_dmc_get_cur_freq, @@ -1433,7 +1439,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev) /* There is two modes in which the driver works: polling or IRQ */ irq[0] = platform_get_irq_byname(pdev, "drex_0"); irq[1] = platform_get_irq_byname(pdev, "drex_1"); - if (irq[0] > 0 && irq[1] > 0) { + if (irq[0] > 0 && irq[1] > 0 && irqmode) { ret = devm_request_threaded_irq(dev, irq[0], NULL, dmc_irq_thread, IRQF_ONESHOT, dev_name(dev), dmc); @@ -1471,10 +1477,10 @@ static int exynos5_dmc_probe(struct platform_device *pdev) * Setup default thresholds for the devfreq governor. * The values are chosen based on experiments. */ - dmc->gov_data.upthreshold = 30; + dmc->gov_data.upthreshold = 10; dmc->gov_data.downdifferential = 5; - exynos5_dmc_df_profile.polling_ms = 500; + exynos5_dmc_df_profile.polling_ms = 100; } dmc->df = devm_devfreq_add_device(dev, &exynos5_dmc_df_profile, @@ -1489,7 +1495,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev) if (dmc->in_irq_mode) exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE); - dev_info(dev, "DMC initialized\n"); + dev_info(dev, "DMC initialized, in irq mode: %d\n", dmc->in_irq_mode); return 0; diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index cba7a6fcd178..447552ac25c4 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1108,24 +1108,18 @@ static int jz4740_mmc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP - -static int jz4740_mmc_suspend(struct device *dev) +static int __maybe_unused jz4740_mmc_suspend(struct device *dev) { return pinctrl_pm_select_sleep_state(dev); } -static int jz4740_mmc_resume(struct device *dev) +static int __maybe_unused jz4740_mmc_resume(struct device *dev) { return pinctrl_select_default_state(dev); } static SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, jz4740_mmc_resume); -#define JZ4740_MMC_PM_OPS (&jz4740_mmc_pm_ops) -#else -#define JZ4740_MMC_PM_OPS NULL -#endif static struct platform_driver jz4740_mmc_driver = { .probe = jz4740_mmc_probe, @@ -1133,7 +1127,7 @@ static struct platform_driver jz4740_mmc_driver = { .driver = { .name = "jz4740-mmc", .of_match_table = of_match_ptr(jz4740_mmc_of_match), - .pm = JZ4740_MMC_PM_OPS, + .pm = pm_ptr(&jz4740_mmc_pm_ops), }, }; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index dfbd3d10410c..0c8c74a3c868 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage); */ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) { - if (IS_ERR_OR_NULL(opp) || !opp->available) { + if (IS_ERR_OR_NULL(opp)) { pr_err("%s: Invalid parameters\n", __func__); return 0; } @@ -2271,6 +2271,7 @@ adjust_put_table: dev_pm_opp_put_opp_table(opp_table); return r; } +EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage); /** * dev_pm_opp_enable() - Enable a specific OPP diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 314f306140a1..0430290670ab 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1209,20 +1209,19 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); /* * Callback function provided to the Energy Model framework upon registration. - * This computes the power estimated by @CPU at @kHz if it is the frequency + * This computes the power estimated by @dev at @kHz if it is the frequency * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled * frequency and @mW to the associated power. The power is estimated as - * P = C * V^2 * f with C being the CPU's capacitance and V and f respectively - * the voltage and frequency of the OPP. + * P = C * V^2 * f with C being the device's capacitance and V and f + * respectively the voltage and frequency of the OPP. * - * Returns -ENODEV if the CPU device cannot be found, -EINVAL if the power - * calculation failed because of missing parameters, 0 otherwise. + * Returns -EINVAL if the power calculation failed because of missing + * parameters, 0 otherwise. */ -static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, - int cpu) +static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz, + struct device *dev) { - struct device *cpu_dev; struct dev_pm_opp *opp; struct device_node *np; unsigned long mV, Hz; @@ -1230,11 +1229,7 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, u64 tmp; int ret; - cpu_dev = get_cpu_device(cpu); - if (!cpu_dev) - return -ENODEV; - - np = of_node_get(cpu_dev->of_node); + np = of_node_get(dev->of_node); if (!np) return -EINVAL; @@ -1244,7 +1239,7 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, return -EINVAL; Hz = *kHz * 1000; - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz); + opp = dev_pm_opp_find_freq_ceil(dev, &Hz); if (IS_ERR(opp)) return -EINVAL; @@ -1264,30 +1259,38 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, /** * dev_pm_opp_of_register_em() - Attempt to register an Energy Model - * @cpus : CPUs for which an Energy Model has to be registered + * @dev : Device for which an Energy Model has to be registered + * @cpus : CPUs for which an Energy Model has to be registered. For + * other type of devices it should be set to NULL. * * This checks whether the "dynamic-power-coefficient" devicetree property has * been specified, and tries to register an Energy Model with it if it has. + * Having this property means the voltages are known for OPPs and the EM + * might be calculated. */ -void dev_pm_opp_of_register_em(struct cpumask *cpus) +int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus) { - struct em_data_callback em_cb = EM_DATA_CB(_get_cpu_power); - int ret, nr_opp, cpu = cpumask_first(cpus); - struct device *cpu_dev; + struct em_data_callback em_cb = EM_DATA_CB(_get_power); struct device_node *np; + int ret, nr_opp; u32 cap; - cpu_dev = get_cpu_device(cpu); - if (!cpu_dev) - return; + if (IS_ERR_OR_NULL(dev)) { + ret = -EINVAL; + goto failed; + } - nr_opp = dev_pm_opp_get_opp_count(cpu_dev); - if (nr_opp <= 0) - return; + nr_opp = dev_pm_opp_get_opp_count(dev); + if (nr_opp <= 0) { + ret = -EINVAL; + goto failed; + } - np = of_node_get(cpu_dev->of_node); - if (!np) - return; + np = of_node_get(dev->of_node); + if (!np) { + ret = -EINVAL; + goto failed; + } /* * Register an EM only if the 'dynamic-power-coefficient' property is @@ -1298,9 +1301,20 @@ void dev_pm_opp_of_register_em(struct cpumask *cpus) */ ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); of_node_put(np); - if (ret || !cap) - return; + if (ret || !cap) { + dev_dbg(dev, "Couldn't find proper 'dynamic-power-coefficient' in DT\n"); + ret = -EINVAL; + goto failed; + } + + ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus); + if (ret) + goto failed; - em_register_perf_domain(cpus, nr_opp, &em_cb); + return 0; + +failed: + dev_dbg(dev, "Couldn't register Energy Model %d\n", ret); + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em); diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index e3357e91decb..bd4771f388ab 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2016-2017 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon <nm@ti.com> * Dave Gerlach <d-gerlach@ti.com> * diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index c90f0990968b..597733ed86e9 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -19,8 +19,8 @@ * The idle + run duration is specified via separate helpers and that allows * idle injection to be started. * - * The idle injection kthreads will call play_idle() with the idle duration - * specified as per the above. + * The idle injection kthreads will call play_idle_precise() with the idle + * duration and max allowed latency specified as per the above. * * After all of them have been woken up, a timer is set to start the next idle * injection cycle. @@ -100,7 +100,7 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev) * * This function is called when the idle injection timer expires. It wakes up * idle injection tasks associated with the timer and they, in turn, invoke - * play_idle() to inject a specified amount of CPU idle time. + * play_idle_precise() to inject a specified amount of CPU idle time. * * Return: HRTIMER_RESTART. */ @@ -124,8 +124,8 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) * idle_inject_fn - idle injection work function * @cpu: the CPU owning the task * - * This function calls play_idle() to inject a specified amount of CPU idle - * time. + * This function calls play_idle_precise() to inject a specified amount of CPU + * idle time. */ static void idle_inject_fn(unsigned int cpu) { diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 61a63a16b5e7..6f55aaef8afc 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -39,6 +39,8 @@ #define POWER_HIGH_LOCK BIT_ULL(63) #define POWER_LOW_LOCK BIT(31) +#define POWER_LIMIT4_MASK 0x1FFF + #define TIME_WINDOW1_MASK (0x7FULL<<17) #define TIME_WINDOW2_MASK (0x7FULL<<49) @@ -82,6 +84,7 @@ enum unit_type { static const char pl1_name[] = "long_term"; static const char pl2_name[] = "short_term"; +static const char pl4_name[] = "peak_power"; #define power_zone_to_rapl_domain(_zone) \ container_of(_zone, struct rapl_domain, power_zone) @@ -93,6 +96,7 @@ struct rapl_defaults { u64 (*compute_time_window)(struct rapl_package *rp, u64 val, bool to_raw); unsigned int dram_domain_energy_unit; + unsigned int psys_domain_energy_unit; }; static struct rapl_defaults *rapl_defaults; @@ -337,6 +341,9 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid, case PL2_ENABLE: rapl_write_data_raw(rd, POWER_LIMIT2, power_limit); break; + case PL4_ENABLE: + rapl_write_data_raw(rd, POWER_LIMIT4, power_limit); + break; default: ret = -EINVAL; } @@ -371,6 +378,9 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, case PL2_ENABLE: prim = POWER_LIMIT2; break; + case PL4_ENABLE: + prim = POWER_LIMIT4; + break; default: put_online_cpus(); return -EINVAL; @@ -440,6 +450,13 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, case PL2_ENABLE: ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val); break; + case PL4_ENABLE: + /* + * Time window parameter is not applicable for PL4 entry + * so assigining '0' as default value. + */ + val = 0; + break; default: put_online_cpus(); return -EINVAL; @@ -483,6 +500,9 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data) case PL2_ENABLE: prim = MAX_POWER; break; + case PL4_ENABLE: + prim = MAX_POWER; + break; default: put_online_cpus(); return -EINVAL; @@ -492,6 +512,10 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data) else *data = val; + /* As a generalization rule, PL4 would be around two times PL2. */ + if (rd->rpl[id].prim_id == PL4_ENABLE) + *data = *data * 2; + put_online_cpus(); return ret; @@ -524,21 +548,42 @@ static void rapl_init_domains(struct rapl_package *rp) rd->id = i; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; - /* some domain may support two power limits */ - if (rp->priv->limits[i] == 2) { + + /* + * The PL2 power domain is applicable for limits two + * and limits three + */ + if (rp->priv->limits[i] >= 2) { rd->rpl[1].prim_id = PL2_ENABLE; rd->rpl[1].name = pl2_name; } + /* Enable PL4 domain if the total power limits are three */ + if (rp->priv->limits[i] == 3) { + rd->rpl[2].prim_id = PL4_ENABLE; + rd->rpl[2].name = pl4_name; + } + for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) rd->regs[j] = rp->priv->regs[i][j]; - if (i == RAPL_DOMAIN_DRAM) { + switch (i) { + case RAPL_DOMAIN_DRAM: rd->domain_energy_unit = rapl_defaults->dram_domain_energy_unit; if (rd->domain_energy_unit) pr_info("DRAM domain energy unit %dpj\n", rd->domain_energy_unit); + break; + case RAPL_DOMAIN_PLATFORM: + rd->domain_energy_unit = + rapl_defaults->psys_domain_energy_unit; + if (rd->domain_energy_unit) + pr_info("Platform domain energy unit %dpj\n", + rd->domain_energy_unit); + break; + default: + break; } rd++; } @@ -587,6 +632,8 @@ static struct rapl_primitive_info rpi[] = { RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, + RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, @@ -597,6 +644,8 @@ static struct rapl_primitive_info rpi[] = { RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0, + RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, @@ -919,6 +968,14 @@ static const struct rapl_defaults rapl_defaults_hsw_server = { .dram_domain_energy_unit = 15300, }; +static const struct rapl_defaults rapl_defaults_spr_server = { + .check_unit = rapl_check_unit_core, + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, + .dram_domain_energy_unit = 15300, + .psys_domain_energy_unit = 1000000000, +}; + static const struct rapl_defaults rapl_defaults_byt = { .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, .check_unit = rapl_check_unit_atom, @@ -978,6 +1035,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), @@ -1252,6 +1310,7 @@ void rapl_remove_package(struct rapl_package *rp) if (find_nr_power_limit(rd) > 1) { rapl_write_data_raw(rd, PL2_ENABLE, 0); rapl_write_data_raw(rd, PL2_CLAMP, 0); + rapl_write_data_raw(rd, PL4_ENABLE, 0); } if (rd->id == RAPL_DOMAIN_PACKAGE) { rd_package = rd; @@ -1360,6 +1419,13 @@ static void power_limit_state_save(void) if (ret) rd->rpl[i].last_power_limit = 0; break; + case PL4_ENABLE: + ret = rapl_read_data_raw(rd, + POWER_LIMIT4, true, + &rd->rpl[i].last_power_limit); + if (ret) + rd->rpl[i].last_power_limit = 0; + break; } } } @@ -1390,6 +1456,11 @@ static void power_limit_state_restore(void) rapl_write_data_raw(rd, POWER_LIMIT2, rd->rpl[i].last_power_limit); break; + case PL4_ENABLE: + if (rd->rpl[i].last_power_limit) + rapl_write_data_raw(rd, POWER_LIMIT4, + rd->rpl[i].last_power_limit); + break; } } } diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index d5487965bdfe..d2a2627507a9 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -28,6 +28,7 @@ /* Local defines */ #define MSR_PLATFORM_POWER_LIMIT 0x0000065C +#define MSR_VR_CURRENT_CONFIG 0x00000601 /* private data for RAPL MSR Interface */ static struct rapl_if_priv rapl_msr_priv = { @@ -123,13 +124,27 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra) return ra->err; } +/* List of verified CPUs. */ +static const struct x86_cpu_id pl4_support_ids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY }, + {} +}; + static int rapl_msr_probe(struct platform_device *pdev) { + const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids); int ret; rapl_msr_priv.read_raw = rapl_msr_read_raw; rapl_msr_priv.write_raw = rapl_msr_write_raw; + if (id) { + rapl_msr_priv.limits[RAPL_DOMAIN_PACKAGE] = 3; + rapl_msr_priv.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] = + MSR_VR_CURRENT_CONFIG; + pr_info("PL4 support detected.\n"); + } + rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); if (IS_ERR(rapl_msr_priv.control_type)) { pr_debug("failed to register powercap control_type.\n"); diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 6c0e1b053126..6cf23a54e853 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -333,18 +333,18 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev, return false; policy = cpufreq_cdev->policy; - if (!cpumask_equal(policy->related_cpus, to_cpumask(em->cpus))) { + if (!cpumask_equal(policy->related_cpus, em_span_cpus(em))) { pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n", - cpumask_pr_args(to_cpumask(em->cpus)), + cpumask_pr_args(em_span_cpus(em)), cpumask_pr_args(policy->related_cpus)); return false; } nr_levels = cpufreq_cdev->max_level + 1; - if (em->nr_cap_states != nr_levels) { - pr_err("The number of cap states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n", - cpumask_pr_args(to_cpumask(em->cpus)), - em->nr_cap_states, nr_levels); + if (em_pd_nr_perf_states(em) != nr_levels) { + pr_err("The number of performance states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n", + cpumask_pr_args(em_span_cpus(em)), + em_pd_nr_perf_states(em), nr_levels); return false; } |