From b5dee3130bb4014511f5d0dd46855ed843e3fdc8 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:41 +0800 Subject: PM / sleep: Refactor filesystems sync to reduce duplication Create a common helper to sync filesystems for system suspend and hibernation. Signed-off-by: Harry Pan Acked-by: Pavel Machek [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2..6b3ea9ea6a9e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -425,6 +425,7 @@ void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); +extern void ksys_sync_helper(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void ksys_sync_helper(void) {} + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } -- cgit v1.2.3 From 49a27e279052cf71ba931a26b00194ff46510480 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:45 +0100 Subject: PM / Domains: Add generic data pointer to struct genpd_power_state Add a data pointer to the genpd_power_state struct, to allow a genpd backend driver to store per-state specific data. To introduce the pointer, change the way genpd deals with freeing of the corresponding allocated data. More precisely, clarify the responsibility of whom that shall free the data, by adding a ->free_states() callback to the generic_pm_domain structure. The one allocating the data will be expected to set the callback, to allow genpd to invoke it from genpd_remove(). Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 12 ++++++++++-- include/linux/pm_domain.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c..ff6f992f7a1d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1686,6 +1686,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1696,7 +1702,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1812,7 +1818,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee0..8e1399231753 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -69,6 +69,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -110,9 +111,10 @@ struct generic_pm_domain { struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; -- cgit v1.2.3 From eb594b7325f61835555140922a4cb715264a325c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:46 +0100 Subject: PM / Domains: Add support for CPU devices to genpd To enable a CPU device to be attached to a PM domain managed by genpd, make a few changes to it for convenience. To be able to quickly find out what CPUs are attached to a genpd, which typically becomes useful from a genpd governor as subsequent changes are about to show, add a cpumask to struct generic_pm_domain to be updated when a CPU device gets attached to the genpd containing that cpumask. Also, propagate the cpumask changes upwards in the domain hierarchy to the master PM domains. This way, the cpumask for a genpd hierarchically reflects all CPUs attached to the topology below it. Finally, make this an opt-in feature, to avoid having to manage CPUs and the cpumask for a genpd that don't need it. To that end, add a new genpd configuration bit, GENPD_FLAG_CPU_DOMAIN. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 65 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 13 +++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ff6f992f7a1d..ecac03dcc9b2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -1454,6 +1456,56 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void __genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + struct device *dev, bool set) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) { + __genpd_update_cpumask(genpd, cpu, set, 0); + return; + } + } +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, true); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, false); +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { @@ -1475,6 +1527,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_lock(genpd); + genpd_set_cpumask(genpd, dev); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1532,6 +1585,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, dev); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1768,11 +1822,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1818,6 +1879,8 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); if (genpd->free_states) genpd->free_states(genpd->states, genpd->state_count); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8e1399231753..a6e251fe9deb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * Flags to control the behaviour of a genpd. @@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that, the backend + * driver must then comply with the so called, + * last-man-standing algorithm, for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -94,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ -- cgit v1.2.3 From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:47 +0100 Subject: cpuidle: Export the next timer expiration for CPUs To be able to predict the sleep duration for a CPU entering idle, it is essential to know the expiration time of the next timer. Both the teo and the menu cpuidle governors already use this information for CPU idle state selection. Moving forward, a similar prediction needs to be made for a group of idle CPUs rather than for a single one and the following changes implement a new genpd governor for that purpose. In order to support that feature, add a new function called tick_nohz_get_next_hrtimer() that will return the next hrtimer expiration time of a given CPU to be invoked after deciding whether or not to stop the scheduler tick on that CPU. Make the cpuidle core call tick_nohz_get_next_hrtimer() right before invoking the ->enter() callback provided by the cpuidle driver for the given state and store its return value in the per-CPU struct cpuidle_device, so as to make it available to code outside of cpuidle. Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(), the governor's ->select() callback has already returned and indicated whether or not the tick should be stopped, so in fact the value returned by tick_nohz_get_next_hrtimer() always is the next hrtimer expiration time for the given CPU, possibly including the tick (if it hasn't been stopped). Co-developed-by: Lina Iyer Co-developed-by: Daniel Lezcano Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 19 +++++++++++++++++-- include/linux/cpuidle.h | 1 + include/linux/tick.h | 7 ++++++- kernel/time/tick-sched.c | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4..8891b5ac3e40 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0b..8d18e03124ff 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void) return false; } +/** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped -- cgit v1.2.3 From e94999688e3aa3c0a8ad5a60352cdc3ca3030434 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 11 Apr 2019 20:17:33 +0200 Subject: PM / Domains: Add genpd governor for CPUs After some preceding changes, PM domains managed by genpd may contain CPU devices, so idle state residency values should be taken into account during the state selection process. [The residency value is the minimum amount of time to be spent by a CPU (or a group of CPUs) in an idle state in order to save more energy than could be saved by picking up a shallower idle state.] For this purpose, add a new genpd governor, pm_domain_cpu_gov, to be used for selecting idle states of PM domains with CPU devices attached either directly or through subdomains. The new governor computes the minimum expected idle duration for all online CPUs attached to a PM domain and its subdomains. Next, it finds the deepest idle state whose target residency is within the expected idle duration and selects it as the target idle state of the domain. It should be noted that the minimum expected idle duration computation is based on the closest timer event information stored in the per-CPU variables cpuidle_devices for all of the CPUs in the domain. That needs to be revisited in future, as obviously there are other reasons why a CPU may be woken up from idle. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 67 +++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 4 +++ 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247..7912bc957244 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. + */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. + */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a6e251fe9deb..bc82e74560ee 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -118,6 +118,7 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -202,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) -- cgit v1.2.3