From 737eb0301f296d55c22350c6968ff1ef51bacb5f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 20 Feb 2015 14:53:46 +0000 Subject: genirq / PM: better describe IRQF_NO_SUSPEND semantics The IRQF_NO_SUSPEND flag is intended to be used for interrupts required to be enabled during the suspend-resume cycle. This mostly consists of IPIs and timer interrupts, potentially including chained irqchip interrupts if these are necessary to handle timers or IPIs. If an interrupt does not fall into one of the aforementioned categories, requesting it with IRQF_NO_SUSPEND is likely incorrect. Using IRQF_NO_SUSPEND does not guarantee that the interrupt can wake the system from a suspended state. For an interrupt to be able to trigger a wakeup, it may be necessary to program various components of the system. In these cases it is necessary to use {enable,disabled}_irq_wake. Unfortunately, several drivers assume that IRQF_NO_SUSPEND ensures that an IRQ can wake up the system, and the documentation can be read ambiguously w.r.t. this property. This patch updates the documentation regarding IRQF_NO_SUSPEND to make this caveat explicit, hopefully making future misuse rarer. Cleanup of existing misuse will occur as part of later patch series. Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- include/linux/interrupt.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d9b05b5bf8c7..606771c7cac2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -52,7 +52,9 @@ * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. - * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend + * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee + * that this interrupt will wake the system from a suspended + * state. See Documentation/power/suspend-and-interrupts.txt * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device -- cgit v1.2.3 From 17f480342026e54000731acaa69bf32787ce46cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Feb 2015 00:07:55 +0100 Subject: genirq / PM: Add flag for shared NO_SUSPEND interrupt lines It currently is required that all users of NO_SUSPEND interrupt lines pass the IRQF_NO_SUSPEND flag when requesting the IRQ or the WARN_ON_ONCE() in irq_pm_install_action() will trigger. That is done to warn about situations in which unprepared interrupt handlers may be run unnecessarily for suspended devices and may attempt to access those devices by mistake. However, it may cause drivers that have no technical reasons for using IRQF_NO_SUSPEND to set that flag just because they happen to share the interrupt line with something like a timer. Moreover, the generic handling of wakeup interrupts introduced by commit 9ce7a25849e8 (genirq: Simplify wakeup mechanism) only works for IRQs without any NO_SUSPEND users, so the drivers of wakeup devices needing to use shared NO_SUSPEND interrupt lines for signaling system wakeup generally have to detect wakeup in their interrupt handlers. Thus if they happen to share an interrupt line with a NO_SUSPEND user, they also need to request that their interrupt handlers be run after suspend_device_irqs(). In both cases the reason for using IRQF_NO_SUSPEND is not because the driver in question has a genuine need to run its interrupt handler after suspend_device_irqs(), but because it happens to share the line with some other NO_SUSPEND user. Otherwise, the driver would do without IRQF_NO_SUSPEND just fine. To make it possible to specify that condition explicitly, introduce a new IRQ action handler flag for shared IRQs, IRQF_COND_SUSPEND, that, when set, will indicate to the IRQ core that the interrupt user is generally fine with suspending the IRQ, but it also can tolerate handler invocations after suspend_device_irqs() and, in particular, it is capable of detecting system wakeup and triggering it as appropriate from its interrupt handler. That will allow us to work around a problem with a shared timer interrupt line on at91 platforms. Link: http://marc.info/?l=linux-kernel&m=142252777602084&w=2 Link: http://marc.info/?t=142252775300011&r=1&w=2 Link: https://lkml.org/lkml/2014/12/15/552 Reported-by: Boris Brezillon Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland --- include/linux/interrupt.h | 5 +++++ include/linux/irqdesc.h | 1 + kernel/irq/manage.c | 7 ++++++- kernel/irq/pm.c | 7 ++++++- 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 606771c7cac2..2e88580194f0 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -59,6 +59,10 @@ * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. + * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this + * interrupt handler after suspending interrupts. For system + * wakeup devices users need to implement wakeup detection in + * their interrupt handlers. */ #define IRQF_DISABLED 0x00000020 #define IRQF_SHARED 0x00000080 @@ -72,6 +76,7 @@ #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 +#define IRQF_COND_SUSPEND 0x00040000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index faf433af425e..dd1109fb241e 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -78,6 +78,7 @@ struct irq_desc { #ifdef CONFIG_PM_SLEEP unsigned int nr_actions; unsigned int no_suspend_depth; + unsigned int cond_suspend_depth; unsigned int force_resume_depth; #endif #ifdef CONFIG_PROC_FS diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 196a06fbc122..886d09e691d5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * otherwise we'll have trouble later trying to figure out * which interrupt is which (messes up the interrupt freeing * logic etc). + * + * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and + * it cannot be set along with IRQF_NO_SUSPEND. */ - if ((irqflags & IRQF_SHARED) && !dev_id) + if (((irqflags & IRQF_SHARED) && !dev_id) || + (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || + ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; desc = irq_to_desc(irq); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 3ca532592704..5204a6d1b985 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth++; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && - desc->no_suspend_depth != desc->nr_actions); + (desc->no_suspend_depth + + desc->cond_suspend_depth) != desc->nr_actions); } /* @@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth--; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth--; } static bool suspend_device_irq(struct irq_desc *desc, int irq) -- cgit v1.2.3 From ef2b22ac540c018bd574d1846ab95b9bfcf38702 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 2 Mar 2015 22:26:55 +0100 Subject: cpuidle / sleep: Use broadcast timer for states that stop local timer Commit 381063133246 (PM / sleep: Re-implement suspend-to-idle handling) overlooked the fact that entering some sufficiently deep idle states by CPUs may cause their local timers to stop and in those cases it is necessary to switch over to a broadcast timer prior to entering the idle state. If the cpuidle driver in use does not provide the new ->enter_freeze callback for any of the idle states, that problem affects suspend-to-idle too, but it is not taken into account after the changes made by commit 381063133246. Fix that by changing the definition of cpuidle_enter_freeze() and re-arranging of the code in cpuidle_idle_call(), so the former does not call cpuidle_enter() any more and the fallback case is handled by cpuidle_idle_call() directly. Fixes: 381063133246 (PM / sleep: Re-implement suspend-to-idle handling) Reported-and-tested-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpuidle/cpuidle.c | 62 +++++++++++++++++------------------------------ include/linux/cpuidle.h | 17 +++++++++++-- kernel/sched/idle.c | 30 ++++++++++++++++------- 3 files changed, 58 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 8b3e132b6a01..080bd2dbde4b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -44,8 +44,8 @@ void disable_cpuidle(void) off = 1; } -static bool cpuidle_not_available(struct cpuidle_driver *drv, - struct cpuidle_device *dev) +bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev) { return off || !initialized || !drv || !dev || !dev->enabled; } @@ -72,14 +72,8 @@ int cpuidle_play_dead(void) return -ENODEV; } -/** - * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. - * @drv: cpuidle driver for the given CPU. - * @dev: cpuidle device for the given CPU. - * @freeze: Whether or not the state should be suitable for suspend-to-idle. - */ -static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev, bool freeze) +static int find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev, bool freeze) { unsigned int latency_req = 0; int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; @@ -98,6 +92,17 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, return ret; } +/** + * cpuidle_find_deepest_state - Find the deepest available idle state. + * @drv: cpuidle driver for the given CPU. + * @dev: cpuidle device for the given CPU. + */ +int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + return find_deepest_state(drv, dev, false); +} + static void enter_freeze_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { @@ -119,46 +124,26 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, /** * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. + * @drv: cpuidle driver for the given CPU. + * @dev: cpuidle device for the given CPU. * * If there are states with the ->enter_freeze callback, find the deepest of - * them and enter it with frozen tick. Otherwise, find the deepest state - * available and enter it normally. - * - * Returns with enabled interrupts. + * them and enter it with frozen tick. */ -void cpuidle_enter_freeze(void) +int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int index; - if (cpuidle_not_available(drv, dev)) - goto fallback; - /* * Find the deepest state with ->enter_freeze present, which guarantees * that interrupts won't be enabled when it exits and allows the tick to * be frozen safely. */ - index = cpuidle_find_deepest_state(drv, dev, true); - if (index >= 0) { + index = find_deepest_state(drv, dev, true); + if (index >= 0) enter_freeze_proper(drv, dev, index); - local_irq_enable(); - return; - } - /* - * It is not safe to freeze the tick, find the deepest state available - * at all and try to enter it normally. - */ - index = cpuidle_find_deepest_state(drv, dev, false); - if (index >= 0) { - cpuidle_enter(drv, dev, index); - return; - } - - fallback: - arch_cpu_idle(); + return index; } /** @@ -217,9 +202,6 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, */ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - if (cpuidle_not_available(drv, dev)) - return -ENODEV; - return cpuidle_curr_governor->select(drv, dev); } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f551a9299ac9..306178d7309f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -126,6 +126,8 @@ struct cpuidle_driver { #ifdef CONFIG_CPU_IDLE extern void disable_cpuidle(void); +extern bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev); @@ -150,11 +152,17 @@ extern void cpuidle_resume(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); -extern void cpuidle_enter_freeze(void); +extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); #else static inline void disable_cpuidle(void) { } +static inline bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return true; } static inline int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } @@ -183,7 +191,12 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } -static inline void cpuidle_enter_freeze(void) { } +static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } +static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } #endif diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 84b93b68482a..80014a178342 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -82,6 +82,7 @@ static void cpuidle_idle_call(void) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; unsigned int broadcast; + bool reflect; /* * Check if the idle task must be rescheduled. If it is the @@ -105,6 +106,9 @@ static void cpuidle_idle_call(void) */ rcu_idle_enter(); + if (cpuidle_not_available(drv, dev)) + goto use_default; + /* * Suspend-to-idle ("freeze") is a system state in which all user space * has been frozen, all I/O devices have been suspended and the only @@ -115,15 +119,22 @@ static void cpuidle_idle_call(void) * until a proper wakeup interrupt happens. */ if (idle_should_freeze()) { - cpuidle_enter_freeze(); - goto exit_idle; - } + entered_state = cpuidle_enter_freeze(drv, dev); + if (entered_state >= 0) { + local_irq_enable(); + goto exit_idle; + } - /* - * Ask the cpuidle framework to choose a convenient idle state. - * Fall back to the default arch idle method on errors. - */ - next_state = cpuidle_select(drv, dev); + reflect = false; + next_state = cpuidle_find_deepest_state(drv, dev); + } else { + reflect = true; + /* + * Ask the cpuidle framework to choose a convenient idle state. + */ + next_state = cpuidle_select(drv, dev); + } + /* Fall back to the default arch idle method on errors. */ if (next_state < 0) goto use_default; @@ -170,7 +181,8 @@ static void cpuidle_idle_call(void) /* * Give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, entered_state); + if (reflect) + cpuidle_reflect(dev, entered_state); exit_idle: __current_set_polling(); -- cgit v1.2.3