From 58eb7b77ad01f058e523554cb7bae7272a7d2579 Mon Sep 17 00:00:00 2001 From: Kaitao Cheng Date: Sat, 18 Apr 2020 00:24:51 +0800 Subject: smp: Use smp_call_func_t in on_each_cpu() Use smp_call_func_t instead of the open coded function pointer argument. Signed-off-by: Kaitao Cheng Signed-off-by: Thomas Gleixner Acked-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20200417162451.91969-1-pilgrimtao@gmail.com --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 786092aabdcd..84303197caf9 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -620,7 +620,7 @@ void __init smp_init(void) * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead * of local_irq_disable/enable(). */ -void on_each_cpu(void (*func) (void *info), void *info, int wait) +void on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; -- cgit v1.2.3 From 52103be07d8b08311955f8c30e535c2dda290cf4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:10:59 +0200 Subject: smp: Optimize flush_smp_call_function_queue() The call_single_queue can contain (two) different callbacks, synchronous and asynchronous. The current interrupt handler runs them in-order, which means that remote CPUs that are waiting for their synchronous call can be delayed by running asynchronous callbacks. Rework the interrupt handler to first run the synchonous callbacks. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.836818381@infradead.org --- kernel/smp.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 786092aabdcd..db2f73808db5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -209,9 +209,9 @@ void generic_smp_call_function_single_interrupt(void) */ static void flush_smp_call_function_queue(bool warn_cpu_offline) { - struct llist_head *head; - struct llist_node *entry; call_single_data_t *csd, *csd_next; + struct llist_node *entry, *prev; + struct llist_head *head; static bool warned; lockdep_assert_irqs_disabled(); @@ -235,20 +235,39 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd->func); } + /* + * First; run all SYNC callbacks, people are waiting for us. + */ + prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { smp_call_func_t func = csd->func; void *info = csd->info; /* Do we wait until *after* callback? */ if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + if (prev) { + prev->next = &csd_next->llist; + } else { + entry = &csd_next->llist; + } func(info); csd_unlock(csd); } else { - csd_unlock(csd); - func(info); + prev = &csd->llist; } } + /* + * Second; run all !SYNC callbacks. + */ + llist_for_each_entry_safe(csd, csd_next, entry, llist) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } + /* * Handle irq works queued remotely by irq_work_queue_on(). * Smp functions above are typically synchronous so they -- cgit v1.2.3 From afaa653c564da38c0b34c4baba31e88c46a8764c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:00 +0200 Subject: smp: Move irq_work_run() out of flush_smp_call_function_queue() This ensures flush_smp_call_function_queue() is strictly about call_single_queue. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.895109676@infradead.org --- kernel/smp.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index db2f73808db5..f720e38e880d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -84,6 +84,7 @@ int smpcfd_dying_cpu(unsigned int cpu) * still pending. */ flush_smp_call_function_queue(false); + irq_work_run(); return 0; } @@ -191,6 +192,14 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, void generic_smp_call_function_single_interrupt(void) { flush_smp_call_function_queue(true); + + /* + * Handle irq works queued remotely by irq_work_queue_on(). + * Smp functions above are typically synchronous so they + * better run first since some other CPUs may be busy waiting + * for them. + */ + irq_work_run(); } /** @@ -267,14 +276,6 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd_unlock(csd); func(info); } - - /* - * Handle irq works queued remotely by irq_work_queue_on(). - * Smp functions above are typically synchronous so they - * better run first since some other CPUs may be busy waiting - * for them. - */ - irq_work_run(); } /* -- cgit v1.2.3 From b2a02fc43a1f40ef4eb2fb2b06357382608d4d84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:01 +0200 Subject: smp: Optimize send_call_function_single_ipi() Just like the ttwu_queue_remote() IPI, make use of _TIF_POLLING_NRFLAG to avoid sending IPIs to idle CPUs. [ mingo: Fix UP build bug. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161907.953304789@infradead.org --- kernel/sched/core.c | 10 ++++++++++ kernel/sched/idle.c | 5 +++++ kernel/sched/sched.h | 7 ++++--- kernel/smp.c | 16 +++++++++++++++- 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2cacc1e44a84..fa0d4990618e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2296,6 +2296,16 @@ static void wake_csd_func(void *info) sched_ttwu_pending(); } +void send_call_function_single_ipi(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (!set_nr_if_polling(rq->idle)) + arch_send_call_function_single_ipi(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); +} + /* * Queue a task on the target CPUs wake_list and wake the CPU via IPI if * necessary. The wakee CPU on receipt of the IPI will queue the task diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index b743bf38f08f..387fd75ee6f7 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -289,6 +289,11 @@ static void do_idle(void) */ smp_mb__after_atomic(); + /* + * RCU relies on this call to be done outside of an RCU read-side + * critical section. + */ + flush_smp_call_function_from_idle(); sched_ttwu_pending(); schedule_idle(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3c163cb5493f..75b062999c43 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1506,11 +1506,12 @@ static inline void unregister_sched_domain_sysctl(void) } #endif -#else +extern void flush_smp_call_function_from_idle(void); +#else /* !CONFIG_SMP: */ +static inline void flush_smp_call_function_from_idle(void) { } static inline void sched_ttwu_pending(void) { } - -#endif /* CONFIG_SMP */ +#endif #include "stats.h" #include "autogroup.h" diff --git a/kernel/smp.c b/kernel/smp.c index f720e38e880d..9f1181375141 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -135,6 +135,8 @@ static __always_inline void csd_unlock(call_single_data_t *csd) static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); +extern void send_call_function_single_ipi(int cpu); + /* * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have @@ -178,7 +180,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, * equipped to do the right thing... */ if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) - arch_send_call_function_single_ipi(cpu); + send_call_function_single_ipi(cpu); return 0; } @@ -278,6 +280,18 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } } +void flush_smp_call_function_from_idle(void) +{ + unsigned long flags; + + if (llist_empty(this_cpu_ptr(&call_single_queue))) + return; + + local_irq_save(flags); + flush_smp_call_function_queue(true); + local_irq_restore(flags); +} + /* * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. -- cgit v1.2.3 From 4b44a21dd640b692d4e9b12d3e37c24825f90baa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:02 +0200 Subject: irq_work, smp: Allow irq_work on call_single_queue Currently irq_work_queue_on() will issue an unconditional arch_send_call_function_single_ipi() and has the handler do irq_work_run(). This is unfortunate in that it makes the IPI handler look at a second cacheline and it misses the opportunity to avoid the IPI. Instead note that struct irq_work and struct __call_single_data are very similar in layout, so use a few bits in the flags word to encode a type and stick the irq_work on the call_single_queue list. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org --- include/linux/irq_work.h | 7 ++- include/linux/smp.h | 23 ++++++++- kernel/irq_work.c | 53 +++++++++++---------- kernel/smp.c | 119 +++++++++++++++++++++++++++++------------------ 4 files changed, 130 insertions(+), 72 deletions(-) (limited to 'kernel/smp.c') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 3b752e80c017..f23a359c8f46 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -13,6 +13,8 @@ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed */ +/* flags share CSD_FLAG_ space */ + #define IRQ_WORK_PENDING BIT(0) #define IRQ_WORK_BUSY BIT(1) @@ -23,9 +25,12 @@ #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) +/* + * structure shares layout with single_call_data_t. + */ struct irq_work { - atomic_t flags; struct llist_node llnode; + atomic_t flags; void (*func)(struct irq_work *); }; diff --git a/include/linux/smp.h b/include/linux/smp.h index cbc9162689d0..45ad6e30f398 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -16,17 +16,38 @@ typedef void (*smp_call_func_t)(void *info); typedef bool (*smp_cond_func_t)(int cpu, void *info); + +enum { + CSD_FLAG_LOCK = 0x01, + + /* IRQ_WORK_flags */ + + CSD_TYPE_ASYNC = 0x00, + CSD_TYPE_SYNC = 0x10, + CSD_TYPE_IRQ_WORK = 0x20, + CSD_FLAG_TYPE_MASK = 0xF0, +}; + +/* + * structure shares (partial) layout with struct irq_work + */ struct __call_single_data { struct llist_node llist; + unsigned int flags; smp_call_func_t func; void *info; - unsigned int flags; }; /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ typedef struct __call_single_data call_single_data_t __aligned(sizeof(struct __call_single_data)); +/* + * Enqueue a llist_node on the call_single_queue; be very careful, read + * flush_smp_call_function_queue() in detail. + */ +extern void __smp_call_single_queue(int cpu, struct llist_node *node); + /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 48b5d1b6af4d..eca83965b631 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work) { int oflags; - oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags); + oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags); /* * If the work is already pending, no need to raise the IPI. * The pairing atomic_fetch_andnot() in irq_work_run() makes sure @@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) - arch_send_call_function_single_ipi(cpu); + __smp_call_single_queue(cpu, &work->llnode); } else { __irq_work_queue_local(work); } @@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void) return true; } +void irq_work_single(void *arg) +{ + struct irq_work *work = arg; + int flags; + + /* + * Clear the PENDING bit, after this point the @work + * can be re-used. + * Make it immediately visible so that other CPUs trying + * to claim that work don't rely on us to handle their data + * while we are in the middle of the func. + */ + flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags); + + lockdep_irq_work_enter(work); + work->func(work); + lockdep_irq_work_exit(work); + /* + * Clear the BUSY bit and return to the free state if + * no-one else claimed it meanwhile. + */ + flags &= ~IRQ_WORK_PENDING; + (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); +} + static void irq_work_run_list(struct llist_head *list) { struct irq_work *work, *tmp; @@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list) return; llnode = llist_del_all(list); - llist_for_each_entry_safe(work, tmp, llnode, llnode) { - int flags; - /* - * Clear the PENDING bit, after this point the @work - * can be re-used. - * Make it immediately visible so that other CPUs trying - * to claim that work don't rely on us to handle their data - * while we are in the middle of the func. - */ - flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags); - - lockdep_irq_work_enter(work); - work->func(work); - lockdep_irq_work_exit(work); - /* - * Clear the BUSY bit and return to the free state if - * no-one else claimed it meanwhile. - */ - flags &= ~IRQ_WORK_PENDING; - (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); - } + llist_for_each_entry_safe(work, tmp, llnode, llnode) + irq_work_single(work); } /* diff --git a/kernel/smp.c b/kernel/smp.c index 9f1181375141..856562b80794 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -23,10 +23,8 @@ #include "smpboot.h" -enum { - CSD_FLAG_LOCK = 0x01, - CSD_FLAG_SYNCHRONOUS = 0x02, -}; + +#define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK) struct call_function_data { call_single_data_t __percpu *csd; @@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); extern void send_call_function_single_ipi(int cpu); +void __smp_call_single_queue(int cpu, struct llist_node *node) +{ + /* + * The list addition should be visible before sending the IPI + * handler locks the list to pull the entry off it because of + * normal cache coherency rules implied by spinlocks. + * + * If IPIs can go out of order to the cache coherency protocol + * in an architecture, sufficient synchronisation should be added + * to arch code to make it appear to obey cache coherency WRT + * locking and barrier primitives. Generic code isn't really + * equipped to do the right thing... + */ + if (llist_add(node, &per_cpu(call_single_queue, cpu))) + send_call_function_single_ipi(cpu); +} + /* * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static int generic_exec_single(int cpu, call_single_data_t *csd, - smp_call_func_t func, void *info) +static int generic_exec_single(int cpu, call_single_data_t *csd) { if (cpu == smp_processor_id()) { + smp_call_func_t func = csd->func; + void *info = csd->info; unsigned long flags; /* @@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, return 0; } - if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) { csd_unlock(csd); return -ENXIO; } - csd->func = func; - csd->info = info; - - /* - * The list addition should be visible before sending the IPI - * handler locks the list to pull the entry off it because of - * normal cache coherency rules implied by spinlocks. - * - * If IPIs can go out of order to the cache coherency protocol - * in an architecture, sufficient synchronisation should be added - * to arch code to make it appear to obey cache coherency WRT - * locking and barrier primitives. Generic code isn't really - * equipped to do the right thing... - */ - if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) - send_call_function_single_ipi(cpu); + __smp_call_single_queue(cpu, &csd->llist); return 0; } @@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd, void generic_smp_call_function_single_interrupt(void) { flush_smp_call_function_queue(true); - - /* - * Handle irq works queued remotely by irq_work_queue_on(). - * Smp functions above are typically synchronous so they - * better run first since some other CPUs may be busy waiting - * for them. - */ - irq_work_run(); } +extern void irq_work_single(void *); + /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * @@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) * We don't have to use the _safe() variant here * because we are not invoking the IPI handlers yet. */ - llist_for_each_entry(csd, entry, llist) - pr_warn("IPI callback %pS sent to offline CPU\n", - csd->func); + llist_for_each_entry(csd, entry, llist) { + switch (CSD_TYPE(csd)) { + case CSD_TYPE_ASYNC: + case CSD_TYPE_SYNC: + case CSD_TYPE_IRQ_WORK: + pr_warn("IPI callback %pS sent to offline CPU\n", + csd->func); + break; + + default: + pr_warn("IPI callback, unknown type %d, sent to offline CPU\n", + CSD_TYPE(csd)); + break; + } + } } /* @@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) */ prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { - smp_call_func_t func = csd->func; - void *info = csd->info; - /* Do we wait until *after* callback? */ - if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + if (prev) { prev->next = &csd_next->llist; } else { entry = &csd_next->llist; } + func(info); csd_unlock(csd); } else { @@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) * Second; run all !SYNC callbacks. */ llist_for_each_entry_safe(csd, csd_next, entry, llist) { - smp_call_func_t func = csd->func; - void *info = csd->info; + int type = CSD_TYPE(csd); - csd_unlock(csd); - func(info); + if (type == CSD_TYPE_ASYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } else if (type == CSD_TYPE_IRQ_WORK) { + irq_work_single(csd); + } } } @@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, { call_single_data_t *csd; call_single_data_t csd_stack = { - .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS, + .flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, }; int this_cpu; int err; @@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, csd_lock(csd); } - err = generic_exec_single(cpu, csd, func, info); + csd->func = func; + csd->info = info; + + err = generic_exec_single(cpu, csd); if (wait) csd_lock_wait(csd); @@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd) csd->flags = CSD_FLAG_LOCK; smp_wmb(); - err = generic_exec_single(cpu, csd, csd->func, csd->info); + err = generic_exec_single(cpu, csd); out: preempt_enable(); @@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, csd_lock(csd); if (wait) - csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) @@ -632,6 +648,17 @@ void __init smp_init(void) { int num_nodes, num_cpus; + /* + * Ensure struct irq_work layout matches so that + * flush_smp_call_function_queue() can do horrible things. + */ + BUILD_BUG_ON(offsetof(struct irq_work, llnode) != + offsetof(struct __call_single_data, llist)); + BUILD_BUG_ON(offsetof(struct irq_work, func) != + offsetof(struct __call_single_data, func)); + BUILD_BUG_ON(offsetof(struct irq_work, flags) != + offsetof(struct __call_single_data, flags)); + idle_threads_init(); cpuhp_threads_init(); -- cgit v1.2.3 From a148866489fbe243c936fe43e4525d8dbfa0318f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2020 18:11:04 +0200 Subject: sched: Replace rq::wake_list The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()") got smp_call_function_single_async() subtly wrong. Even though it will return -EBUSY when trying to re-use a csd, that condition is not atomic and still requires external serialization. The change in ttwu_queue_remote() got this wrong. While on first reading ttwu_queue_remote() has an atomic test-and-set that appears to serialize the use, the matching 'release' is not in the right place to actually guarantee this serialization. The actual race is vs the sched_ttwu_pending() call in the idle loop; that can run the wakeup-list without consuming the CSD. Instead of trying to chain the lists, merge them. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200526161908.129371594@infradead.org --- include/linux/sched.h | 1 + include/linux/smp.h | 1 + kernel/sched/core.c | 25 +++++++------------------ kernel/sched/idle.c | 1 - kernel/sched/sched.h | 8 -------- kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 6 files changed, 49 insertions(+), 34 deletions(-) (limited to 'kernel/smp.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index ebc68706152a..e0f5f41cf2ee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -654,6 +654,7 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; + unsigned int wake_entry_type; int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 45ad6e30f398..84f90e24ed6f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -25,6 +25,7 @@ enum { CSD_TYPE_ASYNC = 0x00, CSD_TYPE_SYNC = 0x10, CSD_TYPE_IRQ_WORK = 0x20, + CSD_TYPE_TTWU = 0x30, CSD_FLAG_TYPE_MASK = 0xF0, }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b71ed5ee97fd..b3c64c6b4d2e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data) * __migrate_task() such that we will not miss enforcing cpus_ptr * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. */ - sched_ttwu_pending(); + flush_smp_call_function_from_idle(); raw_spin_lock(&p->pi_lock); rq_lock(rq, &rf); @@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -void sched_ttwu_pending(void) +void sched_ttwu_pending(void *arg) { + struct llist_node *llist = arg; struct rq *rq = this_rq(); - struct llist_node *llist; struct task_struct *p, *t; struct rq_flags rf; - llist = llist_del_all(&rq->wake_list); if (!llist) return; @@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void) rq_unlock_irqrestore(rq, &rf); } -static void wake_csd_func(void *info) -{ - sched_ttwu_pending(); -} - void send_call_function_single_ipi(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); WRITE_ONCE(rq->ttwu_pending, 1); - if (llist_add(&p->wake_entry, &rq->wake_list)) { - if (!set_nr_if_polling(rq->idle)) - smp_call_function_single_async(cpu, &rq->wake_csd); - else - trace_sched_wake_idle_without_ipi(cpu); - } + __smp_call_single_queue(cpu, &p->wake_entry); } void wake_up_if_idle(int cpu) @@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->capture_control = NULL; #endif init_numa_balancing(clone_flags, p); +#ifdef CONFIG_SMP + p->wake_entry_type = CSD_TYPE_TTWU; +#endif } DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); @@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu) struct rq_flags rf; /* Handle pending wakeups and then migrate everything off */ - sched_ttwu_pending(); sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); @@ -6763,8 +6754,6 @@ void __init sched_init(void) rq->avg_idle = 2*sysctl_sched_migration_cost; rq->max_idle_balance_cost = sysctl_sched_migration_cost; - rq_csd_init(rq, &rq->wake_csd, wake_csd_func); - INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 387fd75ee6f7..05deb81bb3e3 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -294,7 +294,6 @@ static void do_idle(void) * critical section. */ flush_smp_call_function_from_idle(); - sched_ttwu_pending(); schedule_idle(); if (unlikely(klp_patch_pending(current))) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c86fc94c54e5..1d4e94c1e5fe 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1023,11 +1023,6 @@ struct rq { unsigned int ttwu_local; #endif -#ifdef CONFIG_SMP - call_single_data_t wake_csd; - struct llist_head wake_list; -#endif - #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ struct cpuidle_state *idle_state; @@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq, rq->balance_callback = head; } -extern void sched_ttwu_pending(void); - #define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ lockdep_is_held(&sched_domains_mutex)) @@ -1512,7 +1505,6 @@ extern void flush_smp_call_function_from_idle(void); #else /* !CONFIG_SMP: */ static inline void flush_smp_call_function_from_idle(void) { } -static inline void sched_ttwu_pending(void) { } #endif #include "stats.h" diff --git a/kernel/smp.c b/kernel/smp.c index 856562b80794..0d61dc060b01 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -196,6 +196,7 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } +extern void sched_ttwu_pending(void *); extern void irq_work_single(void *); /** @@ -244,6 +245,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) csd->func); break; + case CSD_TYPE_TTWU: + pr_warn("IPI task-wakeup sent to offline CPU\n"); + break; + default: pr_warn("IPI callback, unknown type %d, sent to offline CPU\n", CSD_TYPE(csd)); @@ -275,22 +280,43 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } } + if (!entry) + return; + /* * Second; run all !SYNC callbacks. */ + prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, llist) { int type = CSD_TYPE(csd); - if (type == CSD_TYPE_ASYNC) { - smp_call_func_t func = csd->func; - void *info = csd->info; + if (type != CSD_TYPE_TTWU) { + if (prev) { + prev->next = &csd_next->llist; + } else { + entry = &csd_next->llist; + } - csd_unlock(csd); - func(info); - } else if (type == CSD_TYPE_IRQ_WORK) { - irq_work_single(csd); + if (type == CSD_TYPE_ASYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + csd_unlock(csd); + func(info); + } else if (type == CSD_TYPE_IRQ_WORK) { + irq_work_single(csd); + } + + } else { + prev = &csd->llist; } } + + /* + * Third; only CSD_TYPE_TTWU is left, issue those. + */ + if (entry) + sched_ttwu_pending(entry); } void flush_smp_call_function_from_idle(void) @@ -659,6 +685,13 @@ void __init smp_init(void) BUILD_BUG_ON(offsetof(struct irq_work, flags) != offsetof(struct __call_single_data, flags)); + /* + * Assert the CSD_TYPE_TTWU layout is similar enough + * for task_struct to be on the @call_single_queue. + */ + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) != + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist)); + idle_threads_init(); cpuhp_threads_init(); -- cgit v1.2.3 From 1f8db4150536431b031585ecc2a6793f69245de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2020 11:01:34 +0200 Subject: sched/headers: Split out open-coded prototypes into kernel/sched/smp.h Move the prototypes for sched_ttwu_pending() and send_call_function_single_ipi() into the newly created kernel/sched/smp.h header, to make sure they are all the same, and to architectures happy that use -Wmissing-prototypes. Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 + kernel/sched/smp.h | 9 +++++++++ kernel/smp.c | 5 +---- 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 kernel/sched/smp.h (limited to 'kernel/smp.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b3c64c6b4d2e..43ba2d4a8eca 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -20,6 +20,7 @@ #include "../smpboot.h" #include "pelt.h" +#include "smp.h" #define CREATE_TRACE_POINTS #include diff --git a/kernel/sched/smp.h b/kernel/sched/smp.h new file mode 100644 index 000000000000..9620e323162c --- /dev/null +++ b/kernel/sched/smp.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Scheduler internal SMP callback types and methods between the scheduler + * and other internal parts of the core kernel: + */ + +extern void sched_ttwu_pending(void *arg); + +extern void send_call_function_single_ipi(int cpu); diff --git a/kernel/smp.c b/kernel/smp.c index 0d61dc060b01..4dec04f7fdc5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -22,7 +22,7 @@ #include #include "smpboot.h" - +#include "sched/smp.h" #define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK) @@ -133,8 +133,6 @@ static __always_inline void csd_unlock(call_single_data_t *csd) static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); -extern void send_call_function_single_ipi(int cpu); - void __smp_call_single_queue(int cpu, struct llist_node *node) { /* @@ -196,7 +194,6 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } -extern void sched_ttwu_pending(void *); extern void irq_work_single(void *); /** -- cgit v1.2.3 From 25de110d148666752dc0e0da7a0b69de31cd7098 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 2 Jun 2020 12:08:39 +0200 Subject: irq_work: Define irq_work_single() on !CONFIG_IRQ_WORK too Some SMP platforms don't have CONFIG_IRQ_WORK defined, resulting in a link error at build time. Define a stub and clean up the prototype definitions. Reported-by: kbuild test robot Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 2 ++ kernel/smp.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/smp.c') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index f23a359c8f46..2735da5f839e 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -58,9 +58,11 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); +void irq_work_single(void *arg); #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } +static inline void irq_work_single(void *arg) { } #endif #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/smp.c b/kernel/smp.c index 4dec04f7fdc5..c80486a7e3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -194,8 +194,6 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } -extern void irq_work_single(void *); - /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * -- cgit v1.2.3