diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/irq_work.c | 9 | ||||
-rw-r--r-- | kernel/kallsyms.c | 43 | ||||
-rw-r--r-- | kernel/module.c | 32 | ||||
-rw-r--r-- | kernel/rcu/rcu.h | 21 | ||||
-rw-r--r-- | kernel/rcu/rcu_segcblist.c | 1 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 24 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 159 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 5 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 14 | ||||
-rw-r--r-- | kernel/rcu/update.c | 25 | ||||
-rw-r--r-- | kernel/sched/core.c | 5 | ||||
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 6 |
12 files changed, 267 insertions, 77 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 899579657a0a..ec8ac337404d 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -56,7 +56,6 @@ void __weak arch_irq_work_raise(void) */ } -#ifdef CONFIG_SMP /* * Enqueue the irq_work @work on @cpu unless it's already pending * somewhere. @@ -68,6 +67,8 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(cpu)); +#ifdef CONFIG_SMP + /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); @@ -78,10 +79,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) arch_send_call_function_single_ipi(cpu); +#else /* #ifdef CONFIG_SMP */ + irq_work_queue(work); +#endif /* #else #ifdef CONFIG_SMP */ + return true; } -EXPORT_SYMBOL_GPL(irq_work_queue_on); -#endif /* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cfafa55..1e6ae66c6244 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -480,6 +480,7 @@ struct kallsym_iter { char name[KSYM_NAME_LEN]; char module_name[MODULE_NAME_LEN]; int exported; + int show_value; }; static int get_ksymbol_mod(struct kallsym_iter *iter) @@ -582,12 +583,15 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { + unsigned long value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; + value = iter->show_value ? iter->value : 0; + if (iter->module_name[0]) { char type; @@ -597,10 +601,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); - seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, "%pK %c %s\n", (void *)iter->value, + seq_printf(m, KALLSYM_FMT " %c %s\n", value, iter->type, iter->name); return 0; } @@ -612,6 +616,40 @@ static const struct seq_operations kallsyms_op = { .show = s_show }; +static inline int kallsyms_for_perf(void) +{ +#ifdef CONFIG_PERF_EVENTS + extern int sysctl_perf_event_paranoid; + if (sysctl_perf_event_paranoid <= 1) + return 1; +#endif + return 0; +} + +/* + * We show kallsyms information even to normal users if we've enabled + * kernel profiling and are explicitly not paranoid (so kptr_restrict + * is clear, and sysctl_perf_event_paranoid isn't set). + * + * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to + * block even that). + */ +int kallsyms_show_value(void) +{ + switch (kptr_restrict) { + case 0: + if (kallsyms_for_perf()) + return 1; + /* fallthrough */ + case 1: + if (has_capability_noaudit(current, CAP_SYSLOG)) + return 1; + /* fallthrough */ + default: + return 0; + } +} + static int kallsyms_open(struct inode *inode, struct file *file) { /* @@ -625,6 +663,7 @@ static int kallsyms_open(struct inode *inode, struct file *file) return -ENOMEM; reset_iter(iter, 0); + iter->show_value = kallsyms_show_value(); return 0; } diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..32c2cdaccd93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -278,6 +278,16 @@ static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); #endif /* !CONFIG_MODULE_SIG_FORCE */ +/* + * Export sig_enforce kernel cmdline parameter to allow other subsystems rely + * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. + */ +bool is_module_sig_enforced(void) +{ + return sig_enforce; +} +EXPORT_SYMBOL(is_module_sig_enforced); + /* Block module loading/unloading? */ int modules_disabled = 0; core_param(nomodule, modules_disabled, bint, 0); @@ -1516,7 +1526,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.mode = S_IRUGO; + sattr->mattr.attr.mode = S_IRUSR; *(gattr++) = &(sattr++)->mattr.attr; } *gattr = NULL; @@ -4147,6 +4157,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; + unsigned long value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4162,7 +4173,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->core_layout.base); + value = m->private ? 0 : (unsigned long)mod->core_layout.base; + seq_printf(m, " 0x" KALLSYM_FMT, value); /* Taints info */ if (mod->taints) @@ -4184,9 +4196,23 @@ static const struct seq_operations modules_op = { .show = m_show }; +/* + * This also sets the "private" pointer to non-NULL if the + * kernel pointers should be hidden (so you can just test + * "m->private" to see if you should keep the values private). + * + * We use the same logic as for /proc/kallsyms. + */ static int modules_open(struct inode *inode, struct file *file) { - return seq_open(file, &modules_op); + int err = seq_open(file, &modules_op); + + if (!err) { + struct seq_file *m = file->private_data; + m->private = kallsyms_show_value() ? NULL : (void *)8ul; + } + + return 0; } static const struct file_operations proc_modules_operations = { diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index e4b43fef89f5..59c471de342a 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -203,6 +203,21 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) extern int rcu_cpu_stall_suppress; int rcu_jiffies_till_stall_check(void); +#define rcu_ftrace_dump_stall_suppress() \ +do { \ + if (!rcu_cpu_stall_suppress) \ + rcu_cpu_stall_suppress = 3; \ +} while (0) + +#define rcu_ftrace_dump_stall_unsuppress() \ +do { \ + if (rcu_cpu_stall_suppress == 3) \ + rcu_cpu_stall_suppress = 0; \ +} while (0) + +#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */ +#define rcu_ftrace_dump_stall_suppress() +#define rcu_ftrace_dump_stall_unsuppress() #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ /* @@ -220,8 +235,12 @@ do { \ static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ \ if (!atomic_read(&___rfd_beenhere) && \ - !atomic_xchg(&___rfd_beenhere, 1)) \ + !atomic_xchg(&___rfd_beenhere, 1)) { \ + tracing_off(); \ + rcu_ftrace_dump_stall_suppress(); \ ftrace_dump(oops_dump_mode); \ + rcu_ftrace_dump_stall_unsuppress(); \ + } \ } while (0) void rcu_early_boot_tests(void); diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 7649fcd2c4c7..88cba7c2956c 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/interrupt.h> +#include <linux/rcupdate.h> #include "rcu_segcblist.h" diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 45f2ffbc1e78..362eb2f78b3c 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -51,6 +51,7 @@ #include <asm/byteorder.h> #include <linux/torture.h> #include <linux/vmalloc.h> +#include <linux/sched/debug.h> #include "rcu.h" @@ -89,6 +90,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); torture_param(int, stall_cpu_holdoff, 10, "Time to wait before starting stall (s)."); +torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling."); torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s"); torture_param(int, stutter, 5, "Number of seconds to run/halt test"); @@ -1239,6 +1241,7 @@ rcu_torture_stats_print(void) long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; static unsigned long rtcv_snap = ULONG_MAX; + static bool splatted; struct task_struct *wtp; for_each_possible_cpu(cpu) { @@ -1324,6 +1327,10 @@ rcu_torture_stats_print(void) gpnum, completed, flags, wtp == NULL ? ~0UL : wtp->state, wtp == NULL ? -1 : (int)task_cpu(wtp)); + if (!splatted && wtp) { + sched_show_task(wtp); + splatted = true; + } show_rcu_gp_kthreads(); rcu_ftrace_dump(DUMP_ALL); } @@ -1357,7 +1364,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " "test_boost=%d/%d test_boost_interval=%d " "test_boost_duration=%d shutdown_secs=%d " - "stall_cpu=%d stall_cpu_holdoff=%d " + "stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d " "n_barrier_cbs=%d " "onoff_interval=%d onoff_holdoff=%d\n", torture_type, tag, nrealreaders, nfakewriters, @@ -1365,7 +1372,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, test_boost, cur_ops->can_boost, test_boost_interval, test_boost_duration, shutdown_secs, - stall_cpu, stall_cpu_holdoff, + stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff, n_barrier_cbs, onoff_interval, onoff_holdoff); } @@ -1430,12 +1437,19 @@ static int rcu_torture_stall(void *args) if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; /* RCU CPU stall is expected behavior in following code. */ - pr_alert("rcu_torture_stall start.\n"); rcu_read_lock(); - preempt_disable(); + if (stall_cpu_irqsoff) + local_irq_disable(); + else + preempt_disable(); + pr_alert("rcu_torture_stall start on CPU %d.\n", + smp_processor_id()); while (ULONG_CMP_LT(get_seconds(), stop_at)) continue; /* Induce RCU CPU stall warning. */ - preempt_enable(); + if (stall_cpu_irqsoff) + local_irq_enable(); + else + preempt_enable(); rcu_read_unlock(); pr_alert("rcu_torture_stall end.\n"); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 08fa586d7f8c..f9c0ca2ccf0c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -534,8 +534,8 @@ module_param(rcu_kick_kthreads, bool, 0644); * How long the grace period must be before we start recruiting * quiescent-state help from rcu_note_context_switch(). */ -static ulong jiffies_till_sched_qs = HZ / 20; -module_param(jiffies_till_sched_qs, ulong, 0644); +static ulong jiffies_till_sched_qs = HZ / 10; +module_param(jiffies_till_sched_qs, ulong, 0444); static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); @@ -837,6 +837,9 @@ static void rcu_eqs_enter(bool user) * We crowbar the ->dynticks_nesting field to zero to allow for * the possibility of usermode upcalls having messed up our count * of interrupt nesting level during the prior busy period. + * + * If you add or remove a call to rcu_idle_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_idle_enter(void) { @@ -852,6 +855,9 @@ void rcu_idle_enter(void) * is permitted between this call and rcu_user_exit(). This way the * CPU doesn't need to maintain the tick for RCU maintenance purposes * when the CPU runs in userspace. + * + * If you add or remove a call to rcu_user_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_user_enter(void) { @@ -875,6 +881,9 @@ void rcu_user_enter(void) * Use things like work queues to work around this limitation. * * You have been warned. + * + * If you add or remove a call to rcu_irq_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_exit(void) { @@ -899,6 +908,9 @@ void rcu_irq_exit(void) /* * Wrapper for rcu_irq_exit() where interrupts are enabled. + * + * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_exit_irqson(void) { @@ -971,6 +983,9 @@ static void rcu_eqs_exit(bool user) * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. + * + * If you add or remove a call to rcu_idle_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_idle_exit(void) { @@ -987,6 +1002,9 @@ void rcu_idle_exit(void) * * Exit RCU idle mode while entering the kernel because it can * run a RCU read side critical section anytime. + * + * If you add or remove a call to rcu_user_exit(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_user_exit(void) { @@ -1012,6 +1030,9 @@ void rcu_user_exit(void) * Use things like work queues to work around this limitation. * * You have been warned. + * + * If you add or remove a call to rcu_irq_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_enter(void) { @@ -1037,6 +1058,9 @@ void rcu_irq_enter(void) /* * Wrapper for rcu_irq_enter() where interrupts are enabled. + * + * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_irq_enter_irqson(void) { @@ -1055,6 +1079,9 @@ void rcu_irq_enter_irqson(void) * that the CPU is active. This implementation permits nested NMIs, as * long as the nesting level does not overflow an int. (You will probably * run out of stack space first.) + * + * If you add or remove a call to rcu_nmi_enter(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_nmi_enter(void) { @@ -1087,6 +1114,9 @@ void rcu_nmi_enter(void) * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting * to let the RCU grace-period handling know that the CPU is back to * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. */ void rcu_nmi_exit(void) { @@ -1207,6 +1237,22 @@ static int rcu_is_cpu_rrupt_from_idle(void) } /* + * We are reporting a quiescent state on behalf of some other CPU, so + * it is our responsibility to check for and handle potential overflow + * of the rcu_node ->gpnum counter with respect to the rcu_data counters. + * After all, the CPU might be in deep idle state, and thus executing no + * code whatsoever. + */ +static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) +{ + lockdep_assert_held(&rnp->lock); + if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum)) + WRITE_ONCE(rdp->gpwrap, true); + if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum)) + rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4; +} + +/* * Snapshot the specified CPU's dynticks counter so that we can later * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. @@ -1216,15 +1262,34 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); - if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, - rdp->mynode->gpnum)) - WRITE_ONCE(rdp->gpwrap, true); + rcu_gpnum_ovf(rdp->mynode, rdp); return 1; } return 0; } /* + * Handler for the irq_work request posted when a grace period has + * gone on for too long, but not yet long enough for an RCU CPU + * stall warning. Set state appropriately, but just complain if + * there is unexpected state on entry. + */ +static void rcu_iw_handler(struct irq_work *iwp) +{ + struct rcu_data *rdp; + struct rcu_node *rnp; + + rdp = container_of(iwp, struct rcu_data, rcu_iw); + rnp = rdp->mynode; + raw_spin_lock_rcu_node(rnp); + if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { + rdp->rcu_iw_gpnum = rnp->gpnum; + rdp->rcu_iw_pending = false; + } + raw_spin_unlock_rcu_node(rnp); +} + +/* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() @@ -1235,8 +1300,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) unsigned long jtsq; bool *rnhqp; bool *ruqp; - unsigned long rjtsc; - struct rcu_node *rnp; + struct rcu_node *rnp = rdp->mynode; /* * If the CPU passed through or entered a dynticks idle phase with @@ -1249,34 +1313,25 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); rdp->dynticks_fqs++; + rcu_gpnum_ovf(rnp, rdp); return 1; } - /* Compute and saturate jiffies_till_sched_qs. */ - jtsq = jiffies_till_sched_qs; - rjtsc = rcu_jiffies_till_stall_check(); - if (jtsq > rjtsc / 2) { - WRITE_ONCE(jiffies_till_sched_qs, rjtsc); - jtsq = rjtsc / 2; - } else if (jtsq < 1) { - WRITE_ONCE(jiffies_till_sched_qs, 1); - jtsq = 1; - } - /* * Has this CPU encountered a cond_resched_rcu_qs() since the * beginning of the grace period? For this to be the case, * the CPU has to have noticed the current grace period. This * might not be the case for nohz_full CPUs looping in the kernel. */ - rnp = rdp->mynode; + jtsq = jiffies_till_sched_qs; ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); + rcu_gpnum_ovf(rnp, rdp); return 1; - } else { + } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) { /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ smp_store_release(ruqp, true); } @@ -1285,6 +1340,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl")); rdp->offline_fqs++; + rcu_gpnum_ovf(rnp, rdp); return 1; } @@ -1304,10 +1360,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * updates are only once every few jiffies, the probability of * lossage (and thus of slight grace-period extension) is * quite low. - * - * Note that if the jiffies_till_sched_qs boot/sysfs parameter - * is set too high, we override with half of the RCU CPU stall - * warning delay. */ rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); if (!READ_ONCE(*rnhqp) && @@ -1316,15 +1368,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) WRITE_ONCE(*rnhqp, true); /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ smp_store_release(ruqp, true); - rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ + rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */ } /* - * If more than halfway to RCU CPU stall-warning time, do - * a resched_cpu() to try to loosen things up a bit. + * If more than halfway to RCU CPU stall-warning time, do a + * resched_cpu() to try to loosen things up a bit. Also check to + * see if the CPU is getting hammered with interrupts, but only + * once per grace period, just to keep the IPIs down to a dull roar. */ - if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) + if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { resched_cpu(rdp->cpu); + if (IS_ENABLED(CONFIG_IRQ_WORK) && + !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum && + (rnp->ffmask & rdp->grpmask)) { + init_irq_work(&rdp->rcu_iw, rcu_iw_handler); + rdp->rcu_iw_pending = true; + rdp->rcu_iw_gpnum = rnp->gpnum; + irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); + } + } return 0; } @@ -1513,6 +1576,7 @@ static void print_cpu_stall(struct rcu_state *rsp) { int cpu; unsigned long flags; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; @@ -1528,7 +1592,9 @@ static void print_cpu_stall(struct rcu_state *rsp) */ pr_err("INFO: %s self-detected stall on CPU", rsp->name); print_cpu_stall_info_begin(); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); print_cpu_stall_info(rsp, smp_processor_id()); + raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, @@ -1922,6 +1988,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, rdp->core_needs_qs = need_gp; zero_cpu_stall_ticks(rdp); WRITE_ONCE(rdp->gpwrap, false); + rcu_gpnum_ovf(rnp, rdp); } return ret; } @@ -3702,6 +3769,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->cpu_no_qs.b.norm = true; rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; + rdp->rcu_iw_pending = false; + rdp->rcu_iw_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } @@ -3739,10 +3808,24 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing) */ int rcutree_online_cpu(unsigned int cpu) { - sync_sched_exp_online_cleanup(cpu); - rcutree_affinity_setting(cpu, -1); + unsigned long flags; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + rnp = rdp->mynode; + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rnp->ffmask |= rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } if (IS_ENABLED(CONFIG_TREE_SRCU)) srcu_online_cpu(cpu); + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return 0; /* Too early in boot for scheduler work. */ + sync_sched_exp_online_cleanup(cpu); + rcutree_affinity_setting(cpu, -1); return 0; } @@ -3752,6 +3835,19 @@ int rcutree_online_cpu(unsigned int cpu) */ int rcutree_offline_cpu(unsigned int cpu) { + unsigned long flags; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + rnp = rdp->mynode; + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rnp->ffmask &= ~rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } + rcutree_affinity_setting(cpu, cpu); if (IS_ENABLED(CONFIG_TREE_SRCU)) srcu_offline_cpu(cpu); @@ -4200,8 +4296,7 @@ void __init rcu_init(void) for_each_online_cpu(cpu) { rcutree_prepare_cpu(cpu); rcu_cpu_starting(cpu); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_online_cpu(cpu); + rcutree_online_cpu(cpu); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8e1f285f0a70..46a5d1991450 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -103,6 +103,7 @@ struct rcu_node { /* Online CPUs for next expedited GP. */ /* Any CPU that has ever been online will */ /* have its bit set. */ + unsigned long ffmask; /* Fully functional CPUs. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. */ @@ -285,6 +286,10 @@ struct rcu_data { /* 8) RCU CPU stall data. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ + /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ + struct irq_work rcu_iw; /* Check for non-irq activity. */ + bool rcu_iw_pending; /* Is ->rcu_iw pending? */ + unsigned long rcu_iw_gpnum; /* ->gpnum associated with ->rcu_iw. */ int cpu; struct rcu_state *rsp; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index df08e5c8126b..dd4d0d390e5b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -54,6 +54,7 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); * This probably needs to be excluded from -rt builds. */ #define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) +#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1) #endif /* #else #ifdef CONFIG_RCU_BOOST */ @@ -530,7 +531,7 @@ void rcu_read_unlock_special(struct task_struct *t) /* Unboost if we were boosted. */ if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_unlock(&rnp->boost_mtx); + rt_mutex_futex_unlock(&rnp->boost_mtx); /* * If this was the last task on the expedited lists, @@ -911,8 +912,6 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "../locking/rtmutex_common.h" - static void rcu_wake_cond(struct task_struct *t, int status) { /* @@ -1507,7 +1506,7 @@ static void rcu_prepare_for_idle(void) rdtp->last_accelerate = jiffies; for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); - if (rcu_segcblist_pend_cbs(&rdp->cblist)) + if (!rcu_segcblist_pend_cbs(&rdp->cblist)) continue; rnp = rdp->mynode; raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ @@ -1671,6 +1670,7 @@ static void print_cpu_stall_info_begin(void) */ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) { + unsigned long delta; char fast_no_hz[72]; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_dynticks *rdtp = rdp->dynticks; @@ -1685,11 +1685,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], + !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : + rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : + "!."[!delta], ticks_value, ticks_title, rcu_dynticks_snap(rdtp) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5033b66d2753..27694561f769 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -494,6 +494,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ +EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); @@ -575,7 +576,6 @@ DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu); static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; module_param(rcu_task_stall_timeout, int, 0644); -static void rcu_spawn_tasks_kthread(void); static struct task_struct *rcu_tasks_kthread_ptr; /** @@ -600,7 +600,6 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; bool needwake; - bool havetask = READ_ONCE(rcu_tasks_kthread_ptr); rhp->next = NULL; rhp->func = func; @@ -610,11 +609,8 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) rcu_tasks_cbs_tail = &rhp->next; raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); /* We can't create the thread unless interrupts are enabled. */ - if ((needwake && havetask) || - (!havetask && !irqs_disabled_flags(flags))) { - rcu_spawn_tasks_kthread(); + if (needwake && READ_ONCE(rcu_tasks_kthread_ptr)) wake_up(&rcu_tasks_cbs_wq); - } } EXPORT_SYMBOL_GPL(call_rcu_tasks); @@ -853,27 +849,18 @@ static int __noreturn rcu_tasks_kthread(void *arg) } } -/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */ -static void rcu_spawn_tasks_kthread(void) +/* Spawn rcu_tasks_kthread() at core_initcall() time. */ +static int __init rcu_spawn_tasks_kthread(void) { - static DEFINE_MUTEX(rcu_tasks_kthread_mutex); struct task_struct *t; - if (READ_ONCE(rcu_tasks_kthread_ptr)) { - smp_mb(); /* Ensure caller sees full kthread. */ - return; - } - mutex_lock(&rcu_tasks_kthread_mutex); - if (rcu_tasks_kthread_ptr) { - mutex_unlock(&rcu_tasks_kthread_mutex); - return; - } t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); BUG_ON(IS_ERR(t)); smp_mb(); /* Ensure others see full kthread. */ WRITE_ONCE(rcu_tasks_kthread_ptr, t); - mutex_unlock(&rcu_tasks_kthread_mutex); + return 0; } +core_initcall(rcu_spawn_tasks_kthread); /* Do the srcu_read_lock() for the above synchronize_srcu(). */ void exit_tasks_rcu_start(void) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d17c5da523a0..9446b2e5eac5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -505,8 +505,7 @@ void resched_cpu(int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - if (!raw_spin_trylock_irqsave(&rq->lock, flags)) - return; + raw_spin_lock_irqsave(&rq->lock, flags); resched_curr(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -4842,6 +4841,7 @@ int __sched _cond_resched(void) preempt_schedule_common(); return 1; } + rcu_all_qs(); return 0; } EXPORT_SYMBOL(_cond_resched); @@ -5165,6 +5165,7 @@ void sched_show_task(struct task_struct *p) show_stack(p, NULL); put_task_stack(p); } +EXPORT_SYMBOL_GPL(sched_show_task); static inline bool state_filter_match(unsigned long state_filter, struct task_struct *p) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9209d83ecdcf..ba0da243fdd8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -649,6 +649,7 @@ static int sugov_start(struct cpufreq_policy *policy) struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); memset(sg_cpu, 0, sizeof(*sg_cpu)); + sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; @@ -714,11 +715,6 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(sugov_cpu, cpu).cpu = cpu; - return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); |