From c69a00a12e26cf4faffdcdb340cb2d059b61d57e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Jan 2022 15:41:10 -0800 Subject: srcu: Add boot-time control over srcu_node array allocation This commit adds an srcu_tree.convert_to_big kernel parameter that either refuses to convert at all (0), converts immediately at init_srcu_struct() time (1), or lets rcutorture convert it (2). An addition contention-based dynamic conversion choice will be added, along with documentation. [ paulmck: Apply callback-scanning feedback from Neeraj Upadhyay. ] Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..1f1fcac7777d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5608,6 +5608,19 @@ off: Disable mitigation and remove performance impact to RDRAND and RDSEED + srcutree.convert_to_big [KNL] + Specifies under what conditions an SRCU tree + srcu_struct structure will be converted to big + form, that is, with an rcu_node tree: + + 0: Never. + 1: At init_srcu_struct() time. + 2: When rcutorture decides to. + + Either way, the srcu_node tree will be sized based + on the actual runtime number of CPUs (nr_cpu_ids) + instead of the compile-time CONFIG_NR_CPUS. + srcutree.counter_wrap_check [KNL] Specifies how frequently to check for grace-period sequence counter wrap for the -- cgit v1.2.3 From 9f2e91d94c91558e3764fe4e01c5e6281a90f239 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Jan 2022 20:32:05 -0800 Subject: srcu: Add contention-triggered addition of srcu_node tree This commit instruments the acquisitions of the srcu_struct structure's ->lock, enabling the initiation of a transition from SRCU_SIZE_SMALL to SRCU_SIZE_BIG when sufficient contention is experienced. The instrumentation counts the number of trylock failures within the confines of a single jiffy. If that number exceeds the value specified by the srcutree.small_contention_lim kernel boot parameter (which defaults to 100), and if the value specified by the srcutree.convert_to_big kernel boot parameter has the 0x10 bit set (defaults to 0), then a transition will be automatically initiated. By default, there will never be any transitions, so that none of the srcu_struct structures ever gains an srcu_node array. The useful values for srcutree.convert_to_big are: 0x00: Never convert. 0x01: Always convert at init_srcu_struct() time. 0x02: Convert when rcutorture prints its first round of statistics. 0x03: Decide conversion approach at boot given system size. 0x10: Convert if contention is encountered. 0x12: Convert if contention is encountered or when rcutorture prints its first round of statistics, whichever comes first. The value 0x11 acts the same as 0x01 because the conversion happens before there is any chance of contention. [ paulmck: Apply "static" feedback from kernel test robot. ] Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 9 ++ include/linux/srcutree.h | 2 + kernel/rcu/srcutree.c | 107 ++++++++++++++++++------ 3 files changed, 94 insertions(+), 24 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1f1fcac7777d..177e688768c0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5616,6 +5616,7 @@ 0: Never. 1: At init_srcu_struct() time. 2: When rcutorture decides to. + 0x1X: Above plus if high contention. Either way, the srcu_node tree will be sized based on the actual runtime number of CPUs (nr_cpu_ids) @@ -5638,6 +5639,14 @@ expediting. Set to zero to disable automatic expediting. + srcutree.small_contention_lim [KNL] + Specifies the number of update-side contention + events per jiffy will be tolerated before + initiating a conversion of an srcu_struct + structure to big form. Note that the value of + srcutree.convert_to_big must have the 0x10 bit + set for contention-based conversions to occur. + ssbd= [ARM64,HW] Speculative Store Bypass Disable control diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 44bd204498a1..1b9ff4ed37e4 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -72,6 +72,8 @@ struct srcu_struct { unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ + unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ + unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ bool sda_is_static; /* May ->sda be passed to free_percpu()? */ unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c9460374d437..0bc6a0a3edee 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -41,13 +41,29 @@ module_param(counter_wrap_check, ulong, 0444); /* * Control conversion to SRCU_SIZE_BIG: - * 0: Don't convert at all (default). - * 1: Convert at init_srcu_struct() time. - * 2: Convert when rcutorture invokes srcu_torture_stats_print(). + * 0: Don't convert at all (default). + * 1: Convert at init_srcu_struct() time. + * 2: Convert when rcutorture invokes srcu_torture_stats_print(). + * 3: Decide at boot time based on system shape. + * 0x1x: Convert when excessive contention encountered. */ -static int convert_to_big; +#define SRCU_SIZING_NONE 0 +#define SRCU_SIZING_INIT 1 +#define SRCU_SIZING_TORTURE 2 +#define SRCU_SIZING_AUTO 3 +#define SRCU_SIZING_CONTEND 0x10 +#define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x) +#define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE)) +#define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT)) +#define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE)) +#define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND) +static int convert_to_big = SRCU_SIZING_NONE; module_param(convert_to_big, int, 0444); +/* Contention events per jiffy to initiate transition to big. */ +static int small_contention_lim __read_mostly = 100; +module_param(small_contention_lim, int, 0444); + /* Early-boot callback-management, so early that no lock is required! */ static LIST_HEAD(srcu_boot_list); static bool __read_mostly srcu_init_done; @@ -58,31 +74,40 @@ static void process_srcu(struct work_struct *work); static void srcu_delay_timer(struct timer_list *t); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ -#define spin_lock_rcu_node(p) \ -do { \ - spin_lock(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ +#define spin_lock_rcu_node(p) \ +do { \ + spin_lock(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ } while (0) #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) -#define spin_lock_irq_rcu_node(p) \ -do { \ - spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ +#define spin_lock_irq_rcu_node(p) \ +do { \ + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ } while (0) -#define spin_unlock_irq_rcu_node(p) \ +#define spin_unlock_irq_rcu_node(p) \ spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) -#define spin_lock_irqsave_rcu_node(p, flags) \ -do { \ - spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - smp_mb__after_unlock_lock(); \ +#define spin_lock_irqsave_rcu_node(p, flags) \ +do { \ + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + smp_mb__after_unlock_lock(); \ } while (0) -#define spin_unlock_irqrestore_rcu_node(p, flags) \ - spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ +#define spin_trylock_irqsave_rcu_node(p, flags) \ +({ \ + bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) + +#define spin_unlock_irqrestore_rcu_node(p, flags) \ + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ /* * Initialize SRCU per-CPU data. Note that statically allocated @@ -225,7 +250,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) init_srcu_struct_data(ssp); ssp->srcu_gp_seq_needed_exp = 0; ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && convert_to_big == 1) { + if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { if (!ssp->sda_is_static) { free_percpu(ssp->sda); @@ -272,6 +297,15 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* + * Initiate a transition to SRCU_SIZE_BIG with lock held. + */ +static void __srcu_transition_to_big(struct srcu_struct *ssp) +{ + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC); +} + /* * Initiate an idempotent transition to SRCU_SIZE_BIG. */ @@ -287,10 +321,35 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) spin_unlock_irqrestore_rcu_node(ssp, flags); return; } - smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC); + __srcu_transition_to_big(ssp); spin_unlock_irqrestore_rcu_node(ssp, flags); } +/* + * Acquire the specified srcu_struct structure's ->lock, but check for + * excessive contention, which results in initiation of a transition + * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module + * parameter permits this. + */ +static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) +{ + unsigned long j; + + if (spin_trylock_irqsave_rcu_node(ssp, *flags)) + return; + spin_lock_irqsave_rcu_node(ssp, *flags); + if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state) + return; + j = jiffies; + if (ssp->srcu_size_jiffies != j) { + ssp->srcu_size_jiffies = j; + ssp->srcu_n_lock_retries = 0; + } + if (++ssp->srcu_n_lock_retries <= small_contention_lim) + return; + __srcu_transition_to_big(ssp); +} + /* * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be @@ -718,7 +777,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_rcu_node(ssp, flags); + spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(ssp, flags); @@ -779,7 +838,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_rcu_node(ssp, flags); + spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -1542,7 +1601,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) } pr_cont(" T(%ld,%ld)\n", s0, s1); } - if (convert_to_big == 2) + if (SRCU_SIZING_IS_TORTURE()) srcu_transition_to_big(ssp); } EXPORT_SYMBOL_GPL(srcu_torture_stats_print); -- cgit v1.2.3 From f25390033fa2445cdc4d6cf8243a9b85d942845f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Feb 2022 16:01:12 -0800 Subject: rcu-tasks: Print pre-stall-warning informational messages RCU-tasks stall-warning messages are printed after the grace period is ten minutes old. Unfortunately, most of us will have rebooted the system in response to an apparently-hung command long before the ten minutes is up, and will thus see what looks to be a silent hang. This commit therefore adds pr_info() messages that are printed earlier. These should avoid being classified as errors, but should give impatient users a hint. These are controlled by new rcupdate.rcu_task_stall_info and rcupdate.rcu_task_stall_info_mult kernel-boot parameters. The former defines the initial delay in jiffies (defaulting to 10 seconds) and the latter defines the multiplier (defaulting to 3). Thus, by default, the first message will appear 10 seconds into the RCU-tasks grace period, the second 40 seconds in, and the third 160 seconds in. There would be a fourth at 640 seconds in, but the stall warning message appears 600 seconds in, and once a stall warning is printed for a given grace period, no further informational messages are printed. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 30 +++++++++++++++++-- kernel/rcu/tasks.h | 40 +++++++++++++++++++++---- 2 files changed, 62 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..babc701d4864 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4955,10 +4955,34 @@ number avoids disturbing real-time workloads, but lengthens grace periods. + rcupdate.rcu_task_stall_info= [KNL] + Set initial timeout in jiffies for RCU task stall + informational messages, which give some indication + of the problem for those not patient enough to + wait for ten minutes. Informational messages are + only printed prior to the stall-warning message + for a given grace period. Disable with a value + less than or equal to zero. Defaults to ten + seconds. A change in value does not take effect + until the beginning of the next grace period. + + rcupdate.rcu_task_stall_info_mult= [KNL] + Multiplier for time interval between successive + RCU task stall informational messages for a given + RCU tasks grace period. This value is clamped + to one through ten, inclusive. It defaults to + the value three, so that the first informational + message is printed 10 seconds into the grace + period, the second at 40 seconds, the third at + 160 seconds, and then the stall warning at 600 + seconds would prevent a fourth at 640 seconds. + rcupdate.rcu_task_stall_timeout= [KNL] - Set timeout in jiffies for RCU task stall warning - messages. Disable with a value less than or equal - to zero. + Set timeout in jiffies for RCU task stall + warning messages. Disable with a value less + than or equal to zero. Defaults to ten minutes. + A change in value does not take effect until + the beginning of the next grace period. rcupdate.rcu_self_test= [KNL] Run the RCU early boot self tests diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index b43320b149d2..76799c81d4be 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -143,6 +143,11 @@ module_param(rcu_task_ipi_delay, int, 0644); #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; module_param(rcu_task_stall_timeout, int, 0644); +#define RCU_TASK_STALL_INFO (HZ * 10) +static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO; +module_param(rcu_task_stall_info, int, 0644); +static int rcu_task_stall_info_mult __read_mostly = 3; +module_param(rcu_task_stall_info_mult, int, 0444); static int rcu_task_enqueue_lim __read_mostly = -1; module_param(rcu_task_enqueue_lim, int, 0444); @@ -548,8 +553,15 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp) static void __init rcu_tasks_bootup_oddness(void) { #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) + int rtsimc; + if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT) pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout); + rtsimc = clamp(rcu_task_stall_info_mult, 1, 10); + if (rtsimc != rcu_task_stall_info_mult) { + pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc); + rcu_task_stall_info_mult = rtsimc; + } #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_RCU pr_info("\tTrampoline variant of Tasks RCU enabled.\n"); @@ -592,10 +604,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t); /* Wait for one RCU-tasks grace period. */ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) { - struct task_struct *g, *t; - unsigned long lastreport; - LIST_HEAD(holdouts); + struct task_struct *g; int fract; + LIST_HEAD(holdouts); + unsigned long j; + unsigned long lastinfo; + unsigned long lastreport; + bool reported = false; + int rtsi; + struct task_struct *t; set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP); rtp->pregp_func(); @@ -621,6 +638,8 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) * is empty, we are done. */ lastreport = jiffies; + lastinfo = lastreport; + rtsi = READ_ONCE(rcu_task_stall_info); // Start off with initial wait and slowly back off to 1 HZ wait. fract = rtp->init_fract; @@ -630,7 +649,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) bool needreport; int rtst; - /* Slowly back off waiting for holdouts */ + // Slowly back off waiting for holdouts set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS); schedule_timeout_idle(fract); @@ -639,12 +658,23 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) rtst = READ_ONCE(rcu_task_stall_timeout); needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); - if (needreport) + if (needreport) { lastreport = jiffies; + reported = true; + } firstreport = true; WARN_ON(signal_pending(current)); set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS); rtp->holdouts_func(&holdouts, needreport, &firstreport); + + // Print pre-stall informational messages if needed. + j = jiffies; + if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) { + lastinfo = j; + rtsi = rtsi * rcu_task_stall_info_mult; + pr_info("%s: %s grace period %lu is %lu jiffies old.\n", + __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start); + } } set_tasks_gp_state(rtp, RTGS_POST_GP); -- cgit v1.2.3 From 6172de3c7f1171e55314bfc5ee4ae6edd225b048 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Feb 2022 12:26:00 -0800 Subject: docs: Add documentation for rude and trace RCU flavors This commit belatedly adds documentation of Tasks Rude RCU and Tasks Trace RCU to RCU's requirements document. Tested-by: Bagas Sanjaya Reviewed-by: Steven Rostedt (Google) Signed-off-by: Paul E. McKenney --- .../RCU/Design/Requirements/Requirements.rst | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 45278e2974c0..ff2be1ac54c4 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In three APIs are therefore implemented by separate functions that check for voluntary context switches. +Tasks Rude RCU +~~~~~~~~~~~~~~ + +Some forms of tracing need to wait for all preemption-disabled regions +of code running on any online CPU, including those executed when RCU is +not watching. This means that synchronize_rcu() is insufficient, and +Tasks Rude RCU must be used instead. This flavor of RCU does its work by +forcing a workqueue to be scheduled on each online CPU, hence the "Rude" +moniker. And this operation is considered to be quite rude by real-time +workloads that don't want their ``nohz_full`` CPUs receiving IPIs and +by battery-powered systems that don't want their idle CPUs to be awakened. + +The tasks-rude-RCU API is also reader-marking-free and thus quite compact, +consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(), +and rcu_barrier_tasks_rude(). + +Tasks Trace RCU +~~~~~~~~~~~~~~~ + +Some forms of tracing need to sleep in readers, but cannot tolerate +SRCU's read-side overhead, which includes a full memory barrier in both +srcu_read_lock() and srcu_read_unlock(). This need is handled by a +Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with +readers. Real-time systems that cannot tolerate IPIs may build their +kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at +the expense of adding full memory barriers to the read-side primitives. + +The tasks-trace-RCU API is also reasonably compact, +consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(), +rcu_read_lock_trace_held(), call_rcu_tasks_trace(), +synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace(). + Possible Future Changes ----------------------- -- cgit v1.2.3 From 404147faaaf28319ba8e60392ba9d4f3b6055ad5 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 30 Mar 2022 23:41:00 +0900 Subject: docs: Update RCU cross-references as suggested in doc-guide The RCU documentation contains old-style cross references which do not follow the best practices outlined in doc-guide/sphinx.rst. In addition, some of the cross references use URLs that should be replaced by pathnames. Update all of these cross references and adjust the surrounding words. Summary of changes: - out-of-date plaintext file names (*.txt) -> *.rst - references by :ref: tags -> path names of *.rst * use relative paths to .rst files under the RCU/ subdirectory * use abs paths of Documentation/xxx for other .rst files - references by URL under https://www.kernel.org/ -> paths of *.rst - adjust surrounding words of some of updated references. Note: The automarkup.py script interprets references via "*.txt" as if they were via "*.rst", so the *.txt -> *.rst changes should be regarded as cleanups rather than bug fixes. Cc: rcu@vger.kernel.org Cc: linux-doc@vger.kernel.org Tested-by: Bagas Sanjaya Reviewed-by: Steven Rostedt (Google) Signed-off-by: Akira Yokosawa Signed-off-by: Paul E. McKenney --- .../RCU/Design/Data-Structures/Data-Structures.rst | 2 +- .../Expedited-Grace-Periods.rst | 2 +- Documentation/RCU/Design/Requirements/Requirements.rst | 4 ++-- Documentation/RCU/arrayRCU.rst | 4 ++-- Documentation/RCU/checklist.rst | 9 ++++----- Documentation/RCU/rcu.rst | 13 ++++++------- Documentation/RCU/rculist_nulls.rst | 2 +- Documentation/RCU/whatisRCU.rst | 18 ++++++++---------- 8 files changed, 25 insertions(+), 29 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst index f4efd6897b09..b34990c7c377 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst @@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to and from either dyntick-idle or user mode, so that this counter has an even value when the CPU is in dyntick-idle mode or user mode and an odd value otherwise. The transitions to/from user mode need to be counted -for user mode adaptive-ticks support (see timers/NO_HZ.txt). +for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst). The ``->rcu_need_heavy_qs`` field is used to record the fact that the RCU core code would really like to see a quiescent state from the diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst index 6f89cf1e567d..c9c957c85bac 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst @@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace period also drove it to completion. This straightforward approach had the disadvantage of needing to account for POSIX signals sent to user tasks, so more recent implemementations use the Linux kernel's -`workqueues `__. +workqueues (see Documentation/core-api/workqueue.rst). The requesting task still does counter snapshotting and funnel-lock processing, but the task reaching the top of the funnel lock does a diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index ff2be1ac54c4..04ed8bf27a0e 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the outermost RCU read-side critical section containing that rcu_dereference(), unless protection of the corresponding data element has been passed from RCU to some other synchronization -mechanism, most commonly locking or `reference -counting `__. +mechanism, most commonly locking or reference counting +(see ../../rcuref.rst). .. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF] .. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst index 4051ea3871ef..a5f2ff8fc54c 100644 --- a/Documentation/RCU/arrayRCU.rst +++ b/Documentation/RCU/arrayRCU.rst @@ -33,8 +33,8 @@ Situation 1: Hash Tables Hash tables are often implemented as an array, where each array entry has a linked-list hash chain. Each hash chain can be protected by RCU -as described in the listRCU.txt document. This approach also applies -to other array-of-list situations, such as radix trees. +as described in listRCU.rst. This approach also applies to other +array-of-list situations, such as radix trees. .. _static_arrays: diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index f4545b7c9a63..42cc5d891bd2 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome! prevents destructive compiler optimizations. However, with a bit of devious creativity, it is possible to mishandle the return value from rcu_dereference(). - Please see rcu_dereference.txt in this directory for - more information. + Please see rcu_dereference.rst for more information. The rcu_dereference() primitive is used by the various "_rcu()" list-traversal primitives, such @@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome! primitives. This is particularly useful in code that is common to readers and updaters. However, lockdep will complain if you access rcu_dereference() outside - of an RCU read-side critical section. See lockdep.txt + of an RCU read-side critical section. See lockdep.rst to learn what to do about this. Of course, neither rcu_dereference() nor the "_rcu()" @@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome! primitives when the update-side lock is held is that doing so can be quite helpful in reducing code bloat when common code is shared between readers and updaters. Additional primitives - are provided for this case, as discussed in lockdep.txt. + are provided for this case, as discussed in lockdep.rst. One exception to this rule is when data is only ever added to the linked data structure, and is never removed during any @@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome! both rcu_barrier() and synchronize_rcu(), if necessary, using something like workqueues to to execute them concurrently. - See rcubarrier.txt for more information. + See rcubarrier.rst for more information. diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst index 0e03c6ef3147..3cfe01ba9a49 100644 --- a/Documentation/RCU/rcu.rst +++ b/Documentation/RCU/rcu.rst @@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period must be long enough that any readers accessing the item being deleted have since dropped their references. For example, an RCU-protected deletion from a linked list would first remove the item from the list, wait for -a grace period to elapse, then free the element. See the -:ref:`Documentation/RCU/listRCU.rst ` for more information on -using RCU with linked lists. +a grace period to elapse, then free the element. See listRCU.rst for more +information on using RCU with linked lists. Frequently Asked Questions -------------------------- @@ -50,7 +49,7 @@ Frequently Asked Questions - If I am running on a uniprocessor kernel, which can only do one thing at a time, why should I wait for a grace period? - See :ref:`Documentation/RCU/UP.rst ` for more information. + See UP.rst for more information. - How can I see where RCU is currently used in the Linux kernel? @@ -64,13 +63,13 @@ Frequently Asked Questions - What guidelines should I follow when writing code that uses RCU? - See the checklist.txt file in this directory. + See checklist.rst. - Why the name "RCU"? "RCU" stands for "read-copy update". - :ref:`Documentation/RCU/listRCU.rst ` has more information on where - this name came from, search for "read-copy update" to find it. + listRCU.rst has more information on where this name came from, search + for "read-copy update" to find it. - I hear that RCU is patented? What is with that? diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst index a9fc774bc400..ca4692775ad4 100644 --- a/Documentation/RCU/rculist_nulls.rst +++ b/Documentation/RCU/rculist_nulls.rst @@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to protect read-mostly linked lists and objects using SLAB_TYPESAFE_BY_RCU allocations. -Please read the basics in Documentation/RCU/listRCU.rst +Please read the basics in listRCU.rst. Using 'nulls' ============= diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index c34d2212eaca..77ea260efd12 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -224,7 +224,7 @@ synchronize_rcu() be delayed. This property results in system resilience in face of denial-of-service attacks. Code using call_rcu() should limit update rate in order to gain this same sort of resilience. See - checklist.txt for some approaches to limiting the update rate. + checklist.rst for some approaches to limiting the update rate. rcu_assign_pointer() ^^^^^^^^^^^^^^^^^^^^ @@ -318,7 +318,7 @@ rcu_dereference() must prohibit. The rcu_dereference_protected() variant takes a lockdep expression to indicate which locks must be acquired by the caller. If the indicated protection is not provided, - a lockdep splat is emitted. See Documentation/RCU/Design/Requirements/Requirements.rst + a lockdep splat is emitted. See Design/Requirements/Requirements.rst and the API's code comments for more details and example usage. .. [2] If the list_for_each_entry_rcu() instance might be used by @@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon. This section shows a simple use of the core RCU API to protect a global pointer to a dynamically allocated structure. More-typical -uses of RCU may be found in :ref:`listRCU.rst `, -:ref:`arrayRCU.rst `, and :ref:`NMI-RCU.rst `. +uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst. :: struct foo { @@ -482,10 +481,9 @@ So, to sum up: RCU read-side critical sections that might be referencing that data item. -See checklist.txt for additional rules to follow when using RCU. -And again, more-typical uses of RCU may be found in :ref:`listRCU.rst -`, :ref:`arrayRCU.rst `, and :ref:`NMI-RCU.rst -`. +See checklist.rst for additional rules to follow when using RCU. +And again, more-typical uses of RCU may be found in listRCU.rst, +arrayRCU.rst, and NMI-RCU.rst. .. _4_whatisRCU: @@ -579,7 +577,7 @@ to avoid having to write your own callback:: kfree_rcu(old_fp, rcu); -Again, see checklist.txt for additional rules governing the use of RCU. +Again, see checklist.rst for additional rules governing the use of RCU. .. _5_whatisRCU: @@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise. The smp_mb__after_spinlock() promotes synchronize_rcu() to a full memory barrier in compliance with the "Memory-Barrier Guarantees" listed in: - Documentation/RCU/Design/Requirements/Requirements.rst + Design/Requirements/Requirements.rst It is possible to nest rcu_read_lock(), since reader-writer locks may be recursively acquired. Note also that rcu_read_lock() is immune -- cgit v1.2.3 From 3791a22374715b36ad806db13d8b2afb1b57fd36 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Feb 2022 18:08:33 -0800 Subject: kernel/smp: Provide boot-time timeout for CSD lock diagnostics Debugging of problems involving insanely long-running SMI handlers proceeds better if the CSD-lock timeout can be adjusted. This commit therefore provides a new smp.csd_lock_timeout kernel boot parameter that specifies the timeout in milliseconds. The default remains at the previously hard-coded value of five seconds. [ paulmck: Apply feedback from Juergen Gross. ] Cc: Rik van Riel Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Reviewed-by: Juergen Gross Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++ kernel/smp.c | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..645c4c001b16 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5377,6 +5377,17 @@ smart2= [HW] Format: [,[,...,]] + smp.csd_lock_timeout= [KNL] + Specify the period of time in milliseconds + that smp_call_function() and friends will wait + for a CPU to release the CSD lock. This is + useful when diagnosing bugs involving CPUs + disabling interrupts for extended periods + of time. Defaults to 5,000 milliseconds, and + setting a value of zero disables this feature. + This feature may be more efficiently disabled + using the csdlock_debug- kernel parameter. + smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port smsc-ircc2.ircc_sir= [HW] SIR base I/O port diff --git a/kernel/smp.c b/kernel/smp.c index 01a7c1706a58..6a1f1daa3dc4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -183,7 +183,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); static DEFINE_PER_CPU(void *, cur_csd_info); static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); -#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) +static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ +module_param(csd_lock_timeout, ulong, 0444); + static atomic_t csd_bug_count = ATOMIC_INIT(0); static u64 cfd_seq; @@ -329,6 +331,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * u64 ts2, ts_delta; call_single_data_t *cpu_cur_csd; unsigned int flags = READ_ONCE(csd->node.u_flags); + unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC; if (!(flags & CSD_FLAG_LOCK)) { if (!unlikely(*bug_id)) @@ -341,7 +344,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * ts2 = sched_clock(); ts_delta = ts2 - *ts1; - if (likely(ts_delta <= CSD_LOCK_TIMEOUT)) + if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0)) return false; firsttime = !*bug_id; -- cgit v1.2.3 From a57ffb3c6b67e59e8632f731414b792eacc6cca0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Jan 2022 11:21:30 -0800 Subject: srcu: Automatically determine size-transition strategy at boot This commit adds a srcutree.convert_to_big option of zero that causes SRCU to decide at boot whether to wait for contention (small systems) or immediately expand to large (large systems). A new srcutree.big_cpu_lim (defaulting to 128) defines how many CPUs constitute a large system. Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++ kernel/rcu/srcutree.c | 23 ++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 177e688768c0..0a094bb2d722 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5608,6 +5608,15 @@ off: Disable mitigation and remove performance impact to RDRAND and RDSEED + srcutree.big_cpu_lim [KNL] + Specifies the number of CPUs constituting a + large system, such that srcu_struct structures + should immediately allocate an srcu_node array. + This kernel-boot parameter defaults to 128, + but takes effect only when the low-order four + bits of srcutree.convert_to_big is equal to 3 + (decide at boot). + srcutree.convert_to_big [KNL] Specifies under what conditions an SRCU tree srcu_struct structure will be converted to big @@ -5616,6 +5625,7 @@ 0: Never. 1: At init_srcu_struct() time. 2: When rcutorture decides to. + 3: Decide at boot time (default). 0x1X: Above plus if high contention. Either way, the srcu_node tree will be sized based diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 0bc6a0a3edee..b9dec26245e0 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -41,10 +41,10 @@ module_param(counter_wrap_check, ulong, 0444); /* * Control conversion to SRCU_SIZE_BIG: - * 0: Don't convert at all (default). + * 0: Don't convert at all. * 1: Convert at init_srcu_struct() time. * 2: Convert when rcutorture invokes srcu_torture_stats_print(). - * 3: Decide at boot time based on system shape. + * 3: Decide at boot time based on system shape (default). * 0x1x: Convert when excessive contention encountered. */ #define SRCU_SIZING_NONE 0 @@ -57,9 +57,13 @@ module_param(counter_wrap_check, ulong, 0444); #define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT)) #define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE)) #define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND) -static int convert_to_big = SRCU_SIZING_NONE; +static int convert_to_big = SRCU_SIZING_AUTO; module_param(convert_to_big, int, 0444); +/* Number of CPUs to trigger init_srcu_struct()-time transition to big. */ +static int big_cpu_lim __read_mostly = 128; +module_param(big_cpu_lim, int, 0444); + /* Contention events per jiffy to initiate transition to big. */ static int small_contention_lim __read_mostly = 100; module_param(small_contention_lim, int, 0444); @@ -1619,6 +1623,17 @@ void __init srcu_init(void) { struct srcu_struct *ssp; + /* Decide on srcu_struct-size strategy. */ + if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) { + if (nr_cpu_ids >= big_cpu_lim) { + convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention. + pr_info("%s: Setting srcu_struct sizes to big.\n", __func__); + } else { + convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND; + pr_info("%s: Setting srcu_struct sizes based on contention.\n", __func__); + } + } + /* * Once that is set, call_srcu() can follow the normal path and * queue delayed work. This must follow RCU workqueues creation @@ -1629,6 +1644,8 @@ void __init srcu_init(void) ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); list_del_init(&ssp->work.work.entry); + if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL) + ssp->srcu_size_state = SRCU_SIZE_ALLOC; queue_work(rcu_gp_wq, &ssp->work.work); } } -- cgit v1.2.3 From 28b3ae426598e722cf5d5ab9cc7038791b955a56 Mon Sep 17 00:00:00 2001 From: Uladzislau Rezki Date: Wed, 16 Feb 2022 14:52:09 +0100 Subject: rcu: Introduce CONFIG_RCU_EXP_CPU_STALL_TIMEOUT Currently both expedited and regular grace period stall warnings use a single timeout value that with units of seconds. However, recent Android use cases problem require a sub-100-millisecond expedited RCU CPU stall warning. Given that expedited RCU grace periods normally complete in far less than a single millisecond, especially for small systems, this is not unreasonable. Therefore introduce the CONFIG_RCU_EXP_CPU_STALL_TIMEOUT kernel configuration that defaults to 20 msec on Android and remains the same as that of the non-expedited stall warnings otherwise. It also can be changed in run-time via: /sys/.../parameters/rcu_exp_cpu_stall_timeout. [ paulmck: Default of zero to use CONFIG_RCU_STALL_TIMEOUT. ] Signed-off-by: Uladzislau Rezki Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.rst | 20 ++++++++++++++++++ Documentation/admin-guide/kernel-parameters.txt | 12 +++++++++++ kernel/rcu/Kconfig.debug | 14 +++++++++++++ kernel/rcu/rcu.h | 2 ++ kernel/rcu/tree_exp.h | 4 ++-- kernel/rcu/tree_stall.h | 28 +++++++++++++++++++++++++ kernel/rcu/update.c | 2 ++ 7 files changed, 80 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index 78404625bad2..794837eb519b 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT Stall-warning messages may be enabled and disabled completely via /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress. +CONFIG_RCU_EXP_CPU_STALL_TIMEOUT +-------------------------------- + + Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for + the expedited grace period. This parameter defines the period + of time that RCU will wait from the beginning of an expedited + grace period until it issues an RCU CPU stall warning. This time + period is normally 20 milliseconds on Android devices. A zero + value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used, + after conversion to milliseconds. + + This configuration parameter may be changed at runtime via the + /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however + this parameter is checked only at the beginning of a cycle. If you + are in a current stall cycle, setting it to a new value will change + the timeout for the -next- stall. + + Stall-warning messages may be enabled and disabled completely via + /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress. + RCU_STALL_DELAY_DELTA --------------------- diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..5e21a3fb57c4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4893,6 +4893,18 @@ rcupdate.rcu_cpu_stall_timeout= [KNL] Set timeout for RCU CPU stall warning messages. + The value is in seconds and the maximum allowed + value is 300 seconds. + + rcupdate.rcu_exp_cpu_stall_timeout= [KNL] + Set timeout for expedited RCU CPU stall warning + messages. The value is in milliseconds + and the maximum allowed value is 21000 + milliseconds. Please note that this value is + adjusted to an arch timer tick resolution. + Setting this to zero causes the value from + rcupdate.rcu_cpu_stall_timeout to be used (after + conversion from seconds to milliseconds). rcupdate.rcu_expedited= [KNL] Use expedited grace-period primitives, for diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 4fd64999300f..0b397b5bf846 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -91,6 +91,20 @@ config RCU_CPU_STALL_TIMEOUT RCU grace period persists, additional CPU stall warnings are printed at more widely spaced intervals. +config RCU_EXP_CPU_STALL_TIMEOUT + int "Expedited RCU CPU stall timeout in milliseconds" + depends on RCU_STALL_COMMON + range 0 21000 + default 20 if ANDROID + default 0 if !ANDROID + help + If a given expedited RCU grace period extends more than the + specified number of milliseconds, a CPU stall warning is printed. + If the RCU grace period persists, additional CPU stall warnings + are printed at more widely spaced intervals. A value of zero + says to use the RCU_CPU_STALL_TIMEOUT value converted from + seconds to milliseconds. + config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 24b5f2c2de87..20f0300f6cb1 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void) extern int rcu_cpu_stall_ftrace_dump; extern int rcu_cpu_stall_suppress; extern int rcu_cpu_stall_timeout; +extern int rcu_exp_cpu_stall_timeout; int rcu_jiffies_till_stall_check(void); +int rcu_exp_jiffies_till_stall_check(void); static inline bool rcu_stall_is_suppressed(void) { diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 60197ea24ceb..b1f52b59fa4b 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -496,7 +496,7 @@ static void synchronize_rcu_expedited_wait(void) struct rcu_node *rnp_root = rcu_get_root(); trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait")); - jiffies_stall = rcu_jiffies_till_stall_check(); + jiffies_stall = rcu_exp_jiffies_till_stall_check(); jiffies_start = jiffies; if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) { if (synchronize_rcu_expedited_wait_once(1)) @@ -571,7 +571,7 @@ static void synchronize_rcu_expedited_wait(void) dump_cpu_task(cpu); } } - jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3; + jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3; } } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 0c5d8516516a..009d3f9305cf 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly; #define RCU_STALL_MIGHT_DIV 8 #define RCU_STALL_MIGHT_MIN (2 * HZ) +int rcu_exp_jiffies_till_stall_check(void) +{ + int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout); + int exp_stall_delay_delta = 0; + int till_stall_check; + + // Zero says to use rcu_cpu_stall_timeout, but in milliseconds. + if (!cpu_stall_timeout) + cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check()); + + // Limit check must be consistent with the Kconfig limits for + // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range. + // The minimum clamped value is "2UL", because at least one full + // tick has to be guaranteed. + till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ); + + if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout) + WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check)); + +#ifdef CONFIG_PROVE_RCU + /* Add extra ~25% out of till_stall_check. */ + exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1; +#endif + + return till_stall_check + exp_stall_delay_delta; +} +EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check); + /* Limit-check stall timeouts specified at boottime and runtime. */ int rcu_jiffies_till_stall_check(void) { diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 180ff9c41fa8..fc7fef575606 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); module_param(rcu_cpu_stall_suppress, int, 0644); int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_timeout, int, 0644); +int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT; +module_param(rcu_exp_cpu_stall_timeout, int, 0644); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall -- cgit v1.2.3