summaryrefslogtreecommitdiffstats
path: root/kernel/rcu
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu')
-rw-r--r--kernel/rcu/rcu.h8
-rw-r--r--kernel/rcu/srcu.c4
-rw-r--r--kernel/rcu/tree.c59
-rw-r--r--kernel/rcu/tree.h8
-rw-r--r--kernel/rcu/tree_plugin.h46
-rw-r--r--kernel/rcu/update.c3
6 files changed, 73 insertions, 55 deletions
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index bfda2726ca45..ff1a6de62f17 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -99,6 +99,10 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
void kfree(const void *);
+/*
+ * Reclaim the specified callback, either by invoking it (non-lazy case)
+ * or freeing it directly (lazy case). Return true if lazy, false otherwise.
+ */
static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
{
unsigned long offset = (unsigned long)head->func;
@@ -108,12 +112,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
kfree((void *)head - offset);
rcu_lock_release(&rcu_callback_map);
- return 1;
+ return true;
} else {
RCU_TRACE(trace_rcu_invoke_callback(rn, head));
head->func(head);
rcu_lock_release(&rcu_callback_map);
- return 0;
+ return false;
}
}
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index c639556f3fa0..e037f3eb2f7b 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -298,9 +298,9 @@ int __srcu_read_lock(struct srcu_struct *sp)
idx = ACCESS_ONCE(sp->completed) & 0x1;
preempt_disable();
- ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
+ __this_cpu_inc(sp->per_cpu_ref->c[idx]);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
- ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
+ __this_cpu_inc(sp->per_cpu_ref->seq[idx]);
preempt_enable();
return idx;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 625d0b0cd75a..1b70cb6fbe3c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1013,10 +1013,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
}
/*
- * Dump stacks of all tasks running on stalled CPUs. This is a fallback
- * for architectures that do not implement trigger_all_cpu_backtrace().
- * The NMI-triggered stack traces are more accurate because they are
- * printed by the target CPU.
+ * Dump stacks of all tasks running on stalled CPUs.
*/
static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
{
@@ -1094,7 +1091,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
(long)rsp->gpnum, (long)rsp->completed, totqlen);
if (ndetected == 0)
pr_err("INFO: Stall ended before state dump start\n");
- else if (!trigger_all_cpu_backtrace())
+ else
rcu_dump_cpu_stacks(rsp);
/* Complain about tasks blocking the grace period. */
@@ -1125,8 +1122,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
jiffies - rsp->gp_start,
(long)rsp->gpnum, (long)rsp->completed, totqlen);
- if (!trigger_all_cpu_backtrace())
- dump_stack();
+ rcu_dump_cpu_stacks(rsp);
raw_spin_lock_irqsave(&rnp->lock, flags);
if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
@@ -1305,10 +1301,16 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* believe that a grace period is in progress, then we must wait
* for the one following, which is in "c". Because our request
* will be noticed at the end of the current grace period, we don't
- * need to explicitly start one.
+ * need to explicitly start one. We only do the lockless check
+ * of rnp_root's fields if the current rcu_node structure thinks
+ * there is no grace period in flight, and because we hold rnp->lock,
+ * the only possible change is when rnp_root's two fields are
+ * equal, in which case rnp_root->gpnum might be concurrently
+ * incremented. But that is OK, as it will just result in our
+ * doing some extra useless work.
*/
if (rnp->gpnum != rnp->completed ||
- ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+ ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
rnp->need_future_gp[c & 0x1]++;
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
goto out;
@@ -1645,11 +1647,6 @@ static int rcu_gp_init(struct rcu_state *rsp)
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock);
-#ifdef CONFIG_PROVE_RCU_DELAY
- if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
- system_state == SYSTEM_RUNNING)
- udelay(200);
-#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
cond_resched();
}
@@ -2347,7 +2344,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
}
smp_mb(); /* List handling before counting for rcu_barrier(). */
rdp->qlen_lazy -= count_lazy;
- ACCESS_ONCE(rdp->qlen) -= count;
+ ACCESS_ONCE(rdp->qlen) = rdp->qlen - count;
rdp->n_cbs_invoked += count;
/* Reinstate batch limit if we have worked down the excess. */
@@ -2485,14 +2482,14 @@ static void force_quiescent_state(struct rcu_state *rsp)
struct rcu_node *rnp_old = NULL;
/* Funnel through hierarchy to reduce memory contention. */
- rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+ rnp = __this_cpu_read(rsp->rda->mynode);
for (; rnp != NULL; rnp = rnp->parent) {
ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
!raw_spin_trylock(&rnp->fqslock);
if (rnp_old != NULL)
raw_spin_unlock(&rnp_old->fqslock);
if (ret) {
- ACCESS_ONCE(rsp->n_force_qs_lh)++;
+ rsp->n_force_qs_lh++;
return;
}
rnp_old = rnp;
@@ -2504,7 +2501,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
smp_mb__after_unlock_lock();
raw_spin_unlock(&rnp_old->fqslock);
if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
- ACCESS_ONCE(rsp->n_force_qs_lh)++;
+ rsp->n_force_qs_lh++;
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
return; /* Someone beat us to it. */
}
@@ -2662,7 +2659,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
unsigned long flags;
struct rcu_data *rdp;
- WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
+ WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
if (debug_rcu_head_queue(head)) {
/* Probable double call_rcu(), so leak the callback. */
ACCESS_ONCE(head->func) = rcu_leak_callback;
@@ -2693,7 +2690,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
local_irq_restore(flags);
return;
}
- ACCESS_ONCE(rdp->qlen)++;
+ ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
if (lazy)
rdp->qlen_lazy++;
else
@@ -3257,7 +3254,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
* ACCESS_ONCE() to prevent the compiler from speculating
* the increment to precede the early-exit check.
*/
- ACCESS_ONCE(rsp->n_barrier_done)++;
+ ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
@@ -3307,7 +3304,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
/* Increment ->n_barrier_done to prevent duplicate work. */
smp_mb(); /* Keep increment after above mechanism. */
- ACCESS_ONCE(rsp->n_barrier_done)++;
+ ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
smp_mb(); /* Keep increment before caller's subsequent code. */
@@ -3564,14 +3561,16 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
static void __init rcu_init_one(struct rcu_state *rsp,
struct rcu_data __percpu *rda)
{
- static char *buf[] = { "rcu_node_0",
- "rcu_node_1",
- "rcu_node_2",
- "rcu_node_3" }; /* Match MAX_RCU_LVLS */
- static char *fqs[] = { "rcu_node_fqs_0",
- "rcu_node_fqs_1",
- "rcu_node_fqs_2",
- "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
+ static const char * const buf[] = {
+ "rcu_node_0",
+ "rcu_node_1",
+ "rcu_node_2",
+ "rcu_node_3" }; /* Match MAX_RCU_LVLS */
+ static const char * const fqs[] = {
+ "rcu_node_fqs_0",
+ "rcu_node_fqs_1",
+ "rcu_node_fqs_2",
+ "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
static u8 fl_mask = 0x1;
int cpustride = 1;
int i;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e996d1e53c84..71e64c718f75 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -172,6 +172,14 @@ struct rcu_node {
/* queued on this rcu_node structure that */
/* are blocking the current grace period, */
/* there can be no such task. */
+ struct completion boost_completion;
+ /* Used to ensure that the rt_mutex used */
+ /* to carry out the boosting is fully */
+ /* released with no future boostee accesses */
+ /* before that rt_mutex is re-initialized. */
+ struct rt_mutex boost_mtx;
+ /* Used only for the priority-boosting */
+ /* side effect, not as a lock. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
struct task_struct *boost_kthread_task;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 17eed0856b03..f62b7f2f6abd 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -33,6 +33,7 @@
#define RCU_KTHREAD_PRIO 1
#ifdef CONFIG_RCU_BOOST
+#include "../locking/rtmutex_common.h"
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#else
#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
@@ -336,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t)
unsigned long flags;
struct list_head *np;
#ifdef CONFIG_RCU_BOOST
- struct rt_mutex *rbmp = NULL;
+ bool drop_boost_mutex = false;
#endif /* #ifdef CONFIG_RCU_BOOST */
struct rcu_node *rnp;
int special;
@@ -398,11 +399,8 @@ void rcu_read_unlock_special(struct task_struct *t)
#ifdef CONFIG_RCU_BOOST
if (&t->rcu_node_entry == rnp->boost_tasks)
rnp->boost_tasks = np;
- /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
- if (t->rcu_boost_mutex) {
- rbmp = t->rcu_boost_mutex;
- t->rcu_boost_mutex = NULL;
- }
+ /* Snapshot ->boost_mtx ownership with rcu_node lock held. */
+ drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
@@ -427,8 +425,10 @@ void rcu_read_unlock_special(struct task_struct *t)
#ifdef CONFIG_RCU_BOOST
/* Unboost if we were boosted. */
- if (rbmp)
- rt_mutex_unlock(rbmp);
+ if (drop_boost_mutex) {
+ rt_mutex_unlock(&rnp->boost_mtx);
+ complete(&rnp->boost_completion);
+ }
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
@@ -988,6 +988,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
/* Because preemptible RCU does not exist, no quieting of tasks. */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
{
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -1149,7 +1150,6 @@ static void rcu_wake_cond(struct task_struct *t, int status)
static int rcu_boost(struct rcu_node *rnp)
{
unsigned long flags;
- struct rt_mutex mtx;
struct task_struct *t;
struct list_head *tb;
@@ -1200,11 +1200,15 @@ static int rcu_boost(struct rcu_node *rnp)
* section.
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
- rt_mutex_init_proxy_locked(&mtx, t);
- t->rcu_boost_mutex = &mtx;
+ rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
+ init_completion(&rnp->boost_completion);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
- rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
+ /* Lock only for side effect: boosts task t's priority. */
+ rt_mutex_lock(&rnp->boost_mtx);
+ rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
+
+ /* Wait for boostee to be done w/boost_mtx before reinitializing. */
+ wait_for_completion(&rnp->boost_completion);
return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
ACCESS_ONCE(rnp->boost_tasks) != NULL;
@@ -1256,6 +1260,7 @@ static int rcu_boost_kthread(void *arg)
* about it going away.
*/
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
{
struct task_struct *t;
@@ -1491,6 +1496,7 @@ static void rcu_prepare_kthreads(int cpu)
#else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
{
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -3016,12 +3022,16 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
*/
static void rcu_bind_gp_kthread(void)
{
-#ifdef CONFIG_NO_HZ_FULL
- int cpu = ACCESS_ONCE(tick_do_timer_cpu);
+ int __maybe_unused cpu;
- if (cpu < 0 || cpu >= nr_cpu_ids)
+ if (!tick_nohz_full_enabled())
return;
- if (raw_smp_processor_id() != cpu)
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+ cpu = tick_do_timer_cpu;
+ if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+ if (!is_housekeeping_cpu(raw_smp_processor_id()))
+ housekeeping_affine(current);
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index bc7883570530..4056d7992a6c 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -90,9 +90,6 @@ void __rcu_read_unlock(void)
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
-#ifdef CONFIG_PROVE_RCU_DELAY
- udelay(10); /* Make preemption more probable. */
-#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);