summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--kernel/cgroup.c7
-rw-r--r--kernel/cpu.c33
-rw-r--r--kernel/cpuset.c4
-rw-r--r--kernel/futex.c27
-rw-r--r--kernel/irq/ipi.c1
-rw-r--r--kernel/locking/lockdep.c37
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/qspinlock_stat.h8
-rw-r--r--kernel/workqueue.c29
10 files changed, 128 insertions, 22 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2e08f8e9b771..db2574e7b8b0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1374,6 +1374,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
+ BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
verbose("BPF_LD_ABS uses reserved fields\n");
return -EINVAL;
@@ -2029,7 +2030,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
if (IS_ERR(map)) {
verbose("fd %d is not pointing to valid bpf_map\n",
insn->imm);
- fdput(f);
return PTR_ERR(map);
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05c0b0f..909a7d31ffd3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
+ struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
- int ret;
+ int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
@@ -2875,8 +2876,10 @@ out_unlock_rcu:
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
cgroup_kn_unlock(of->kn);
- cpuset_post_attach_flush();
return ret ?: nbytes;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ea42e8da861..3e3f6e49eabb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -36,6 +36,7 @@
* @target: The target state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
+ * @rollback: Perform a rollback
* @cb_stat: The state for a single callback (install/uninstall)
* @cb: Single callback function (install/uninstall)
* @result: Result of the operation
@@ -47,6 +48,7 @@ struct cpuhp_cpu_state {
#ifdef CONFIG_SMP
struct task_struct *thread;
bool should_run;
+ bool rollback;
enum cpuhp_state cb_state;
int (*cb)(unsigned int cpu);
int result;
@@ -301,6 +303,11 @@ static int cpu_notify(unsigned long val, unsigned int cpu)
return __cpu_notify(val, cpu, -1, NULL);
}
+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+ BUG_ON(cpu_notify(val, cpu));
+}
+
/* Notifier wrappers for transitioning to state machine */
static int notify_prepare(unsigned int cpu)
{
@@ -477,6 +484,16 @@ static void cpuhp_thread_fun(unsigned int cpu)
} else {
ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
}
+ } else if (st->rollback) {
+ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+ undo_cpu_down(cpu, st, cpuhp_ap_states);
+ /*
+ * This is a momentary workaround to keep the notifier users
+ * happy. Will go away once we got rid of the notifiers.
+ */
+ cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
+ st->rollback = false;
} else {
/* Cannot happen .... */
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -636,11 +653,6 @@ static inline void check_for_tasks(int dead_cpu)
read_unlock(&tasklist_lock);
}
-static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
-{
- BUG_ON(cpu_notify(val, cpu));
-}
-
static int notify_down_prepare(unsigned int cpu)
{
int err, nr_calls = 0;
@@ -721,9 +733,10 @@ static int takedown_cpu(unsigned int cpu)
*/
err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
- /* CPU didn't die: tell everyone. Can't complain. */
- cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
+ /* CPU refused to die */
irq_unlock_sparse();
+ /* Unpark the hotplug thread so we can rollback there */
+ kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
return err;
}
BUG_ON(cpu_online(cpu));
@@ -832,6 +845,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+ st->target = prev_state;
+ st->rollback = true;
+ cpuhp_kick_ap_work(cpu);
+ }
hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
out:
@@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.name = "notify:online",
.startup = notify_online,
.teardown = notify_down_prepare,
+ .skip_onerr = true,
},
#endif
/*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00ab5c2b7c5b..1902956baba1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -58,7 +58,6 @@
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/wait.h>
@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
}
}
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
{
flush_workqueue(cpuset_migrate_mm_wq);
}
@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.legacy_cftypes = files,
.early_init = true,
diff --git a/kernel/futex.c b/kernel/futex.c
index a5d2e74c89e0..c20f06f38ef3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1295,10 +1295,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (unlikely(should_fail_futex(true)))
ret = -EFAULT;
- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
ret = -EFAULT;
- else if (curval != uval)
- ret = -EINVAL;
+ } else if (curval != uval) {
+ /*
+ * If a unconditional UNLOCK_PI operation (user space did not
+ * try the TID->0 transition) raced with a waiter setting the
+ * FUTEX_WAITERS flag between get_user() and locking the hash
+ * bucket lock, retry the operation.
+ */
+ if ((FUTEX_TID_MASK & curval) == uval)
+ ret = -EAGAIN;
+ else
+ ret = -EINVAL;
+ }
if (ret) {
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret;
@@ -1525,8 +1535,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
if (likely(&hb1->chain != &hb2->chain)) {
plist_del(&q->list, &hb1->chain);
hb_waiters_dec(hb1);
- plist_add(&q->list, &hb2->chain);
hb_waiters_inc(hb2);
+ plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
}
get_futex_key_refs(key2);
@@ -2623,6 +2633,15 @@ retry:
if (ret == -EFAULT)
goto pi_faulted;
/*
+ * A unconditional UNLOCK_PI op raced against a waiter
+ * setting the FUTEX_WAITERS bit. Try again.
+ */
+ if (ret == -EAGAIN) {
+ spin_unlock(&hb->lock);
+ put_futex_key(&key);
+ goto retry;
+ }
+ /*
* wake_futex_pi has detected invalid state. Tell user
* space.
*/
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index c37f34b00a11..14777af8e097 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -94,6 +94,7 @@ unsigned int irq_reserve_ipi(struct irq_domain *domain,
data = irq_get_irq_data(virq + i);
cpumask_copy(data->common->affinity, dest);
data->common->ipi_offset = offset;
+ irq_set_status_flags(virq + i, IRQ_NO_BALANCING);
}
return virq;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ed9410936a22..78c1c0ee6dc1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2176,15 +2176,37 @@ cache_hit:
chain->irq_context = hlock->irq_context;
i = get_first_held_lock(curr, hlock);
chain->depth = curr->lockdep_depth + 1 - i;
+
+ BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
+ BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks));
+ BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
+
if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = nr_chain_hlocks;
- nr_chain_hlocks += chain->depth;
for (j = 0; j < chain->depth - 1; j++, i++) {
int lock_id = curr->held_locks[i].class_idx - 1;
chain_hlocks[chain->base + j] = lock_id;
}
chain_hlocks[chain->base + j] = class - lock_classes;
}
+
+ if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
+ nr_chain_hlocks += chain->depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+ /*
+ * Important for check_no_collision().
+ */
+ if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
+ if (debug_locks_off_graph_unlock())
+ return 0;
+
+ print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+ dump_stack();
+ return 0;
+ }
+#endif
+
hlist_add_head_rcu(&chain->entry, hash_head);
debug_atomic_inc(chain_lookup_misses);
inc_chains();
@@ -2932,6 +2954,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
return 1;
}
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+ return 2 * !!task->hardirq_context + !!task->softirq_context;
+}
+
static int separate_irq_context(struct task_struct *curr,
struct held_lock *hlock)
{
@@ -2940,8 +2967,6 @@ static int separate_irq_context(struct task_struct *curr,
/*
* Keep track of points where we cross into an interrupt context:
*/
- hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
- curr->softirq_context;
if (depth) {
struct held_lock *prev_hlock;
@@ -2973,6 +2998,11 @@ static inline int mark_irqflags(struct task_struct *curr,
return 1;
}
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+ return 0;
+}
+
static inline int separate_irq_context(struct task_struct *curr,
struct held_lock *hlock)
{
@@ -3241,6 +3271,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->acquire_ip = ip;
hlock->instance = lock;
hlock->nest_lock = nest_lock;
+ hlock->irq_context = task_irq_context(curr);
hlock->trylock = trylock;
hlock->read = read;
hlock->check = check;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index dbb61a302548..a0f61effad25 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v)
int i;
if (v == SEQ_START_TOKEN) {
+ if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
+ seq_printf(m, "(buggered) ");
seq_printf(m, "all lock chains:\n");
return 0;
}
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index eb2a2c9bc3fc..d734b7502001 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -136,10 +136,12 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf,
}
if (counter == qstat_pv_hash_hops) {
- u64 frac;
+ u64 frac = 0;
- frac = 100ULL * do_div(stat, kicks);
- frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
+ if (kicks) {
+ frac = 100ULL * do_div(stat, kicks);
+ frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
+ }
/*
* Return a X.XX decimal number
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2232ae3e3ad6..3bfdff06eea7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -666,6 +666,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
*/
smp_wmb();
set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+ /*
+ * The following mb guarantees that previous clear of a PENDING bit
+ * will not be reordered with any speculative LOADS or STORES from
+ * work->current_func, which is executed afterwards. This possible
+ * reordering can lead to a missed execution on attempt to qeueue
+ * the same @work. E.g. consider this case:
+ *
+ * CPU#0 CPU#1
+ * ---------------------------- --------------------------------
+ *
+ * 1 STORE event_indicated
+ * 2 queue_work_on() {
+ * 3 test_and_set_bit(PENDING)
+ * 4 } set_..._and_clear_pending() {
+ * 5 set_work_data() # clear bit
+ * 6 smp_mb()
+ * 7 work->current_func() {
+ * 8 LOAD event_indicated
+ * }
+ *
+ * Without an explicit full barrier speculative LOAD on line 8 can
+ * be executed before CPU#0 does STORE on line 1. If that happens,
+ * CPU#0 observes the PENDING bit is still set and new execution of
+ * a @work is not queued in a hope, that CPU#1 will eventually
+ * finish the queued @work. Meanwhile CPU#1 does not see
+ * event_indicated is set, because speculative LOAD was executed
+ * before actual STORE.
+ */
+ smp_mb();
}
static void clear_work_data(struct work_struct *work)