Merge branch 'WIP.sched/core' into sched/core

Conflicts: kernel/sched/Makefile Pick up the waitqueue related renames - it didn't get much feedback, so it appears to be uncontroversial. Famous last words? ;-) Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2017-06-20 12:28:21 +0200
committer: Ingo Molnar <mingo@kernel.org> 2017-06-20 12:28:21 +0200
commit: 902b31941327a0e9c0ca9eb7750414ae41bf8a89 (patch)
tree: 300f4226c87e6df403cda861ab6766b23d278f09 /kernel
parent: c5ae366e12b2bd56fc7d7e9d484836bec9ddc110 (diff)
parent: 2055da97389a605c8a00d163d40903afbe413921 (diff)
download: linux-902b31941327a0e9c0ca9eb7750414ae41bf8a89.tar.bz2
35 files changed, 550 insertions, 519 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e00b2333c26..172dc8ee0e3b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
+	array->map.map_flags = attr->map_flags;
 	array->elem_size = elem_size;
 
 	if (!percpu)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 39cfafd895b8..b09185f0f17d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	trie->map.key_size = attr->key_size;
 	trie->map.value_size = attr->value_size;
 	trie->map.max_entries = attr->max_entries;
+	trie->map.map_flags = attr->map_flags;
 	trie->data_size = attr->key_size -
 			  offsetof(struct bpf_lpm_trie_key, data);
 	trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 4dfd6f2ec2f9..31147d730abf 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->map.key_size = attr->key_size;
 	smap->map.value_size = value_size;
 	smap->map.max_entries = attr->max_entries;
+	smap->map.map_flags = attr->map_flags;
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c72cd41f5b8b..339c8a1371de 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+
+	memset(&regs[regno], 0, sizeof(regs[regno]));
+	regs[regno].type = NOT_INIT;
+	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
 static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
-	for (i = 0; i < MAX_BPF_REG; i++) {
-		regs[i].type = NOT_INIT;
-		regs[i].imm = 0;
-		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
-		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
-		regs[i].min_align = 0;
-		regs[i].aux_off = 0;
-		regs[i].aux_off_align = 0;
-	}
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
 
 	/* frame pointer */
 	regs[BPF_REG_FP].type = FRAME_PTR;
@@ -843,9 +846,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 {
 	bool strict = env->strict_alignment;
 
-	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
-		strict = true;
-
 	switch (reg->type) {
 	case PTR_TO_PACKET:
 		return check_pkt_ptr_alignment(reg, off, size, strict);
@@ -1349,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs = state->regs;
-	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1416,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	}
 
 	/* reset caller saved regs */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
@@ -2448,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -2481,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	}
 
 	/* reset caller saved regs to unreadable */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
@@ -2696,7 +2688,8 @@ err_free:
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *old,
 				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
@@ -2739,7 +2732,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
 	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
 	 * so they will be good with r=30 and we can prune the search.
 	 */
-	if (old->off <= cur->off &&
+	if (!env->strict_alignment && old->off <= cur->off &&
 	    old->off >= old->range && cur->off >= cur->range)
 		return true;
 
@@ -2810,7 +2803,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 
 		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(rold, rcur))
+		    compare_ptrs_to_packet(env, rold, rcur))
 			continue;
 
 		return false;
@@ -3588,10 +3581,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	} else {
 		log_level = 0;
 	}
-	if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+
+	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		env->strict_alignment = true;
-	else
-		env->strict_alignment = false;
 
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
@@ -3697,7 +3690,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
 	mutex_lock(&bpf_verifier_lock);
 
 	log_level = 0;
+
 	env->strict_alignment = false;
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		env->strict_alignment = true;
 
 	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct bpf_verifier_state_list *),
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c3c9a0e1b3c9..8d4e85eae42c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
+	if (css->flags & CSS_DYING)
+		return;
+
+	css->flags |= CSS_DYING;
+
 	/*
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6501f4f6040..ae643412948a 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -176,9 +176,9 @@ typedef enum {
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
+static inline bool is_cpuset_online(struct cpuset *cs)
 {
-	return test_bit(CS_ONLINE, &cs->flags);
+	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
 }
 
 static inline int is_cpu_exclusive(const struct cpuset *cs)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe5b5cf..cb5103413bd8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev,
 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
 	mutex_unlock(&cpuhp_state_mutex);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (st->state < target)
 		ret = do_cpu_up(dev->id, target);
 	else
 		ret = do_cpu_down(dev->id, target);
-
+out:
 	unlock_device_hotplug();
 	return ret ? ret : count;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..6c4e523dc1e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7337,6 +7352,12 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
+	 * For security, drop the skid kernel samples if necessary.
+	 */
+	if (!sample_is_allowed(event, regs))
+		return ret;
+
+	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
diff --git a/kernel/exit.c b/kernel/exit.c
index 516acdb0e0ec..7d694437ab44 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1004,7 +1004,7 @@ struct wait_opts {
 	int __user		*wo_stat;
 	struct rusage __user	*wo_rusage;
 
-	wait_queue_t		child_wait;
+	wait_queue_entry_t		child_wait;
 	int			notask_error;
 };
 
@@ -1541,7 +1541,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
-static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *wo = container_of(wait, struct wait_opts,
diff --git a/kernel/fork.c b/kernel/fork.c
index aa1076c5e4a9..e53770d2bf95 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1577,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+	/*
+	 * This _must_ happen before we call free_task(), i.e. before we jump
+	 * to any of the bad_fork_* labels. This is to avoid freeing
+	 * p->set_child_tid which is (ab)used as a kthread's data pointer for
+	 * kernel threads (PF_KTHREAD).
+	 */
+	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+	/*
+	 * Clear TID on mm_release()?
+	 */
+	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
@@ -1743,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process(
 		}
 	}
 
-	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
-	/*
-	 * Clear TID on mm_release()?
-	 */
-	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b..d6cf71d08f21 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -225,7 +225,7 @@ struct futex_pi_state {
  * @requeue_pi_key:	the requeue_pi target futex key
  * @bitset:		bitset for the optional bitmasked wakeup
  *
- * We use this hashed waitqueue, instead of a normal wait_queue_t, so
+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 070be980c37a..425170d4439b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret)
+			if (ret) {
+				irq_release_resources(desc);
 				goto out_mask;
+			}
 		}
 
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2d2d3a568e4e..adfe3b4cfe05 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -122,7 +122,7 @@ static void *alloc_insn_page(void)
 	return module_alloc(PAGE_SIZE);
 }
 
-static void free_insn_page(void *page)
+void __weak free_insn_page(void *page)
 {
 	module_memfree(page);
 }
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 045022557936..ec4565122e65 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -10,6 +10,7 @@ config LIVEPATCH
 	depends on SYSFS
 	depends on KALLSYMS_ALL
 	depends on HAVE_LIVEPATCH
+	depends on !TRIM_UNUSED_KSYMS
 	help
 	  Say Y here if you want to support kernel live patching.
 	  This option has no runtime impact until a kernel "patch"
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b95509416909..28cd09e635ed 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1785,12 +1785,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
 	int ret;
 
 	raw_spin_lock_irq(&lock->wait_lock);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-
 	/* sleep on the mutex */
+	set_current_state(TASK_INTERRUPTIBLE);
 	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
-
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -1822,15 +1824,25 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
 
 	raw_spin_lock_irq(&lock->wait_lock);
 	/*
+	 * Do an unconditional try-lock, this deals with the lock stealing
+	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+	 * sets a NULL owner.
+	 *
+	 * We're not interested in the return value, because the subsequent
+	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+	 * we will own the lock and it will have removed the waiter. If we
+	 * failed the trylock, we're still not owner and we need to remove
+	 * ourselves.
+	 */
+	try_to_take_rt_mutex(lock, current, waiter);
+	/*
 	 * Unless we're the owner; we're still enqueued on the wait_list.
 	 * So check if we became owner, if not, take us off the wait_list.
 	 */
 	if (rt_mutex_owner(lock) != current) {
 		remove_waiter(lock, waiter);
-		fixup_rt_mutex_waiters(lock);
 		cleanup = true;
 	}
-
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 78672d324a6e..c7209f060eeb 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear(true);
+	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c0248c74d6d4..15e6baef5c73 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,8 +72,6 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
-
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -100,27 +98,6 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
-
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
-}
-
-static void s2idle_loop(void)
-{
-	do {
-		freeze_enter();
-
-		if (freeze_ops && freeze_ops->wake)
-			freeze_ops->wake();
-
-		dpm_resume_noirq(PMSG_RESUME);
-		if (freeze_ops && freeze_ops->sync)
-			freeze_ops->sync();
-
-		if (pm_wakeup_pending())
-			break;
-
-		pm_wakeup_clear(false);
-	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
 }
 
 void freeze_wake(void)
@@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		s2idle_loop();
-		goto Platform_early_resume;
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
+		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
+		goto Platform_wake;
 	}
 
 	error = disable_nonboot_cpus();
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 32fac391ac2a..bd53ea579dc8 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,7 +269,6 @@ static struct console *exclusive_console;
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
-static int console_cmdline_cnt;
 
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) {
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (strcmp(c->name, name) == 0 && c->index == idx) {
-			if (brl_options)
-				return 0;
-
-			/*
-			 * Maintain an invariant that will help to find if
-			 * the matching console is preferred, see
-			 * register_console():
-			 *
-			 * The last non-braille console is always
-			 * the preferred one.
-			 */
-			if (i != console_cmdline_cnt - 1)
-				swap(console_cmdline[i],
-				     console_cmdline[console_cmdline_cnt - 1]);
-
-			preferred_console = console_cmdline_cnt - 1;
-
+			if (!brl_options)
+				preferred_console = i;
 			return 0;
 		}
 	}
@@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	braille_set_options(c, brl_options);
 
 	c->index = idx;
-	console_cmdline_cnt++;
 	return 0;
 }
 /*
@@ -2477,23 +2462,12 @@ void register_console(struct console *newcon)
 	}
 
 	/*
-	 * See if this console matches one we selected on the command line.
-	 *
-	 * There may be several entries in the console_cmdline array matching
-	 * with the same console, one with newcon->match(), another by
-	 * name/index:
-	 *
-	 *	pl011,mmio,0x87e024000000,115200 -- added from SPCR
-	 *	ttyAMA0 -- added from command line
-	 *
-	 * Traverse the console_cmdline array in reverse order to be
-	 * sure that if this console is preferred then it will be the first
-	 * matching entry.  We use the invariant that is maintained in
-	 * __add_preferred_console().
+	 *	See if this console matches one we selected on
+	 *	the command line.
 	 */
-	for (i = console_cmdline_cnt - 1; i >= 0; i--) {
-		c = console_cmdline + i;
-
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (!newcon->match ||
 		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
 			/* default matching */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 266ddcc1d8bb..60f356d91060 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
 	rcu_read_lock();
-	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	__ptrace_link(child, new_parent, __task_cred(new_parent));
 	rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 		flags |= PT_SEIZED;
 	task->ptrace = flags;
 
-	__ptrace_link(task, current);
+	ptrace_link(task, current);
 
 	/* SEIZE doesn't trap tracee on attach */
 	if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 		 */
 		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 			current->ptrace = PT_PTRACED;
-			__ptrace_link(current, current->real_parent);
+			ptrace_link(current, current->real_parent);
 		}
 	}
 	write_unlock_irq(&tasklist_lock);
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 584d8a983883..dea03614263f 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
+	this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 36e1f82faed1..32798eb14853 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -97,8 +97,9 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
+ * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
+ * __srcu_read_unlock() must be in the same handler instance.  Returns an
+ * index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
 {
@@ -112,7 +113,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 
 /*
  * Removes the count for the old reader from the appropriate element of
- * the srcu_struct.  Must be called from process context.
+ * the srcu_struct.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 3ae8474557df..157654fa436a 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -357,7 +357,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -365,7 +365,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->srcu_idx) & 0x1;
-	__this_cpu_inc(sp->sda->srcu_lock_count[idx]);
+	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -375,7 +375,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 5e4c2e7a632b..53f0164ed362 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -17,7 +17,7 @@ endif
 
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o
-obj-y += wait.o swait.o completion.o idle.o
+obj-y += wait.o wait_bit.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 53f9558fa925..13fc5ae9bf2f 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x,
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c343b8135774..62166da1c359 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/sched/types.h>
 #include <linux/sched/loadavg.h>
 #include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
 #include <linux/cpuset.h>
 #include <linux/delayacct.h>
 #include <linux/init_task.h>
@@ -3692,7 +3693,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
 	return try_to_wake_up(curr->private, mode, wake_flags);
@@ -5592,7 +5593,7 @@ void idle_task_exit(void)
 	BUG_ON(cpu_online(smp_processor_id()));
 
 	if (mm != &init_mm) {
-		switch_mm_irqs_off(mm, &init_mm, current);
+		switch_mm(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
 	mmdrop(mm);
@@ -6010,28 +6011,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
-#define WAIT_TABLE_BITS 8
-#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	unsigned long val = (unsigned long)word << shift | bit;
-
-	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
 	sched_clock_init();
-
-	for (i = 0; i < WAIT_TABLE_SIZE; i++)
-		init_waitqueue_head(bit_wait_table + i);
+	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 622eed1b7658..076a2e31951c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 	if (sg_policy->next_freq == next_freq)
 		return;
 
-	if (sg_policy->next_freq > next_freq)
-		next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cb3a3da7089f..a24661ac3d23 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3565,7 +3565,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_runtime_enabled())  {
 		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
+			     "kernel parameter schedstats=enable or "
 			     "kernel.sched_schedstats=1\n");
 	}
 #endif
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index b8c84c6dee64..17f11c6b0a9f 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -12,44 +12,44 @@
 #include <linux/hash.h>
 #include <linux/kthread.h>
 
-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
 {
-	spin_lock_init(&q->lock);
-	lockdep_set_class_and_name(&q->lock, key, name);
-	INIT_LIST_HEAD(&q->task_list);
+	spin_lock_init(&wq_head->lock);
+	lockdep_set_class_and_name(&wq_head->lock, key, name);
+	INIT_LIST_HEAD(&wq_head->head);
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__remove_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
@@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
-	wait_queue_t *curr, *next;
+	wait_queue_entry_t *curr, *next;
 
-	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
@@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 
 /**
  * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
@@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, 0, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(__wake_up);
 
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, nr, 0, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, 1, 0, key);
+	__wake_up_common(wq_head, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: opaque value to be passed to wakeup targets
@@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int wake_flags = 1; /* XXX WF_SYNC */
 
-	if (unlikely(!q))
+	if (unlikely(!wq_head))
 		return;
 
 	if (unlikely(nr_exclusive != 1))
 		wake_flags = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 
 /*
  * __wake_up_sync - see __wake_up_sync_key()
  */
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
 {
-	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
@@ -170,48 +170,48 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue_entry_tail(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
-void init_wait_entry(wait_queue_t *wait, int flags)
+void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
-	wait->flags = flags;
-	wait->private = current;
-	wait->func = autoremove_wake_function;
-	INIT_LIST_HEAD(&wait->task_list);
+	wq_entry->flags = flags;
+	wq_entry->private = current;
+	wq_entry->func = autoremove_wake_function;
+	INIT_LIST_HEAD(&wq_entry->entry);
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 	long ret = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
 		 * Exclusive waiter must not fail if it was selected by wakeup,
@@ -219,24 +219,24 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 		 *
 		 * The caller will recheck the condition and return success if
 		 * we were already woken up, we can not miss the event because
-		 * wakeup locks/unlocks the same q->lock.
+		 * wakeup locks/unlocks the same wq_head->lock.
 		 *
 		 * But we need to ensure that set-condition + wakeup after that
 		 * can't see us, it should wake up another exclusive waiter if
 		 * we fail.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 		ret = -ERESTARTSYS;
 	} else {
-		if (list_empty(&wait->task_list)) {
-			if (wait->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_tail(q, wait);
+		if (list_empty(&wq_entry->entry)) {
+			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
+				__add_wait_queue_entry_tail(wq_head, wq_entry);
 			else
-				__add_wait_queue(q, wait);
+				__add_wait_queue(wq_head, wq_entry);
 		}
 		set_current_state(state);
 	}
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 
 	return ret;
 }
@@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event);
  * condition in the caller before they add the wait
  * entry to the wake queue.
  */
-int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(do_wait_intr);
 
-int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq);
 
 /**
  * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
+ * @wq_head: waitqueue waited on
+ * @wq_entry: wait descriptor
  *
  * Sets current thread back to running state and removes
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 	 *    have _one_ other CPU that looks at or modifies
 	 *    the list).
 	 */
-	if (!list_empty_careful(&wait->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
+	if (!list_empty_careful(&wq_entry->entry)) {
+		spin_lock_irqsave(&wq_head->lock, flags);
+		list_del_init(&wq_entry->entry);
+		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
-	int ret = default_wake_function(wait, mode, sync, key);
+	int ret = default_wake_function(wq_entry, mode, sync, key);
 
 	if (ret)
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 	return ret;
 }
 EXPORT_SYMBOL(autoremove_wake_function);
@@ -334,24 +334,24 @@ static inline bool is_kthread_should_stop(void)
 /*
  * DEFINE_WAIT_FUNC(wait, woken_wake_func);
  *
- * add_wait_queue(&wq, &wait);
+ * add_wait_queue(&wq_head, &wait);
  * for (;;) {
  *     if (condition)
  *         break;
  *
  *     p->state = mode;				condition = true;
  *     smp_mb(); // A				smp_wmb(); // C
- *     if (!wait->flags & WQ_FLAG_WOKEN)	wait->flags |= WQ_FLAG_WOKEN;
+ *     if (!wq_entry->flags & WQ_FLAG_WOKEN)	wq_entry->flags |= WQ_FLAG_WOKEN;
  *         schedule()				try_to_wake_up();
  *     p->state = TASK_RUNNING;		    ~~~~~~~~~~~~~~~~~~
- *     wait->flags &= ~WQ_FLAG_WOKEN;		condition = true;
+ *     wq_entry->flags &= ~WQ_FLAG_WOKEN;		condition = true;
  *     smp_mb() // B				smp_wmb(); // C
- *						wait->flags |= WQ_FLAG_WOKEN;
+ *						wq_entry->flags |= WQ_FLAG_WOKEN;
  * }
- * remove_wait_queue(&wq, &wait);
+ * remove_wait_queue(&wq_head, &wait);
  *
  */
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
 {
 	set_current_state(mode); /* A */
 	/*
@@ -359,7 +359,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
 	 * also observe all state before the wakeup.
 	 */
-	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
+	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
 		timeout = schedule_timeout(timeout);
 	__set_current_state(TASK_RUNNING);
 
@@ -369,13 +369,13 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
 	 * an event.
 	 */
-	smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
 
 	return timeout;
 }
 EXPORT_SYMBOL(wait_woken);
 
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
 	/*
 	 * Although this function is called under waitqueue lock, LOCK
@@ -385,267 +385,8 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	 * and is paired with smp_store_mb() in wait_woken().
 	 */
 	smp_wmb(); /* C */
-	wait->flags |= WQ_FLAG_WOKEN;
+	wq_entry->flags |= WQ_FLAG_WOKEN;
 
-	return default_wake_function(wait, mode, sync, key);
+	return default_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(woken_wake_function);
-
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-
-	if (wait_bit->key.flags != key->flags ||
-			wait_bit->key.bit_nr != key->bit_nr ||
-			test_bit(key->bit_nr, key->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, key);
-}
-EXPORT_SYMBOL(wake_bit_function);
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking)
- * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
- * permitted return codes. Nonzero return codes halt waiting and return.
- */
-int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
-	      wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags))
-			ret = (*action)(&q->key, mode);
-	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-EXPORT_SYMBOL(__wait_on_bit);
-
-int __sched out_of_line_wait_on_bit(void *word, int bit,
-				    wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
-
-int __sched out_of_line_wait_on_bit_timeout(
-	void *word, int bit, wait_bit_action_f *action,
-	unsigned mode, unsigned long timeout)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	wait.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
-
-int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
-			wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	for (;;) {
-		prepare_to_wait_exclusive(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags)) {
-			ret = action(&q->key, mode);
-			/*
-			 * See the comment in prepare_to_wait_event().
-			 * finish_wait() does not necessarily takes wq->lock,
-			 * but test_and_set_bit() implies mb() which pairs with
-			 * smp_mb__after_atomic() before wake_up_page().
-			 */
-			if (ret)
-				finish_wait(wq, &q->wait);
-		}
-		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
-			if (!ret)
-				finish_wait(wq, &q->wait);
-			return 0;
-		} else if (ret) {
-			return ret;
-		}
-	}
-}
-EXPORT_SYMBOL(__wait_on_bit_lock);
-
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
-					 wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit_lock(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
-
-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
-{
-	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, 1, &key);
-}
-EXPORT_SYMBOL(__wake_up_bit);
-
-/**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
- *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
- */
-void wake_up_bit(void *word, int bit)
-{
-	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
-}
-EXPORT_SYMBOL(wake_up_bit);
-
-/*
- * Manipulate the atomic_t address to produce a better bit waitqueue table hash
- * index (we're keying off bit -1, but that would produce a horrible hash
- * value).
- */
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
-{
-	if (BITS_PER_LONG == 64) {
-		unsigned long q = (unsigned long)p;
-		return bit_waitqueue((void *)(q & ~1), q & 1);
-	}
-	return bit_waitqueue(p, 0);
-}
-
-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
-				  void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-	atomic_t *val = key->flags;
-
-	if (wait_bit->key.flags != key->flags ||
-	    wait_bit->key.bit_nr != key->bit_nr ||
-	    atomic_read(val) != 0)
-		return 0;
-	return autoremove_wake_function(wait, mode, sync, key);
-}
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
- * return codes halt waiting and return.
- */
-static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
-		       int (*action)(atomic_t *), unsigned mode)
-{
-	atomic_t *val;
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		val = q->key.flags;
-		if (atomic_read(val) == 0)
-			break;
-		ret = (*action)(val);
-	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-
-#define DEFINE_WAIT_ATOMIC_T(name, p)					\
-	struct wait_bit_queue name = {					\
-		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
-		.wait	= {						\
-			.private	= current,			\
-			.func		= wake_atomic_t_function,	\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
-		},							\
-	}
-
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
-					 unsigned mode)
-{
-	wait_queue_head_t *wq = atomic_t_waitqueue(p);
-	DEFINE_WAIT_ATOMIC_T(wait, p);
-
-	return __wait_on_atomic_t(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
-
-/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @p: The atomic_t being waited on, a kernel virtual address
- *
- * Wake up anyone waiting for the atomic_t to go to zero.
- *
- * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
- * check is done by the waiter's wake function, not the by the waker itself).
- */
-void wake_up_atomic_t(atomic_t *p)
-{
-	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
-}
-EXPORT_SYMBOL(wake_up_atomic_t);
-
-__sched int bit_wait(struct wait_bit_key *word, int mode)
-{
-	schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait);
-
-__sched int bit_wait_io(struct wait_bit_key *word, int mode)
-{
-	io_schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait_io);
-
-__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_timeout);
-
-__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	io_schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
new file mode 100644
index 000000000000..f8159698aa4d
--- /dev/null
+++ b/kernel/sched/wait_bit.c
@@ -0,0 +1,286 @@
+/*
+ * The implementation of the wait_bit*() and related waiting APIs:
+ */
+#include <linux/wait_bit.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+#include <linux/hash.h>
+
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
+ * permitted return codes. Nonzero return codes halt waiting and return.
+ */
+int __sched
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+	      wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
+			ret = (*action)(&wbq_entry->key, mode);
+	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched out_of_line_wait_on_bit(void *word, int bit,
+				    wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched out_of_line_wait_on_bit_timeout(
+	void *word, int bit, wait_bit_action_f *action,
+	unsigned mode, unsigned long timeout)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	wq_entry.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
+
+int __sched
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+			wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	for (;;) {
+		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			ret = action(&wbq_entry->key, mode);
+			/*
+			 * See the comment in prepare_to_wait_event().
+			 * finish_wait() does not necessarily takes wwq_head->lock,
+			 * but test_and_set_bit() implies mb() which pairs with
+			 * smp_mb__after_atomic() before wake_up_page().
+			 */
+			if (ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+		}
+		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			if (!ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+			return 0;
+		} else if (ret) {
+			return ret;
+		}
+	}
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+					 wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq_head))
+		__wake_up(wq_head, TASK_NORMAL, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_atomic(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+/*
+ * Manipulate the atomic_t address to produce a better bit waitqueue table hash
+ * index (we're keying off bit -1, but that would produce a horrible hash
+ * value).
+ */
+static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
+{
+	if (BITS_PER_LONG == 64) {
+		unsigned long q = (unsigned long)p;
+		return bit_waitqueue((void *)(q & ~1), q & 1);
+	}
+	return bit_waitqueue(p, 0);
+}
+
+static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
+				  void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+	atomic_t *val = key->flags;
+
+	if (wait_bit->key.flags != key->flags ||
+	    wait_bit->key.bit_nr != key->bit_nr ||
+	    atomic_read(val) != 0)
+		return 0;
+	return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
+ * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
+ * return codes halt waiting and return.
+ */
+static __sched
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+		       int (*action)(atomic_t *), unsigned mode)
+{
+	atomic_t *val;
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		val = wbq_entry->key.flags;
+		if (atomic_read(val) == 0)
+			break;
+		ret = (*action)(val);
+	} while (!ret && atomic_read(val) != 0);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+
+#define DEFINE_WAIT_ATOMIC_T(name, p)					\
+	struct wait_bit_queue_entry name = {				\
+		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
+		.wq_entry = {						\
+			.private	= current,			\
+			.func		= wake_atomic_t_function,	\
+			.entry		=				\
+				LIST_HEAD_INIT((name).wq_entry.entry),	\
+		},							\
+	}
+
+__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
+					 unsigned mode)
+{
+	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
+	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
+
+	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+
+/**
+ * wake_up_atomic_t - Wake up a waiter on a atomic_t
+ * @p: The atomic_t being waited on, a kernel virtual address
+ *
+ * Wake up anyone waiting for the atomic_t to go to zero.
+ *
+ * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
+ * check is done by the waiter's wake function, not the by the waker itself).
+ */
+void wake_up_atomic_t(atomic_t *p)
+{
+	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
+}
+EXPORT_SYMBOL(wake_up_atomic_t);
+
+__sched int bit_wait(struct wait_bit_key *word, int mode)
+{
+	schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait);
+
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
+{
+	io_schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait_io);
+
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_timeout);
+
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	io_schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
+
+void __init wait_bit_init(void)
+{
+	int i;
+
+	for (i = 0; i < WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(bit_wait_table + i);
+}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5cb5b0008d97..ee2f4202d82a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
 {
 	struct alarm_base *base = &alarm_bases[alarm->type];
 
-	start = ktime_add(start, base->gettime());
+	start = ktime_add_safe(start, base->gettime());
 	alarm_start(alarm, start);
 }
 EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 		overrun++;
 	}
 
-	alarm->node.expires = ktime_add(alarm->node.expires, interval);
+	alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
 	return overrun;
 }
 EXPORT_SYMBOL_GPL(alarm_forward);
@@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+	/*
+	 * Rate limit to the tick as a hot fix to prevent DOS. Will be
+	 * mopped up later.
+	 */
+	if (timr->it.alarm.interval < TICK_NSEC)
+		timr->it.alarm.interval = TICK_NSEC;
+
 	exp = timespec64_to_ktime(new_setting->it_value);
 	/* Convert (if necessary) to absolute time */
 	if (flags != TIMER_ABSTIME) {
 		ktime_t now;
 
 		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
-		exp = ktime_add(now, exp);
+		exp = ktime_add_safe(now, exp);
 	}
 
 	alarm_start(&timr->it.alarm.alarmtimer, exp);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1370f067fb51..d2a1e6dd0291 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk,
 				soft += USEC_PER_SEC;
 				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 			}
-			pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
@@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk,
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
-			pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 			if (soft < hard) {
 				soft++;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 987e496bb51a..b398c2ea69b2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 /**
  * tick_broadcast_setup_oneshot - setup the broadcast device
  */
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
 	int cpu = smp_processor_id();
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f738251000fe..be0ac01f2e12 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 74fdfe9ed3db..9e5841dc14b5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5063,7 +5063,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
 	}
 
  out:
-	kfree(fgd->new_hash);
+	free_ftrace_hash(fgd->new_hash);
 	kfree(fgd);
 
 	return ret;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39ef764..a86688fabc55 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work)
 EXPORT_SYMBOL_GPL(flush_work);
 
 struct cwt_wait {
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	struct work_struct	*work;
 };
 
-static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
author	Ingo Molnar <mingo@kernel.org>	2017-06-20 12:28:21 +0200
committer	Ingo Molnar <mingo@kernel.org>	2017-06-20 12:28:21 +0200
commit	902b31941327a0e9c0ca9eb7750414ae41bf8a89 (patch)
tree	300f4226c87e6df403cda861ab6766b23d278f09 /kernel
parent	c5ae366e12b2bd56fc7d7e9d484836bec9ddc110 (diff)
parent	2055da97389a605c8a00d163d40903afbe413921 (diff)
download	linux-902b31941327a0e9c0ca9eb7750414ae41bf8a89.tar.bz2