Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/inode.c                        |   2
-rw-r--r--  kernel/cgroup.c                           |  75
-rw-r--r--  kernel/configs/android-base.config        |   7
-rw-r--r--  kernel/configs/android-recommended.config |   4
-rw-r--r--  kernel/cpu.c                              |   2
-rw-r--r--  kernel/cpuset.c                           |  13
-rw-r--r--  kernel/exit.c                             |   2
-rw-r--r--  kernel/fork.c                             |  15
-rw-r--r--  kernel/groups.c                           |  67
-rw-r--r--  kernel/hung_task.c                        |  28
-rw-r--r--  kernel/kprobes.c                          |   2
-rw-r--r--  kernel/kthread.c                          | 577
-rw-r--r--  kernel/livepatch/core.c                   |  19
-rw-r--r--  kernel/module.c                           |  13
-rw-r--r--  kernel/panic.c                            |  47
-rw-r--r--  kernel/power/process.c                    |  17
-rw-r--r--  kernel/printk/printk.c                    | 137
-rw-r--r--  kernel/ptrace.c                           |   3
-rw-r--r--  kernel/rcu/tiny.c                         |   2
-rw-r--r--  kernel/rcu/tree.c                         |   2
-rw-r--r--  kernel/relay.c                            | 102
-rw-r--r--  kernel/sched/debug.c                      |   3
-rw-r--r--  kernel/sched/fair.c                       |  13
-rw-r--r--  kernel/sched/idle.c                       |  13
-rw-r--r--  kernel/smpboot.c                          |   7
-rw-r--r--  kernel/softirq.c                          |   4
-rw-r--r--  kernel/sys_ni.c                           |   5
-rw-r--r--  kernel/sysctl.c                           |  14
-rw-r--r--  kernel/time/alarmtimer.c                  |   2
-rw-r--r--  kernel/time/timekeeping.c                 |   7
-rw-r--r--  kernel/time/timer.c                       |   2
-rw-r--r--  kernel/trace/Makefile                     |   4
-rw-r--r--  kernel/trace/trace_syscalls.c             |   6
-rw-r--r--  kernel/uid16.c                            |   4
-rw-r--r--  kernel/workqueue.c                        |  42
35 files changed, 870 insertions(+), 392 deletions(-)
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5967b870a895..1ed8473ec537 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -97,7 +97,7 @@ static struct inode *bpf_get_inode(struct super_block *sb,
return ERR_PTR(-ENOSPC);
inode->i_ino = get_next_ino();
- inode->i_atime = CURRENT_TIME;
+ inode->i_atime = current_time(inode);
inode->i_mtime = inode->i_atime;
inode->i_ctime = inode->i_atime;
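
CURRENT_TIME is being retired in favor of current_time(), which takes the
inode so the timestamp can be truncated to the superblock's s_time_gran.
A minimal sketch of the resulting idiom (the helper name is illustrative,
not from the patch):

    #include <linux/fs.h>

    /* Illustrative helper: stamp a fresh inode via current_time(), which
     * honors the filesystem's timestamp granularity. */
    static void example_stamp_inode(struct inode *inode)
    {
            inode->i_atime = current_time(inode);
            inode->i_mtime = inode->i_atime;
            inode->i_ctime = inode->i_atime;
    }
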
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 44066158f0d1..85bc9beb046d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
#include <linux/file.h>
#include <net/sock.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
+
/*
* pidlists linger the following amount before being destroyed. The goal
* is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
struct cgroup *cgrp = &root->cgrp;
struct cgrp_cset_link *link, *tmp_link;
+ trace_cgroup_destroy_root(root);
+
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}
+
+ trace_cgroup_remount(root);
+
out_unlock:
kfree(opts.release_agent);
kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto destroy_root;
+ trace_cgroup_setup_root(root);
+
/*
* There must be no failure case after here, since rebinding takes
* care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
-static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
- struct cgroup_namespace *ns)
+static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+ struct cgroup_namespace *ns)
{
struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
- int ret;
- ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
- if (ret < 0 || ret >= buflen)
- return NULL;
- return buf;
+ return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
- struct cgroup_namespace *ns)
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+ struct cgroup_namespace *ns)
{
- char *ret;
+ int ret;
mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
*
* Return value is the same as kernfs_path().
*/
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroup_root *root;
struct cgroup *cgrp;
int hierarchy_id = 1;
- char *path = NULL;
+ int ret;
mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
if (root) {
cgrp = task_cgroup_from_root(task, root);
- path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+ ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
} else {
/* if no hierarchy exists, everyone is in "/" */
- if (strlcpy(buf, "/", buflen) < buflen)
- path = buf;
+ ret = strlcpy(buf, "/", buflen);
}
spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
- return path;
+ return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
cgroup_migrate_finish(&preloaded_csets);
+
+ if (!ret)
+ trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
return ret;
}
@@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
mutex_lock(&cgroup_mutex);
ret = kernfs_rename(kn, new_parent, new_name_str);
+ if (!ret)
+ trace_cgroup_rename(cgrp);
mutex_unlock(&cgroup_mutex);
@@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (task) {
ret = cgroup_migrate(task, false, to->root);
+ if (!ret)
+ trace_cgroup_transfer_tasks(to, task, false);
put_task_struct(task);
}
} while (task && !ret);
@@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
ss->css_released(css);
} else {
/* cgroup release path */
+ trace_cgroup_release(cgrp);
+
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
@@ -5332,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
if (ret)
goto out_destroy;
+ trace_cgroup_mkdir(cgrp);
+
/* let's create and online css's */
kernfs_activate(kn);
@@ -5507,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
ret = cgroup_destroy_locked(cgrp);
+ if (!ret)
+ trace_cgroup_rmdir(cgrp);
+
cgroup_kn_unlock(kn);
return ret;
}
@@ -5743,7 +5763,7 @@ core_initcall(cgroup_wq_init);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
- char *buf, *path;
+ char *buf;
int retval;
struct cgroup_root *root;
@@ -5786,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
* " (deleted)" is appended to the cgroup path.
*/
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
- path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+ retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
- if (!path) {
+ if (retval >= PATH_MAX)
retval = -ENAMETOOLONG;
+ if (retval < 0)
goto out_unlock;
- }
+
+ seq_puts(m, buf);
} else {
- path = "/";
+ seq_puts(m, "/");
}
- seq_puts(m, path);
-
if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
seq_puts(m, " (deleted)\n");
else
@@ -6062,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
{
struct cgroup *cgrp =
container_of(work, struct cgroup, release_agent_work);
- char *pathbuf = NULL, *agentbuf = NULL, *path;
+ char *pathbuf = NULL, *agentbuf = NULL;
char *argv[3], *envp[3];
+ int ret;
mutex_lock(&cgroup_mutex);
@@ -6073,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
goto out;
spin_lock_irq(&css_set_lock);
- path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+ ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
spin_unlock_irq(&css_set_lock);
- if (!path)
+ if (ret < 0 || ret >= PATH_MAX)
goto out;
argv[0] = agentbuf;
- argv[1] = path;
+ argv[1] = pathbuf;
argv[2] = NULL;
/* minimal command environment */
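
With this series, cgroup_path_ns() and task_cgroup_path() adopt the
strlcpy()-style convention of kernfs_path_from_node(): they return the
length the path would have taken (or a negative errno) instead of a
buffer pointer, so truncation checks move to the caller. A hedged sketch
of the new calling convention (the function and its buffer handling are
illustrative):

    #include <linux/cgroup.h>
    #include <linux/printk.h>
    #include <linux/slab.h>

    /* Illustrative caller: detect truncation by comparing the returned
     * length against the buffer size, as the hunks above now do. */
    static int example_print_cgroup(struct cgroup *cgrp,
                                    struct cgroup_namespace *ns)
    {
            char *buf;
            int ret;

            buf = kmalloc(PATH_MAX, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            ret = cgroup_path_ns(cgrp, buf, PATH_MAX, ns);
            if (ret >= PATH_MAX)
                    ret = -ENAMETOOLONG;
            if (ret >= 0) {
                    pr_info("cgroup path: %s\n", buf);
                    ret = 0;
            }
            kfree(buf);
            return ret;
    }
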
diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config
index 9f748ed7bea8..1a8f34f63601 100644
--- a/kernel/configs/android-base.config
+++ b/kernel/configs/android-base.config
@@ -11,7 +11,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y
CONFIG_ARMV8_DEPRECATED=y
CONFIG_ASHMEM=y
CONFIG_AUDIT=y
-CONFIG_BLK_DEV_DM=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_CPUACCT=y
@@ -19,9 +18,7 @@ CONFIG_CGROUP_DEBUG=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_SCHED=y
CONFIG_CP15_BARRIER_EMULATION=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_VERITY=y
-CONFIG_DM_VERITY_FEC=y
+CONFIG_DEFAULT_SECURITY_SELINUX=y
CONFIG_EMBEDDED=y
CONFIG_FB=y
CONFIG_HIGH_RES_TIMERS=y
@@ -41,7 +38,6 @@ CONFIG_IPV6=y
CONFIG_IPV6_MIP6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_OPTIMISTIC_DAD=y
-CONFIG_IPV6_PRIVACY=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -135,6 +131,7 @@ CONFIG_PREEMPT=y
CONFIG_QUOTA=y
CONFIG_RTC_CLASS=y
CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECCOMP=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config
index e3b953e966d2..297756be369c 100644
--- a/kernel/configs/android-recommended.config
+++ b/kernel/configs/android-recommended.config
@@ -6,12 +6,16 @@
# CONFIG_PM_WAKELOCKS_GC is not set
# CONFIG_VT is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BLK_DEV_DM=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_COMPACTION=y
CONFIG_DEBUG_RODATA=y
+CONFIG_DM_CRYPT=y
CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
CONFIG_DRAGONRISE_FF=y
CONFIG_ENABLE_DEFAULT_TRACERS=y
CONFIG_EXT4_FS=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5df20d6d1520..29de1a9352c0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -228,7 +228,7 @@ static struct {
.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
- .dep_map = {.name = "cpu_hotplug.lock" },
+ .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
#endif
};
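
The old initializer named the dep_map but left its lock class key NULL,
which lockdep cannot classify; STATIC_LOCKDEP_MAP_INIT() supplies both a
name and a static key, here the map's own address. A sketch of the same
pattern with illustrative names:

    #include <linux/lockdep.h>
    #include <linux/mutex.h>

    #ifdef CONFIG_DEBUG_LOCK_ALLOC
    /* Illustrative only: statically initialize a lockdep map, reusing its
     * own address as the class key so no runtime registration is needed. */
    static struct {
            struct mutex lock;
            struct lockdep_map dep_map;
    } example_guard = {
            .lock = __MUTEX_INITIALIZER(example_guard.lock),
            .dep_map = STATIC_LOCKDEP_MAP_INIT("example_guard.dep_map",
                                               &example_guard.dep_map),
    };
    #endif
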
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2b4c20ab5bbe..29f815d2ef7e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2715,7 +2715,7 @@ void __cpuset_memory_pressure_bump(void)
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
- char *buf, *p;
+ char *buf;
struct cgroup_subsys_state *css;
int retval;
@@ -2724,14 +2724,15 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;
- retval = -ENAMETOOLONG;
css = task_get_css(tsk, cpuset_cgrp_id);
- p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
- current->nsproxy->cgroup_ns);
+ retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+ current->nsproxy->cgroup_ns);
css_put(css);
- if (!p)
+ if (retval >= PATH_MAX)
+ retval = -ENAMETOOLONG;
+ if (retval < 0)
goto out_free;
- seq_puts(m, p);
+ seq_puts(m, buf);
seq_putc(m, '\n');
retval = 0;
out_free:
diff --git a/kernel/exit.c b/kernel/exit.c
index 1e1d913914c0..9d68c45ebbe3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -511,7 +511,7 @@ static void exit_mm(struct task_struct *tsk)
mm_update_next_owner(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
- exit_oom_victim(tsk);
+ exit_oom_victim();
}
static struct task_struct *find_alive_thread(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9a05bd93f8e7..623259fc794d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -359,6 +359,12 @@ static inline void free_signal_struct(struct signal_struct *sig)
{
taskstats_tgid_free(sig);
sched_autogroup_exit(sig);
+ /*
+ * __mmdrop is not safe to call from softirq context on x86 due to
+ * pgd_dtor so postpone it to the async context
+ */
+ if (sig->oom_mm)
+ mmdrop_async(sig->oom_mm);
kmem_cache_free(signal_cachep, sig);
}
@@ -541,7 +547,8 @@ free_tsk:
}
#ifdef CONFIG_MMU
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ struct mm_struct *oldmm)
{
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
@@ -848,6 +855,7 @@ static inline void __mmput(struct mm_struct *mm)
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
exit_mmap(mm);
+ mm_put_huge_zero_page(mm);
set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
@@ -856,6 +864,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ set_bit(MMF_OOM_SKIP, &mm->flags);
mmdrop(mm);
}
@@ -1433,7 +1442,8 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
-static struct task_struct *copy_process(unsigned long clone_flags,
+static __latent_entropy struct task_struct *copy_process(
+ unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
@@ -1918,6 +1928,7 @@ long _do_fork(unsigned long clone_flags,
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
+ add_latent_entropy();
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
diff --git a/kernel/groups.c b/kernel/groups.c
index 74d431d25251..2fcadd66a8fd 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -7,55 +7,31 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/user_namespace.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
struct group_info *groups_alloc(int gidsetsize)
{
- struct group_info *group_info;
- int nblocks;
- int i;
-
- nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
- /* Make sure we always allocate at least one indirect block pointer */
- nblocks = nblocks ? : 1;
- group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
- if (!group_info)
+ struct group_info *gi;
+ unsigned int len;
+
+ len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
+ gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
+ if (!gi)
+ gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL);
+ if (!gi)
return NULL;
- group_info->ngroups = gidsetsize;
- group_info->nblocks = nblocks;
- atomic_set(&group_info->usage, 1);
-
- if (gidsetsize <= NGROUPS_SMALL)
- group_info->blocks[0] = group_info->small_block;
- else {
- for (i = 0; i < nblocks; i++) {
- kgid_t *b;
- b = (void *)__get_free_page(GFP_USER);
- if (!b)
- goto out_undo_partial_alloc;
- group_info->blocks[i] = b;
- }
- }
- return group_info;
-out_undo_partial_alloc:
- while (--i >= 0) {
- free_page((unsigned long)group_info->blocks[i]);
- }
- kfree(group_info);
- return NULL;
+ atomic_set(&gi->usage, 1);
+ gi->ngroups = gidsetsize;
+ return gi;
}
EXPORT_SYMBOL(groups_alloc);
void groups_free(struct group_info *group_info)
{
- if (group_info->blocks[0] != group_info->small_block) {
- int i;
- for (i = 0; i < group_info->nblocks; i++)
- free_page((unsigned long)group_info->blocks[i]);
- }
- kfree(group_info);
+ kvfree(group_info);
}
EXPORT_SYMBOL(groups_free);
@@ -70,7 +46,7 @@ static int groups_to_user(gid_t __user *grouplist,
for (i = 0; i < count; i++) {
gid_t gid;
- gid = from_kgid_munged(user_ns, GROUP_AT(group_info, i));
+ gid = from_kgid_munged(user_ns, group_info->gid[i]);
if (put_user(gid, grouplist+i))
return -EFAULT;
}
@@ -95,7 +71,7 @@ static int groups_from_user(struct group_info *group_info,
if (!gid_valid(kgid))
return -EINVAL;
- GROUP_AT(group_info, i) = kgid;
+ group_info->gid[i] = kgid;
}
return 0;
}
@@ -115,15 +91,14 @@ static void groups_sort(struct group_info *group_info)
for (base = 0; base < max; base++) {
int left = base;
int right = left + stride;
- kgid_t tmp = GROUP_AT(group_info, right);
+ kgid_t tmp = group_info->gid[right];
- while (left >= 0 && gid_gt(GROUP_AT(group_info, left), tmp)) {
- GROUP_AT(group_info, right) =
- GROUP_AT(group_info, left);
+ while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
+ group_info->gid[right] = group_info->gid[left];
right = left;
left -= stride;
}
- GROUP_AT(group_info, right) = tmp;
+ group_info->gid[right] = tmp;
}
stride /= 3;
}
@@ -141,9 +116,9 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
right = group_info->ngroups;
while (left < right) {
unsigned int mid = (left+right)/2;
- if (gid_gt(grp, GROUP_AT(group_info, mid)))
+ if (gid_gt(grp, group_info->gid[mid]))
left = mid + 1;
- else if (gid_lt(grp, GROUP_AT(group_info, mid)))
+ else if (gid_lt(grp, group_info->gid[mid]))
right = mid;
else
return 1;
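
groups_alloc() drops the old two-level block scheme (and with it the
GROUP_AT() accessor) for a flat gid[] array, allocated kmalloc-first with
a vmalloc fallback and released with kvfree(), which accepts either kind
of pointer. A self-contained sketch of the allocation idiom (names are
illustrative):

    #include <linux/mm.h>
    #include <linux/slab.h>
    #include <linux/vmalloc.h>

    /* Illustrative only: try a cheap, physically contiguous allocation
     * first, quietly fall back to vmalloc for large sizes. */
    static void *example_alloc(size_t len)
    {
            void *p = kmalloc(len, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);

            if (!p)
                    p = __vmalloc(len, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
            return p;       /* the caller releases with kvfree(p) */
    }
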
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 432c3d71d195..2b59c82cc3e1 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -98,26 +98,26 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
trace_sched_process_hang(t);
- if (!sysctl_hung_task_warnings)
+ if (!sysctl_hung_task_warnings && !sysctl_hung_task_panic)
return;
- if (sysctl_hung_task_warnings > 0)
- sysctl_hung_task_warnings--;
-
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
- pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
- t->comm, t->pid, timeout);
- pr_err(" %s %s %.*s\n",
- print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
- " disables this message.\n");
- sched_show_task(t);
- debug_show_all_locks();
+ if (sysctl_hung_task_warnings) {
+ sysctl_hung_task_warnings--;
+ pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
+ t->comm, t->pid, timeout);
+ pr_err(" %s %s %.*s\n",
+ print_tainted(), init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+ " disables this message.\n");
+ sched_show_task(t);
+ debug_show_all_locks();
+ }
touch_nmi_watchdog();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d10ab6b9b5e0..d63095472ea9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -49,7 +49,7 @@
#include <linux/cpu.h>
#include <linux/jump_label.h>
-#include <asm-generic/sections.h>
+#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ab4c3766a80..be2cc1f9dd57 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -138,7 +138,7 @@ void *kthread_data(struct task_struct *task)
}
/**
- * probe_kthread_data - speculative version of kthread_data()
+ * kthread_probe_data - speculative version of kthread_data()
* @task: possible kthread task in question
*
* @task could be a kthread task. Return the data value specified when it
@@ -146,7 +146,7 @@ void *kthread_data(struct task_struct *task)
* inaccessible for any reason, %NULL is returned. This function requires
* that @task itself is safe to dereference.
*/
-void *probe_kthread_data(struct task_struct *task)
+void *kthread_probe_data(struct task_struct *task)
{
struct kthread *kthread = to_kthread(task);
void *data = NULL;
@@ -244,33 +244,10 @@ static void create_kthread(struct kthread_create_info *create)
}
}
-/**
- * kthread_create_on_node - create a kthread.
- * @threadfn: the function to run until signal_pending(current).
- * @data: data ptr for @threadfn.
- * @node: task and thread structures for the thread are allocated on this node
- * @namefmt: printf-style name for the thread.
- *
- * Description: This helper function creates and names a kernel
- * thread. The thread will be stopped: use wake_up_process() to start
- * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and
- * is affine to all CPUs.
- *
- * If thread is going to be bound on a particular cpu, give its node
- * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
- * When woken, the thread will run @threadfn() with @data as its
- * argument. @threadfn() can either call do_exit() directly if it is a
- * standalone thread for which no one will call kthread_stop(), or
- * return when 'kthread_should_stop()' is true (which means
- * kthread_stop() has been called). The return value should be zero
- * or a negative error number; it will be passed to kthread_stop().
- *
- * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
- */
-struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
- void *data, int node,
- const char namefmt[],
- ...)
+static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+ void *data, int node,
+ const char namefmt[],
+ va_list args)
{
DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
@@ -311,11 +288,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
- va_list args;
- va_start(args, namefmt);
vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
- va_end(args);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.
@@ -326,6 +300,44 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
kfree(create);
return task;
}
+
+/**
+ * kthread_create_on_node - create a kthread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @node: task and thread structures for the thread are allocated on this node
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread. The thread will be stopped: use wake_up_process() to start
+ * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and
+ * is affine to all CPUs.
+ *
+ * If thread is going to be bound on a particular cpu, give its node
+ * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
+ * When woken, the thread will run @threadfn() with @data as its
+ * argument. @threadfn() can either call do_exit() directly if it is a
+ * standalone thread for which no one will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called). The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
+ */
+struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
+ void *data, int node,
+ const char namefmt[],
+ ...)
+{
+ struct task_struct *task;
+ va_list args;
+
+ va_start(args, namefmt);
+ task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
+ va_end(args);
+
+ return task;
+}
EXPORT_SYMBOL(kthread_create_on_node);
static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
@@ -390,10 +402,10 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
cpu);
if (IS_ERR(p))
return p;
+ kthread_bind(p, cpu);
+ /* CPU hotplug need to bind once again when unparking the thread. */
set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
to_kthread(p)->cpu = cpu;
- /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */
- kthread_park(p);
return p;
}
@@ -407,6 +419,10 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
* which might be about to be cleared.
*/
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+ /*
+ * Newly created kthread was parked when the CPU was offline.
+ * The binding was lost and we need to set it again.
+ */
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
wake_up_state(k, TASK_PARKED);
@@ -540,39 +556,48 @@ int kthreadd(void *unused)
return 0;
}
-void __init_kthread_worker(struct kthread_worker *worker,
+void __kthread_init_worker(struct kthread_worker *worker,
const char *name,
struct lock_class_key *key)
{
+ memset(worker, 0, sizeof(struct kthread_worker));
spin_lock_init(&worker->lock);
lockdep_set_class_and_name(&worker->lock, key, name);
INIT_LIST_HEAD(&worker->work_list);
- worker->task = NULL;
+ INIT_LIST_HEAD(&worker->delayed_work_list);
}
-EXPORT_SYMBOL_GPL(__init_kthread_worker);
+EXPORT_SYMBOL_GPL(__kthread_init_worker);
/**
* kthread_worker_fn - kthread function to process kthread_worker
* @worker_ptr: pointer to initialized kthread_worker
*
- * This function can be used as @threadfn to kthread_create() or
- * kthread_run() with @worker_ptr argument pointing to an initialized
- * kthread_worker. The started kthread will process work_list until
- * the it is stopped with kthread_stop(). A kthread can also call
- * this function directly after extra initialization.
+ * This function implements the main cycle of a kthread worker. It processes
+ * work_list until it is stopped with kthread_stop(). It sleeps when the queue
+ * is empty.
+ *
+ * The works must not hold any locks or leave preemption or interrupts
+ * disabled when they finish. A safe point for freezing is provided after one
+ * work finishes and before the next one is started.
*
- * Different kthreads can be used for the same kthread_worker as long
- * as there's only one kthread attached to it at any given time. A
- * kthread_worker without an attached kthread simply collects queued
- * kthread_works.
+ * Also the works must not be handled by more than one worker at the same time,
+ * see also kthread_queue_work().
*/
int kthread_worker_fn(void *worker_ptr)
{
struct kthread_worker *worker = worker_ptr;
struct kthread_work *work;
- WARN_ON(worker->task);
+ /*
+ * FIXME: Update the check and remove the assignment when all kthread
+ * worker users are created using kthread_create_worker*() functions.
+ */
+ WARN_ON(worker->task && worker->task != current);
worker->task = current;
+
+ if (worker->flags & KTW_FREEZABLE)
+ set_freezable();
+
repeat:
set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */
@@ -605,12 +630,131 @@ repeat:
}
EXPORT_SYMBOL_GPL(kthread_worker_fn);
-/* insert @work before @pos in @worker */
-static void insert_kthread_work(struct kthread_worker *worker,
- struct kthread_work *work,
- struct list_head *pos)
+static struct kthread_worker *
+__kthread_create_worker(int cpu, unsigned int flags,
+ const char namefmt[], va_list args)
+{
+ struct kthread_worker *worker;
+ struct task_struct *task;
+
+ worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ if (!worker)
+ return ERR_PTR(-ENOMEM);
+
+ kthread_init_worker(worker);
+
+ if (cpu >= 0) {
+ char name[TASK_COMM_LEN];
+
+ /*
+ * kthread_create_worker_on_cpu() allows passing a generic
+ * namefmt, in contrast to kthread_create_on_cpu(). We need
+ * to format it here.
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ task = kthread_create_on_cpu(kthread_worker_fn, worker,
+ cpu, name);
+ } else {
+ task = __kthread_create_on_node(kthread_worker_fn, worker,
+ -1, namefmt, args);
+ }
+
+ if (IS_ERR(task))
+ goto fail_task;
+
+ worker->flags = flags;
+ worker->task = task;
+ wake_up_process(task);
+ return worker;
+
+fail_task:
+ kfree(worker);
+ return ERR_CAST(task);
+}
+
+/**
+ * kthread_create_worker - create a kthread worker
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the kthread worker (task).
+ *
+ * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
+ * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
+ * when the worker was SIGKILLed.
+ */
+struct kthread_worker *
+kthread_create_worker(unsigned int flags, const char namefmt[], ...)
+{
+ struct kthread_worker *worker;
+ va_list args;
+
+ va_start(args, namefmt);
+ worker = __kthread_create_worker(-1, flags, namefmt, args);
+ va_end(args);
+
+ return worker;
+}
+EXPORT_SYMBOL(kthread_create_worker);
+
+/**
+ * kthread_create_worker_on_cpu - create a kthread worker and bind it
+ * to a given CPU and the associated NUMA node.
+ * @cpu: CPU number
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the kthread worker (task).
+ *
+ * Use a valid CPU number if you want to bind the kthread worker
+ * to the given CPU and the associated NUMA node.
+ *
+ * It is good practice to also include the CPU number in the worker name.
+ * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu).
+ *
+ * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
+ * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
+ * when the worker was SIGKILLed.
+ */
+struct kthread_worker *
+kthread_create_worker_on_cpu(int cpu, unsigned int flags,
+ const char namefmt[], ...)
+{
+ struct kthread_worker *worker;
+ va_list args;
+
+ va_start(args, namefmt);
+ worker = __kthread_create_worker(cpu, flags, namefmt, args);
+ va_end(args);
+
+ return worker;
+}
+EXPORT_SYMBOL(kthread_create_worker_on_cpu);
+
+/*
+ * Returns true when the work could not be queued at the moment.
+ * It happens when it is already pending in a worker list
+ * or when it is being cancelled.
+ */
+static inline bool queuing_blocked(struct kthread_worker *worker,
+ struct kthread_work *work)
+{
+ lockdep_assert_held(&worker->lock);
+
+ return !list_empty(&work->node) || work->canceling;
+}
+
+static void kthread_insert_work_sanity_check(struct kthread_worker *worker,
+ struct kthread_work *work)
{
lockdep_assert_held(&worker->lock);
+ WARN_ON_ONCE(!list_empty(&work->node));
+ /* Do not use a work with >1 worker, see kthread_queue_work() */
+ WARN_ON_ONCE(work->worker && work->worker != worker);
+}
+
+/* insert @work before @pos in @worker */
+static void kthread_insert_work(struct kthread_worker *worker,
+ struct kthread_work *work,
+ struct list_head *pos)
+{
+ kthread_insert_work_sanity_check(worker, work);
list_add_tail(&work->node, pos);
work->worker = worker;
@@ -619,29 +763,133 @@ static void insert_kthread_work(struct kthread_worker *worker,
}
/**
- * queue_kthread_work - queue a kthread_work
+ * kthread_queue_work - queue a kthread_work
* @worker: target kthread_worker
* @work: kthread_work to queue
*
* Queue @work to work processor @task for async execution. @task
* must have been created with kthread_worker_create(). Returns %true
* if @work was successfully queued, %false if it was already pending.
+ *
+ * Reinitialize the work if it needs to be used by another worker.
+ * For example, when the worker was stopped and started again.
*/
-bool queue_kthread_work(struct kthread_worker *worker,
+bool kthread_queue_work(struct kthread_worker *worker,
struct kthread_work *work)
{
bool ret = false;
unsigned long flags;
spin_lock_irqsave(&worker->lock, flags);
- if (list_empty(&work->node)) {
- insert_kthread_work(worker, work, &worker->work_list);
+ if (!queuing_blocked(worker, work)) {
+ kthread_insert_work(worker, work, &worker->work_list);
+ ret = true;
+ }
+ spin_unlock_irqrestore(&worker->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kthread_queue_work);
+
+/**
+ * kthread_delayed_work_timer_fn - callback that queues the associated kthread
+ * delayed work when the timer expires.
+ * @__data: pointer to the data associated with the timer
+ *
+ * The format of the function is defined by struct timer_list.
+ * It should have been called from irqsafe timer with irq already off.
+ */
+void kthread_delayed_work_timer_fn(unsigned long __data)
+{
+ struct kthread_delayed_work *dwork =
+ (struct kthread_delayed_work *)__data;
+ struct kthread_work *work = &dwork->work;
+ struct kthread_worker *worker = work->worker;
+
+ /*
+ * This might happen when a pending work is reinitialized.
+ * It means that it is being used the wrong way.
+ */
+ if (WARN_ON_ONCE(!worker))
+ return;
+
+ spin_lock(&worker->lock);
+ /* Work must not be used with >1 worker, see kthread_queue_work(). */
+ WARN_ON_ONCE(work->worker != worker);
+
+ /* Move the work from worker->delayed_work_list. */
+ WARN_ON_ONCE(list_empty(&work->node));
+ list_del_init(&work->node);
+ kthread_insert_work(worker, work, &worker->work_list);
+
+ spin_unlock(&worker->lock);
+}
+EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
+
+void __kthread_queue_delayed_work(struct kthread_worker *worker,
+ struct kthread_delayed_work *dwork,
+ unsigned long delay)
+{
+ struct timer_list *timer = &dwork->timer;
+ struct kthread_work *work = &dwork->work;
+
+ WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn ||
+ timer->data != (unsigned long)dwork);
+
+ /*
+ * If @delay is 0, queue @dwork->work immediately. This is for
+ * both optimization and correctness. The earliest @timer can
+ * expire is on the closest next tick, and delayed_work users depend
+ * on there being no such delay when @delay is 0.
+ */
+ if (!delay) {
+ kthread_insert_work(worker, work, &worker->work_list);
+ return;
+ }
+
+ /* Be paranoid and try to detect possible races already now. */
+ kthread_insert_work_sanity_check(worker, work);
+
+ list_add(&work->node, &worker->delayed_work_list);
+ work->worker = worker;
+ timer_stats_timer_set_start_info(&dwork->timer);
+ timer->expires = jiffies + delay;
+ add_timer(timer);
+}
+
+/**
+ * kthread_queue_delayed_work - queue the associated kthread work
+ * after a delay.
+ * @worker: target kthread_worker
+ * @dwork: kthread_delayed_work to queue
+ * @delay: number of jiffies to wait before queuing
+ *
+ * If the work has not been pending it starts a timer that will queue
+ * the work after the given @delay. If @delay is zero, it queues the
+ * work immediately.
+ *
+ * Return: %false if the @work was already pending, meaning that either
+ * its timer was running or the work was queued. Returns %true
+ * otherwise.
+ */
+bool kthread_queue_delayed_work(struct kthread_worker *worker,
+ struct kthread_delayed_work *dwork,
+ unsigned long delay)
+{
+ struct kthread_work *work = &dwork->work;
+ unsigned long flags;
+ bool ret = false;
+
+ spin_lock_irqsave(&worker->lock, flags);
+
+ if (!queuing_blocked(worker, work)) {
+ __kthread_queue_delayed_work(worker, dwork, delay);
ret = true;
}
+
spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
-EXPORT_SYMBOL_GPL(queue_kthread_work);
+EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
struct kthread_flush_work {
struct kthread_work work;
@@ -656,12 +904,12 @@ static void kthread_flush_work_fn(struct kthread_work *work)
}
/**
- * flush_kthread_work - flush a kthread_work
+ * kthread_flush_work - flush a kthread_work
* @work: work to flush
*
* If @work is queued or executing, wait for it to finish execution.
*/
-void flush_kthread_work(struct kthread_work *work)
+void kthread_flush_work(struct kthread_work *work)
{
struct kthread_flush_work fwork = {
KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
@@ -670,21 +918,19 @@ void flush_kthread_work(struct kthread_work *work)
struct kthread_worker *worker;
bool noop = false;
-retry:
worker = work->worker;
if (!worker)
return;
spin_lock_irq(&worker->lock);
- if (work->worker != worker) {
- spin_unlock_irq(&worker->lock);
- goto retry;
- }
+ /* Work must not be used with >1 worker, see kthread_queue_work(). */
+ WARN_ON_ONCE(work->worker != worker);
if (!list_empty(&work->node))
- insert_kthread_work(worker, &fwork.work, work->node.next);
+ kthread_insert_work(worker, &fwork.work, work->node.next);
else if (worker->current_work == work)
- insert_kthread_work(worker, &fwork.work, worker->work_list.next);
+ kthread_insert_work(worker, &fwork.work,
+ worker->work_list.next);
else
noop = true;
@@ -693,23 +939,214 @@ retry:
if (!noop)
wait_for_completion(&fwork.done);
}
-EXPORT_SYMBOL_GPL(flush_kthread_work);
+EXPORT_SYMBOL_GPL(kthread_flush_work);
+
+/*
+ * This function removes the work from the worker queue. Also it makes sure
+ * that it won't get queued later via the delayed work's timer.
+ *
+ * The work might still be in use when this function finishes. See the
+ * current_work processed by the worker.
+ *
+ * Return: %true if @work was pending and successfully canceled,
+ * %false if @work was not pending
+ */
+static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+ unsigned long *flags)
+{
+ /* Try to cancel the timer if exists. */
+ if (is_dwork) {
+ struct kthread_delayed_work *dwork =
+ container_of(work, struct kthread_delayed_work, work);
+ struct kthread_worker *worker = work->worker;
+
+ /*
+ * del_timer_sync() must be called to make sure that the timer
+ * callback is not running. The lock must be temporarily released
+ * to avoid a deadlock with the callback. In the meantime,
+ * any queuing is blocked by setting the canceling counter.
+ */
+ work->canceling++;
+ spin_unlock_irqrestore(&worker->lock, *flags);
+ del_timer_sync(&dwork->timer);
+ spin_lock_irqsave(&worker->lock, *flags);
+ work->canceling--;
+ }
+
+ /*
+ * Try to remove the work from a worker list. It might either
+ * be from worker->work_list or from worker->delayed_work_list.
+ */
+ if (!list_empty(&work->node)) {
+ list_del_init(&work->node);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work
+ * @worker: kthread worker to use
+ * @dwork: kthread delayed work to queue
+ * @delay: number of jiffies to wait before queuing
+ *
+ * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise,
+ * modify @dwork's timer so that it expires after @delay. If @delay is zero,
+ * @work is guaranteed to be queued immediately.
+ *
+ * Return: %true if @dwork was pending and its timer was modified,
+ * %false otherwise.
+ *
+ * A special case is when the work is being canceled in parallel.
+ * It might be caused either by the real kthread_cancel_delayed_work_sync()
+ * or yet another kthread_mod_delayed_work() call. We let the other command
+ * win and return %false here. The caller is supposed to synchronize these
+ * operations in a reasonable way.
+ *
+ * This function is safe to call from any context including IRQ handler.
+ * See __kthread_cancel_work() and kthread_delayed_work_timer_fn()
+ * for details.
+ */
+bool kthread_mod_delayed_work(struct kthread_worker *worker,
+ struct kthread_delayed_work *dwork,
+ unsigned long delay)
+{
+ struct kthread_work *work = &dwork->work;
+ unsigned long flags;
+ int ret = false;
+
+ spin_lock_irqsave(&worker->lock, flags);
+
+ /* Do not bother with canceling when never queued. */
+ if (!work->worker)
+ goto fast_queue;
+
+ /* Work must not be used with >1 worker, see kthread_queue_work() */
+ WARN_ON_ONCE(work->worker != worker);
+
+ /* Do not fight with another command that is canceling this work. */
+ if (work->canceling)
+ goto out;
+
+ ret = __kthread_cancel_work(work, true, &flags);
+fast_queue:
+ __kthread_queue_delayed_work(worker, dwork, delay);
+out:
+ spin_unlock_irqrestore(&worker->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
+
+static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
+{
+ struct kthread_worker *worker = work->worker;
+ unsigned long flags;
+ int ret = false;
+
+ if (!worker)
+ goto out;
+
+ spin_lock_irqsave(&worker->lock, flags);
+ /* Work must not be used with >1 worker, see kthread_queue_work(). */
+ WARN_ON_ONCE(work->worker != worker);
+
+ ret = __kthread_cancel_work(work, is_dwork, &flags);
+
+ if (worker->current_work != work)
+ goto out_fast;
+
+ /*
+ * The work is in progress and we need to wait with the lock released.
+ * In the meantime, block any queuing by setting the canceling counter.
+ */
+ work->canceling++;
+ spin_unlock_irqrestore(&worker->lock, flags);
+ kthread_flush_work(work);
+ spin_lock_irqsave(&worker->lock, flags);
+ work->canceling--;
+
+out_fast:
+ spin_unlock_irqrestore(&worker->lock, flags);
+out:
+ return ret;
+}
+
+/**
+ * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish
+ * @work: the kthread work to cancel
+ *
+ * Cancel @work and wait for its execution to finish. This function
+ * can be used even if the work re-queues itself. On return from this
+ * function, @work is guaranteed to be not pending or executing on any CPU.
+ *
+ * kthread_cancel_work_sync(&delayed_work->work) must not be used for
+ * delayed_work's. Use kthread_cancel_delayed_work_sync() instead.
+ *
+ * The caller must ensure that the worker on which @work was last
+ * queued can't be destroyed before this function returns.
+ *
+ * Return: %true if @work was pending, %false otherwise.
+ */
+bool kthread_cancel_work_sync(struct kthread_work *work)
+{
+ return __kthread_cancel_work_sync(work, false);
+}
+EXPORT_SYMBOL_GPL(kthread_cancel_work_sync);
+
+/**
+ * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and
+ * wait for it to finish.
+ * @dwork: the kthread delayed work to cancel
+ *
+ * This is kthread_cancel_work_sync() for delayed works.
+ *
+ * Return: %true if @dwork was pending, %false otherwise.
+ */
+bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork)
+{
+ return __kthread_cancel_work_sync(&dwork->work, true);
+}
+EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
/**
- * flush_kthread_worker - flush all current works on a kthread_worker
+ * kthread_flush_worker - flush all current works on a kthread_worker
* @worker: worker to flush
*
* Wait until all currently executing or pending works on @worker are
* finished.
*/
-void flush_kthread_worker(struct kthread_worker *worker)
+void kthread_flush_worker(struct kthread_worker *worker)
{
struct kthread_flush_work fwork = {
KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
COMPLETION_INITIALIZER_ONSTACK(fwork.done),
};
- queue_kthread_work(worker, &fwork.work);
+ kthread_queue_work(worker, &fwork.work);
wait_for_completion(&fwork.done);
}
-EXPORT_SYMBOL_GPL(flush_kthread_worker);
+EXPORT_SYMBOL_GPL(kthread_flush_worker);
+
+/**
+ * kthread_destroy_worker - destroy a kthread worker
+ * @worker: worker to be destroyed
+ *
+ * Flush and destroy @worker. The simple flush is enough because the kthread
+ * worker API is used only in trivial scenarios. There are no multi-step state
+ * machines needed.
+ */
+void kthread_destroy_worker(struct kthread_worker *worker)
+{
+ struct task_struct *task;
+
+ task = worker->task;
+ if (WARN_ON(!task))
+ return;
+
+ kthread_flush_worker(worker);
+ kthread_stop(task);
+ WARN_ON(!list_empty(&worker->work_list));
+ kfree(worker);
+}
+EXPORT_SYMBOL(kthread_destroy_worker);
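
Taken together, the new API lets a subsystem own a dedicated worker
instead of open-coding kthread_run() plus a hand-rolled queue. A hedged
usage sketch of the lifecycle (names, flags, and timing are illustrative,
not from the patch):

    #include <linux/kthread.h>

    static struct kthread_delayed_work example_dwork;

    static void example_fn(struct kthread_work *work)
    {
            pr_info("example kthread work ran\n");
    }

    /* Illustrative lifecycle: create a worker, queue delayed work on it,
     * then cancel the work and destroy the worker on teardown. */
    static int example_start(void)
    {
            struct kthread_worker *worker;

            worker = kthread_create_worker(0, "example-worker");
            if (IS_ERR(worker))
                    return PTR_ERR(worker);

            kthread_init_delayed_work(&example_dwork, example_fn);
            kthread_queue_delayed_work(worker, &example_dwork, HZ);

            /* ... later, on teardown ... */
            kthread_cancel_delayed_work_sync(&example_dwork);
            kthread_destroy_worker(worker);
            return 0;
    }
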
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 8bbe50704621..af4643873e71 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -274,7 +274,6 @@ static int klp_write_object_relocations(struct module *pmod,
objname = klp_is_module(obj) ? obj->name : "vmlinux";
- module_disable_ro(pmod);
/* For each klp relocation section */
for (i = 1; i < pmod->klp_info->hdr.e_shnum; i++) {
sec = pmod->klp_info->sechdrs + i;
@@ -309,7 +308,6 @@ static int klp_write_object_relocations(struct module *pmod,
break;
}
- module_enable_ro(pmod, true);
return ret;
}
@@ -547,9 +545,6 @@ static int __klp_enable_patch(struct klp_patch *patch)
list_prev_entry(patch, list)->state == KLP_DISABLED)
return -EBUSY;
- pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n");
- add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
-
pr_notice("enabling patch '%s'\n", patch->mod->name);
klp_for_each_object(patch, obj) {
@@ -763,6 +758,12 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
func->old_sympos ? func->old_sympos : 1);
}
+/* Arches may override this to finish any remaining arch-specific tasks */
+void __weak arch_klp_init_object_loaded(struct klp_patch *patch,
+ struct klp_object *obj)
+{
+}
+
/* parts of the initialization that is done only when the object is loaded */
static int klp_init_object_loaded(struct klp_patch *patch,
struct klp_object *obj)
@@ -770,9 +771,15 @@ static int klp_init_object_loaded(struct klp_patch *patch,
struct klp_func *func;
int ret;
+ module_disable_ro(patch->mod);
ret = klp_write_object_relocations(patch->mod, obj);
- if (ret)
+ if (ret) {
+ module_enable_ro(patch->mod, true);
return ret;
+ }
+
+ arch_klp_init_object_loaded(patch, obj);
+ module_enable_ro(patch->mod, true);
klp_for_each_func(obj, func) {
ret = klp_find_object_symbol(obj->name, func->old_name,
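
The net effect is that the module_disable_ro()/module_enable_ro()
bracketing moves into klp_init_object_loaded(), where it now also covers
the new weak arch_klp_init_object_loaded() hook. A sketch of the
write-to-protected-module pattern; do_fixups() is a hypothetical stand-in,
not a kernel API:

    #include <linux/module.h>

    static int do_fixups(struct module *mod)
    {
            return 0;       /* hypothetical text/rodata fixups */
    }

    /* Illustrative only: lift a module's RO protection around fixups and
     * restore it on every path, including the error path. */
    static int example_patch_module(struct module *mod)
    {
            int ret;

            module_disable_ro(mod);
            ret = do_fixups(mod);
            module_enable_ro(mod, true);    /* also re-protect after_init */
            return ret;
    }
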
diff --git a/kernel/module.c b/kernel/module.c
index 529efae9f481..f57dd63186e6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1149,6 +1149,8 @@ static size_t module_flags_taint(struct module *mod, char *buf)
buf[l++] = 'C';
if (mod->taints & (1 << TAINT_UNSIGNED_MODULE))
buf[l++] = 'E';
+ if (mod->taints & (1 << TAINT_LIVEPATCH))
+ buf[l++] = 'K';
/*
* TAINT_FORCED_RMMOD: could be added.
* TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
@@ -2792,14 +2794,17 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l
}
#ifdef CONFIG_LIVEPATCH
-static int find_livepatch_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
{
- mod->klp = get_modinfo(info, "livepatch") ? true : false;
+ if (get_modinfo(info, "livepatch")) {
+ mod->klp = true;
+ add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
+ }
return 0;
}
#else /* !CONFIG_LIVEPATCH */
-static int find_livepatch_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
{
if (get_modinfo(info, "livepatch")) {
pr_err("%s: module is marked as livepatch module, but livepatch support is disabled",
@@ -2969,7 +2974,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
"is unknown, you have been warned.\n", mod->name);
}
- err = find_livepatch_modinfo(mod, info);
+ err = check_modinfo_livepatch(mod, info);
if (err)
return err;
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1ef673..e6480e20379e 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -71,6 +71,32 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)
panic_smp_self_stop();
}
+/*
+ * Stop other CPUs in panic. Architecture-dependent code may override this
+ * with a more suitable version. For example, if the architecture supports
+ * crash dump, it should save registers of each stopped CPU and disable
+ * per-CPU features such as virtualization extensions.
+ */
+void __weak crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+
+ /*
+ * This function can be called twice in the panic path, but obviously
+ * we execute it only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ /*
+ * Note smp_send_stop is the usual smp shutdown function, which
+ * unfortunately means it may not be hardened to work in a panic
+ * situation.
+ */
+ smp_send_stop();
+ cpus_stopped = 1;
+}
+
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
/*
@@ -164,14 +190,21 @@ void panic(const char *fmt, ...)
if (!_crash_kexec_post_notifiers) {
printk_nmi_flush_on_panic();
__crash_kexec(NULL);
- }
- /*
- * Note smp_send_stop is the usual smp shutdown function, which
- * unfortunately means it may not be hardened to work in a panic
- * situation.
- */
- smp_send_stop();
+ /*
+ * Note smp_send_stop is the usual smp shutdown function, which
+ * unfortunately means it may not be hardened to work in a
+ * panic situation.
+ */
+ smp_send_stop();
+ } else {
+ /*
+ * If we want to do crash dump after notifier calls and
+ * kmsg_dump, we will need architecture dependent extra
+ * works in addition to stopping other CPUs.
+ */
+ crash_smp_send_stop();
+ }
/*
* Run any panic handlers, including those that might need to
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 8f27d5a8adf6..2fba066e125f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -144,23 +144,12 @@ int freeze_processes(void)
/*
* Now that the whole userspace is frozen we need to disable
* the OOM killer to disallow any further interference with
- * killable tasks.
+ * killable tasks. There is no guarantee that OOM victims will ever
+ * reach a point where they go away, so we have to wait with a timeout.
*/
- if (!error && !oom_killer_disable())
+ if (!error && !oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs)))
error = -EBUSY;
- /*
- * There is a hard to fix race between oom_reaper kernel thread
- * and oom_killer_disable. oom_reaper calls exit_oom_victim
- * before the victim reaches exit_mm so try to freeze all the tasks
- * again and catch such a left over task.
- */
- if (!error) {
- pr_info("Double checking all user space processes after OOM killer disable... ");
- error = try_to_freeze_tasks(true);
- pr_cont("\n");
- }
-
if (error)
thaw_processes();
return error;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index eea6dbc2d8cf..d5e397315473 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -253,6 +253,17 @@ static int preferred_console = -1;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
+#ifdef CONFIG_OF
+static bool of_specified_console;
+
+void console_set_by_of(void)
+{
+ of_specified_console = true;
+}
+#else
+# define of_specified_console false
+#endif
+
/* Flag: console code may call schedule() */
static int console_may_schedule;
@@ -655,11 +666,8 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
* better readable output. 'c' in the record flags mark the first
* fragment of a line, '+' the following.
*/
- if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
- cont = 'c';
- else if ((msg->flags & LOG_CONT) ||
- ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
- cont = '+';
+ if (msg->flags & LOG_CONT)
+ cont = (prev_flags & LOG_CONT) ? '+' : 'c';
return scnprintf(buf, size, "%u,%llu,%llu,%c;",
(msg->facility << 3) | msg->level, seq, ts_usec, cont);
@@ -786,6 +794,8 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
return ret;
}
+static void cont_flush(void);
+
static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -801,6 +811,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (ret)
return ret;
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
while (user->seq == log_next_seq) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
@@ -863,6 +874,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
return -ESPIPE;
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
switch (whence) {
case SEEK_SET:
/* the first record */
@@ -901,6 +913,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
if (user->seq < log_next_seq) {
/* return error when data has vanished underneath us */
if (user->seq < log_first_seq)
@@ -1287,6 +1300,7 @@ static int syslog_print(char __user *buf, int size)
size_t skip;
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
@@ -1346,6 +1360,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
return -ENOMEM;
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
if (buf) {
u64 next_seq;
u64 seq;
@@ -1507,6 +1522,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
raw_spin_lock_irq(&logbuf_lock);
+ cont_flush();
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
@@ -1643,35 +1659,33 @@ static struct cont {
bool flushed:1; /* buffer sealed and committed */
} cont;
-static void cont_flush(enum log_flags flags)
+static void cont_flush(void)
{
if (cont.flushed)
return;
if (cont.len == 0)
return;
-
if (cont.cons) {
/*
* If a fragment of this line was directly flushed to the
* console; wait for the console to pick up the rest of the
* line. LOG_NOCONS suppresses a duplicated output.
*/
- log_store(cont.facility, cont.level, flags | LOG_NOCONS,
+ log_store(cont.facility, cont.level, cont.flags | LOG_NOCONS,
cont.ts_nsec, NULL, 0, cont.buf, cont.len);
- cont.flags = flags;
cont.flushed = true;
} else {
/*
* If no fragment of this line ever reached the console,
* just submit it to the store and free the buffer.
*/
- log_store(cont.facility, cont.level, flags, 0,
+ log_store(cont.facility, cont.level, cont.flags, 0,
NULL, 0, cont.buf, cont.len);
cont.len = 0;
}
}
-static bool cont_add(int facility, int level, const char *text, size_t len)
+static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len)
{
if (cont.len && cont.flushed)
return false;
@@ -1682,7 +1696,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
* the line gets too long, split it up in separate records.
*/
if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
- cont_flush(LOG_CONT);
+ cont_flush();
return false;
}
@@ -1691,7 +1705,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
cont.level = level;
cont.owner = current;
cont.ts_nsec = local_clock();
- cont.flags = 0;
+ cont.flags = flags;
cont.cons = 0;
cont.flushed = false;
}
@@ -1699,8 +1713,15 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
memcpy(cont.buf + cont.len, text, len);
cont.len += len;
+ // The original flags come from the first line,
+ // but later continuations can add a newline.
+ if (flags & LOG_NEWLINE) {
+ cont.flags |= LOG_NEWLINE;
+ cont_flush();
+ }
+
if (cont.len > (sizeof(cont.buf) * 80) / 100)
- cont_flush(LOG_CONT);
+ cont_flush();
return true;
}
@@ -1733,6 +1754,31 @@ static size_t cont_print_text(char *text, size_t size)
return textlen;
}
+static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
+{
+ /*
+ * If an earlier line was buffered, and we're a continuation
+ * write from the same process, try to add it to the buffer.
+ */
+ if (cont.len) {
+ if (cont.owner == current && (lflags & LOG_CONT)) {
+ if (cont_add(facility, level, lflags, text, text_len))
+ return text_len;
+ }
+ /* Otherwise, make sure it's flushed */
+ cont_flush();
+ }
+
+ /* If it doesn't end in a newline, try to buffer the current line */
+ if (!(lflags & LOG_NEWLINE)) {
+ if (cont_add(facility, level, lflags, text, text_len))
+ return text_len;
+ }
+
+ /* Store it in the record log */
+ return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
+}
+
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
@@ -1819,10 +1865,9 @@ asmlinkage int vprintk_emit(int facility, int level,
/* strip kernel syslog prefix and extract log level or control flags */
if (facility == 0) {
- int kern_level = printk_get_level(text);
+ int kern_level;
- if (kern_level) {
- const char *end_of_header = printk_skip_level(text);
+ while ((kern_level = printk_get_level(text)) != 0) {
switch (kern_level) {
case '0' ... '7':
if (level == LOGLEVEL_DEFAULT)
@@ -1830,14 +1875,13 @@ asmlinkage int vprintk_emit(int facility, int level,
/* fallthrough */
case 'd': /* KERN_DEFAULT */
lflags |= LOG_PREFIX;
+ break;
+ case 'c': /* KERN_CONT */
+ lflags |= LOG_CONT;
}
- /*
- * No need to check length here because vscnprintf
- * put '\0' at the end of the string. Only valid and
- * newly printed level is detected.
- */
- text_len -= end_of_header - text;
- text = (char *)end_of_header;
+
+ text_len -= 2;
+ text += 2;
}
}
@@ -1847,45 +1891,7 @@ asmlinkage int vprintk_emit(int facility, int level,
if (dict)
lflags |= LOG_PREFIX|LOG_NEWLINE;
- if (!(lflags & LOG_NEWLINE)) {
- /*
- * Flush the conflicting buffer. An earlier newline was missing,
- * or another task also prints continuation lines.
- */
- if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
- cont_flush(LOG_NEWLINE);
-
- /* buffer line if possible, otherwise store it right away */
- if (cont_add(facility, level, text, text_len))
- printed_len += text_len;
- else
- printed_len += log_store(facility, level,
- lflags | LOG_CONT, 0,
- dict, dictlen, text, text_len);
- } else {
- bool stored = false;
-
- /*
- * If an earlier newline was missing and it was the same task,
- * either merge it with the current buffer and flush, or if
- * there was a race with interrupts (prefix == true) then just
- * flush it out and store this line separately.
- * If the preceding printk was from a different task and missed
- * a newline, flush and append the newline.
- */
- if (cont.len) {
- if (cont.owner == current && !(lflags & LOG_PREFIX))
- stored = cont_add(facility, level, text,
- text_len);
- cont_flush(LOG_NEWLINE);
- }
-
- if (stored)
- printed_len += text_len;
- else
- printed_len += log_store(facility, level, lflags, 0,
- dict, dictlen, text, text_len);
- }
+ printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len);
logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
@@ -2647,7 +2653,7 @@ void register_console(struct console *newcon)
* didn't select a console we take the first one
* that registers here.
*/
- if (preferred_console < 0) {
+ if (preferred_console < 0 && !of_specified_console) {
if (newcon->index < 0)
newcon->index = 0;
if (newcon->setup == NULL ||
@@ -3029,6 +3035,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
dumper->active = true;
raw_spin_lock_irqsave(&logbuf_lock, flags);
+ cont_flush();
dumper->cur_seq = clear_seq;
dumper->cur_idx = clear_idx;
dumper->next_seq = log_next_seq;
@@ -3119,6 +3126,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
bool ret;
raw_spin_lock_irqsave(&logbuf_lock, flags);
+ cont_flush();
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
@@ -3161,6 +3169,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
goto out;
raw_spin_lock_irqsave(&logbuf_lock, flags);
+ cont_flush();
if (dumper->cur_seq < log_first_seq) {
/* messages are gone, move to first available one */
dumper->cur_seq = log_first_seq;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1d3b7665d0be..2a99027312a6 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -73,6 +73,8 @@ void __ptrace_unlink(struct task_struct *child)
{
BUG_ON(!child->ptrace);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
@@ -489,7 +491,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
/* Architecture-specific hardware disable .. */
ptrace_disable(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
write_lock_irq(&tasklist_lock);
/*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 944b1b491ed8..1898559e6b60 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -170,7 +170,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
false));
}
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
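__latent_entropy only does something when the kernel is built with the GCC latent_entropy plugin, which instruments the marked functions (busy softirq handlers are good candidates) to stir some of their state into the entropy pool. A sketch of the usual attribute plumbing, assuming the compiler-header pattern:

#ifdef LATENT_ENTROPY_PLUGIN
#define __latent_entropy __attribute__((latent_entropy))
#else
#define __latent_entropy	/* compiles away without the plugin */
#endif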
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7e2e03879c2e..69a5611a7e7c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3013,7 +3013,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/*
* Do RCU core processing for the current CPU.
*/
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
struct rcu_state *rsp;
diff --git a/kernel/relay.c b/kernel/relay.c
index fc9b4a4af463..da79a109dbeb 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -328,13 +328,15 @@ static struct rchan_callbacks default_channel_callbacks = {
/**
* wakeup_readers - wake up readers waiting on a channel
- * @data: contains the channel buffer
+ * @work: contains the channel buffer
*
- * This is the timer function used to defer reader waking.
+ * This is the function used to defer reader waking.
*/
-static void wakeup_readers(unsigned long data)
+static void wakeup_readers(struct irq_work *work)
{
- struct rchan_buf *buf = (struct rchan_buf *)data;
+ struct rchan_buf *buf;
+
+ buf = container_of(work, struct rchan_buf, wakeup_work);
wake_up_interruptible(&buf->read_wait);
}
@@ -352,9 +354,10 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
if (init) {
init_waitqueue_head(&buf->read_wait);
kref_init(&buf->kref);
- setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
- } else
- del_timer_sync(&buf->timer);
+ init_irq_work(&buf->wakeup_work, wakeup_readers);
+ } else {
+ irq_work_sync(&buf->wakeup_work);
+ }
buf->subbufs_produced = 0;
buf->subbufs_consumed = 0;
@@ -487,7 +490,7 @@ free_buf:
static void relay_close_buf(struct rchan_buf *buf)
{
buf->finalized = 1;
- del_timer_sync(&buf->timer);
+ irq_work_sync(&buf->wakeup_work);
buf->chan->cb->remove_buf_file(buf->dentry);
kref_put(&buf->kref, relay_remove_buf);
}
@@ -754,14 +757,15 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
buf->early_bytes += buf->chan->subbuf_size -
buf->padding[old_subbuf];
smp_mb();
- if (waitqueue_active(&buf->read_wait))
+ if (waitqueue_active(&buf->read_wait)) {
/*
* Calling wake_up_interruptible() from here
* will deadlock if we happen to be logging
* from the scheduler (trying to re-grab
* rq->lock), so defer it.
*/
- mod_timer(&buf->timer, jiffies + 1);
+ irq_work_queue(&buf->wakeup_work);
+ }
}
old = buf->data;
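An irq_work callback runs from a self-interrupt almost immediately after being queued, and irq_work_queue() is safe while holding rq->lock, which is exactly the constraint the old jiffies+1 timer was working around. A minimal standalone sketch of the API as used here (all names illustrative):

#include <linux/irq_work.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_readers);	/* hypothetical wait queue */
static struct irq_work my_wakeup_work;

static void my_wakeup(struct irq_work *work)
{
	wake_up_interruptible(&my_readers);	/* deferred out of rq->lock */
}

static void my_buf_init(void)
{
	init_irq_work(&my_wakeup_work, my_wakeup);	/* was setup_timer() */
}

static void my_hot_path(void)	/* may run under scheduler locks */
{
	irq_work_queue(&my_wakeup_work);	/* was mod_timer(..., jiffies + 1) */
}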
@@ -1108,51 +1112,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
return end_pos;
}
-/*
- * subbuf_read_actor - read up to one subbuf's worth of data
- */
-static int subbuf_read_actor(size_t read_start,
- struct rchan_buf *buf,
- size_t avail,
- read_descriptor_t *desc)
-{
- void *from;
- int ret = 0;
-
- from = buf->start + read_start;
- ret = avail;
- if (copy_to_user(desc->arg.buf, from, avail)) {
- desc->error = -EFAULT;
- ret = 0;
- }
- desc->arg.data += ret;
- desc->written += ret;
- desc->count -= ret;
-
- return ret;
-}
-
-typedef int (*subbuf_actor_t) (size_t read_start,
- struct rchan_buf *buf,
- size_t avail,
- read_descriptor_t *desc);
-
-/*
- * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
- */
-static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
- subbuf_actor_t subbuf_actor,
- read_descriptor_t *desc)
+static ssize_t relay_file_read(struct file *filp,
+ char __user *buffer,
+ size_t count,
+ loff_t *ppos)
{
struct rchan_buf *buf = filp->private_data;
size_t read_start, avail;
+ size_t written = 0;
int ret;
- if (!desc->count)
+ if (!count)
return 0;
inode_lock(file_inode(filp));
do {
+ void *from;
+
if (!relay_file_read_avail(buf, *ppos))
break;
@@ -1161,32 +1137,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
if (!avail)
break;
- avail = min(desc->count, avail);
- ret = subbuf_actor(read_start, buf, avail, desc);
- if (desc->error < 0)
+ avail = min(count, avail);
+ from = buf->start + read_start;
+ ret = avail;
+ if (copy_to_user(buffer, from, avail))
break;
- if (ret) {
- relay_file_read_consume(buf, read_start, ret);
- *ppos = relay_file_read_end_pos(buf, read_start, ret);
- }
- } while (desc->count && ret);
- inode_unlock(file_inode(filp));
+ buffer += ret;
+ written += ret;
+ count -= ret;
- return desc->written;
-}
+ relay_file_read_consume(buf, read_start, ret);
+ *ppos = relay_file_read_end_pos(buf, read_start, ret);
+ } while (count);
+ inode_unlock(file_inode(filp));
-static ssize_t relay_file_read(struct file *filp,
- char __user *buffer,
- size_t count,
- loff_t *ppos)
-{
- read_descriptor_t desc;
- desc.written = 0;
- desc.count = count;
- desc.arg.buf = buffer;
- desc.error = 0;
- return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
+ return written;
}
static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 13935886a471..fa178b62ea79 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -415,7 +415,8 @@ static char *task_group_path(struct task_group *tg)
if (autogroup_path(tg, group_path, PATH_MAX))
return group_path;
- return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+ cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+ return group_path;
}
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 502e95a6e927..76ee7de1859d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5471,13 +5471,18 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
*/
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
{
- struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
- u64 avg_idle = this_rq()->avg_idle;
- u64 avg_cost = this_sd->avg_scan_cost;
+ struct sched_domain *this_sd;
+ u64 avg_cost, avg_idle = this_rq()->avg_idle;
u64 time, cost;
s64 delta;
int cpu, wrap;
+ this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+ if (!this_sd)
+ return -1;
+
+ avg_cost = this_sd->avg_scan_cost;
+
/*
* Due to large variance we need a large fuzz factor; hackbench in
* particular is sensitive here.
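The reshuffled version above exists because rcu_dereference() can legitimately return NULL here (the LLC domain may be absent or mid-rebuild), and the old code read this_sd->avg_scan_cost before any check. The pattern, as a sketch:

	struct sched_domain *sd = rcu_dereference(*this_cpu_ptr(&sd_llc));

	if (!sd)
		return -1;	/* no LLC domain: nothing to scan */
	/* sd->avg_scan_cost is only safe to read past this point */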
@@ -8522,7 +8527,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
* run_rebalance_domains is triggered when needed from the scheduler tick.
* Also triggered for nohz idle balancing (with nohz_balancing_kick set).
*/
-static void run_rebalance_domains(struct softirq_action *h)
+static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
{
struct rq *this_rq = this_rq();
enum cpu_idle_type idle = this_rq->idle_balance ?
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 9fb873cfc75c..1d8718d5300d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -16,6 +16,9 @@
#include "sched.h"
+/* Linker adds these: start and end of __cpuidle functions */
+extern char __cpuidle_text_start[], __cpuidle_text_end[];
+
/**
* sched_idle_set_state - Record idle state for the current CPU.
* @idle_state: State to record.
@@ -53,7 +56,7 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
__setup("hlt", cpu_idle_nopoll_setup);
#endif
-static inline int cpu_idle_poll(void)
+static noinline int __cpuidle cpu_idle_poll(void)
{
rcu_idle_enter();
trace_cpu_idle_rcuidle(0, smp_processor_id());
@@ -84,7 +87,7 @@ void __weak arch_cpu_idle(void)
*
* To use when the cpuidle framework cannot be used.
*/
-void default_idle_call(void)
+void __cpuidle default_idle_call(void)
{
if (current_clr_polling_and_test()) {
local_irq_enable();
@@ -271,6 +274,12 @@ static void cpu_idle_loop(void)
}
}
+bool cpu_in_idle(unsigned long pc)
+{
+ return pc >= (unsigned long)__cpuidle_text_start &&
+ pc < (unsigned long)__cpuidle_text_end;
+}
+
void cpu_startup_entry(enum cpuhp_state state)
{
/*
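cpu_in_idle() can be a simple range check because __cpuidle drops every annotated function into one contiguous .cpuidle.text section, which the linker script brackets with the two symbols declared at the top of this file. A sketch of the supporting pieces, assuming the usual section-annotation pattern:

/* header side */
#define __cpuidle	__attribute__((__section__(".cpuidle.text")))

/* linker-script side, inside the text segment:
 *	__cpuidle_text_start = .;
 *	*(.cpuidle.text)
 *	__cpuidle_text_end = .;
 */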
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index fc0d8270f69e..4a5c6e73ecd4 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -122,12 +122,12 @@ static int smpboot_thread_fn(void *data)
if (kthread_should_park()) {
__set_current_state(TASK_RUNNING);
+ preempt_enable();
if (ht->park && td->status == HP_THREAD_ACTIVE) {
BUG_ON(td->cpu != smp_processor_id());
ht->park(td->cpu);
td->status = HP_THREAD_PARKED;
}
- preempt_enable();
kthread_parkme();
/* We might have been woken for stop */
continue;
@@ -186,6 +186,11 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
kfree(td);
return PTR_ERR(tsk);
}
+ /*
+ * Park the thread so that it can start directly on the CPU
+ * when that CPU becomes available.
+ */
+ kthread_park(tsk);
get_task_struct(tsk);
*per_cpu_ptr(ht->store, cpu) = tsk;
if (ht->create) {
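Parking the freshly created thread means it sits in TASK_PARKED until its CPU is actually online, at which point unparking both binds and wakes it, so nothing can run on the wrong CPU in between. A hedged sketch of that lifecycle (names illustrative):

	struct task_struct *tsk;

	tsk = kthread_create_on_cpu(my_thread_fn, my_data, cpu, "mythread/%u");
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);
	kthread_park(tsk);	/* waits in TASK_PARKED until unparked */
	/* ... later, once the CPU is online ... */
	kthread_unpark(tsk);	/* rebinds to cpu and wakes the thread */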
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 66762645f9e8..1bf81ef91375 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -496,7 +496,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
}
EXPORT_SYMBOL(__tasklet_hi_schedule_first);
-static void tasklet_action(struct softirq_action *a)
+static __latent_entropy void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;
@@ -532,7 +532,7 @@ static void tasklet_action(struct softirq_action *a)
}
}
-static void tasklet_hi_action(struct softirq_action *a)
+static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
struct tasklet_struct *list;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2c5e3a8e00d7..635482e60ca3 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -250,3 +250,8 @@ cond_syscall(sys_execveat);
/* membarrier */
cond_syscall(sys_membarrier);
+
+/* memory protection keys */
+cond_syscall(sys_pkey_mprotect);
+cond_syscall(sys_pkey_alloc);
+cond_syscall(sys_pkey_free);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a43775c6646c..706309f9ed84 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -107,9 +107,8 @@ extern unsigned int core_pipe_limit;
extern int pid_max;
extern int pid_max_min, pid_max_max;
extern int percpu_pagelist_fraction;
-extern int compat_log;
extern int latencytop_enabled;
-extern int sysctl_nr_open_min, sysctl_nr_open_max;
+extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif
@@ -1085,15 +1084,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &neg_one,
},
#endif
-#ifdef CONFIG_COMPAT
- {
- .procname = "compat-log",
- .data = &compat_log,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_RT_MUTEXES
{
.procname = "max_lock_depth",
@@ -1693,7 +1683,7 @@ static struct ctl_table fs_table[] = {
{
.procname = "nr_open",
.data = &sysctl_nr_open,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &sysctl_nr_open_min,
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index c3aad685bbc0..12dd190634ab 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -542,7 +542,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
static int alarm_timer_create(struct k_itimer *new_timer)
{
enum alarmtimer_type type;
- struct alarm_base *base;
if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
@@ -551,7 +550,6 @@ static int alarm_timer_create(struct k_itimer *new_timer)
return -EPERM;
type = clock2alarm(new_timer->it_clock);
- base = &alarm_bases[type];
alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
return 0;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e07fb093f819..37dec7e3db43 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -403,8 +403,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += clocksource_delta(tkr->read(tkr->clock),
- tkr->cycle_last, tkr->mask);
+ now += timekeeping_delta_to_ns(tkr,
+ clocksource_delta(
+ tkr->read(tkr->clock),
+ tkr->cycle_last,
+ tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
return now;
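The old code added a raw cycle delta to the nanosecond-based tkr->base; the fix scales the delta first. The scaling is the standard clocksource mult/shift conversion, roughly:

/* sketch of the cycles -> ns scaling done by timekeeping_delta_to_ns() */
static inline u64 cycles_to_ns(u64 delta, u32 mult, u32 shift)
{
	return (delta * mult) >> shift;
}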
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 32bf6f75a8fe..2d47980a1bc4 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1633,7 +1633,7 @@ static inline void __run_timers(struct timer_base *base)
/*
* This function runs timers and the timer-tq in bottom half context.
*/
-static void run_timer_softirq(struct softirq_action *h)
+static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 992ab9d99f35..e57980845549 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,8 +1,4 @@
-# We are fully aware of the dangers of __builtin_return_address()
-FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
-KBUILD_CFLAGS += $(FRAME_CFLAGS)
-
# Do not instrument the tracer itself:
ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index b2b6efc083a4..5e10395da88e 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -610,8 +610,7 @@ static int perf_sysenter_enable(struct trace_event_call *call)
if (!sys_perf_refcount_enter)
ret = register_trace_sys_enter(perf_syscall_enter, NULL);
if (ret) {
- pr_info("event trace: Could not activate"
- "syscall entry trace point");
+ pr_info("event trace: Could not activate syscall entry trace point");
} else {
set_bit(num, enabled_perf_enter_syscalls);
sys_perf_refcount_enter++;
@@ -682,8 +681,7 @@ static int perf_sysexit_enable(struct trace_event_call *call)
if (!sys_perf_refcount_exit)
ret = register_trace_sys_exit(perf_syscall_exit, NULL);
if (ret) {
- pr_info("event trace: Could not activate"
- "syscall exit trace point");
+ pr_info("event trace: Could not activate syscall exit trace point");
} else {
set_bit(num, enabled_perf_exit_syscalls);
sys_perf_refcount_exit++;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index d58cc4d8f0d1..cc40793464e3 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -117,7 +117,7 @@ static int groups16_to_user(old_gid_t __user *grouplist,
kgid_t kgid;
for (i = 0; i < group_info->ngroups; i++) {
- kgid = GROUP_AT(group_info, i);
+ kgid = group_info->gid[i];
group = high2lowgid(from_kgid_munged(user_ns, kgid));
if (put_user(group, grouplist+i))
return -EFAULT;
@@ -142,7 +142,7 @@ static int groups16_from_user(struct group_info *group_info,
if (!gid_valid(kgid))
return -EINVAL;
- GROUP_AT(group_info, i) = kgid;
+ group_info->gid[i] = kgid;
}
return 0;
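GROUP_AT() could go away because struct group_info dropped its two-level blocks[] indirection in this series; the gids are now a single flat array tacked onto the header, which is what the direct gid[i] indexing assumes:

struct group_info {
	atomic_t	usage;
	int		ngroups;
	kgid_t		gid[0];		/* flat array, was nblocks/blocks[] */
};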
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ef071ca73fc3..479d840db286 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2974,6 +2974,31 @@ bool flush_delayed_work(struct delayed_work *dwork)
}
EXPORT_SYMBOL(flush_delayed_work);
+static bool __cancel_work(struct work_struct *work, bool is_dwork)
+{
+ unsigned long flags;
+ int ret;
+
+ do {
+ ret = try_to_grab_pending(work, is_dwork, &flags);
+ } while (unlikely(ret == -EAGAIN));
+
+ if (unlikely(ret < 0))
+ return false;
+
+ set_work_pool_and_clear_pending(work, get_work_pool_id(work));
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * See cancel_delayed_work()
+ */
+bool cancel_work(struct work_struct *work)
+{
+ return __cancel_work(work, false);
+}
+
/**
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
@@ -2992,20 +3017,7 @@ EXPORT_SYMBOL(flush_delayed_work);
*/
bool cancel_delayed_work(struct delayed_work *dwork)
{
- unsigned long flags;
- int ret;
-
- do {
- ret = try_to_grab_pending(&dwork->work, true, &flags);
- } while (unlikely(ret == -EAGAIN));
-
- if (unlikely(ret < 0))
- return false;
-
- set_work_pool_and_clear_pending(&dwork->work,
- get_work_pool_id(&dwork->work));
- local_irq_restore(flags);
- return ret;
+ return __cancel_work(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work);
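Note that, like cancel_delayed_work(), the new cancel_work() only claims a still-pending item and returns false when the callback was idle or already executing; it never waits. A hedged usage sketch (poll_work is illustrative):

	if (!cancel_work(&dev->poll_work)) {
		/*
		 * Not pending: either never queued or running right now.
		 * Use cancel_work_sync() when the handler must have
		 * finished before we proceed.
		 */
	}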
@@ -4249,7 +4261,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
* This function is called without any synchronization and @task
* could be in any state. Be careful with dereferences.
*/
- worker = probe_kthread_data(task);
+ worker = kthread_probe_data(task);
/*
* Carefully copy the associated workqueue's workfn and name. Keep