Diffstat (limited to 'kernel'): 35 files changed, 870 insertions, 392 deletions
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5967b870a895..1ed8473ec537 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -97,7 +97,7 @@ static struct inode *bpf_get_inode(struct super_block *sb, return ERR_PTR(-ENOSPC); inode->i_ino = get_next_ino(); - inode->i_atime = CURRENT_TIME; + inode->i_atime = current_time(inode); inode->i_mtime = inode->i_atime; inode->i_ctime = inode->i_atime; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 44066158f0d1..85bc9beb046d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -64,6 +64,9 @@ #include <linux/file.h> #include <net/sock.h> +#define CREATE_TRACE_POINTS +#include <trace/events/cgroup.h> + /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root) struct cgroup *cgrp = &root->cgrp; struct cgrp_cset_link *link, *tmp_link; + trace_cgroup_destroy_root(root); + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); BUG_ON(atomic_read(&root->nr_cgrps)); @@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) strcpy(root->release_agent_path, opts.release_agent); spin_unlock(&release_agent_path_lock); } + + trace_cgroup_remount(root); + out_unlock: kfree(opts.release_agent); kfree(opts.name); @@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) if (ret) goto destroy_root; + trace_cgroup_setup_root(root); + /* * There must be no failure case after here, since rebinding takes * care of subsystems' refcounts, which are explicitly dropped in @@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns) +static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) { struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root); - int ret; - ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); - if (ret < 0 || ret >= buflen) - return NULL; - return buf; + return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); } -char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns) +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) { - char *ret; + int ret; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); @@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns); * * Return value is the same as kernfs_path(). 
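For context: the bpf_get_inode() hunk above is one instance of the tree-wide switch from the CURRENT_TIME macro to the current_time() helper, which respects the superblock's timestamp granularity. A minimal sketch of the post-conversion pattern for a pseudo-filesystem inode constructor (foofs_get_inode() is a hypothetical name, not part of this diff):

#include <linux/fs.h>
#include <linux/err.h>

/* Hypothetical helper mirroring the bpf_get_inode() change above. */
static struct inode *foofs_get_inode(struct super_block *sb, umode_t mode)
{
        struct inode *inode = new_inode(sb);

        if (!inode)
                return ERR_PTR(-ENOSPC);

        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
        /* current_time() honours sb->s_time_gran, unlike CURRENT_TIME. */
        inode->i_atime = current_time(inode);
        inode->i_mtime = inode->i_atime;
        inode->i_ctime = inode->i_atime;
        return inode;
}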
*/ -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) { struct cgroup_root *root; struct cgroup *cgrp; int hierarchy_id = 1; - char *path = NULL; + int ret; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); @@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) if (root) { cgrp = task_cgroup_from_root(task, root); - path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); + ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); } else { /* if no hierarchy exists, everyone is in "/" */ - if (strlcpy(buf, "/", buflen) < buflen) - path = buf; + ret = strlcpy(buf, "/", buflen); } spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); - return path; + return ret; } EXPORT_SYMBOL_GPL(task_cgroup_path); @@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); cgroup_migrate_finish(&preloaded_csets); + + if (!ret) + trace_cgroup_attach_task(dst_cgrp, leader, threadgroup); + return ret; } @@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&cgroup_mutex); ret = kernfs_rename(kn, new_parent, new_name_str); + if (!ret) + trace_cgroup_rename(cgrp); mutex_unlock(&cgroup_mutex); @@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) if (task) { ret = cgroup_migrate(task, false, to->root); + if (!ret) + trace_cgroup_transfer_tasks(to, task, false); put_task_struct(task); } } while (task && !ret); @@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work) ss->css_released(css); } else { /* cgroup release path */ + trace_cgroup_release(cgrp); + cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgrp->id = -1; @@ -5332,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; + trace_cgroup_mkdir(cgrp); + /* let's create and online css's */ kernfs_activate(kn); @@ -5507,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn) ret = cgroup_destroy_locked(cgrp); + if (!ret) + trace_cgroup_rmdir(cgrp); + cgroup_kn_unlock(kn); return ret; } @@ -5743,7 +5763,7 @@ core_initcall(cgroup_wq_init); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { - char *buf, *path; + char *buf; int retval; struct cgroup_root *root; @@ -5786,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, * " (deleted)" is appended to the cgroup path. 
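The conversion of cgroup_path_ns() and task_cgroup_path() from returning a char * to returning an int (the kernfs_path_from_node() length, or a negative error) changes what callers must check. A sketch of the caller-side pattern, mirroring how proc_cgroup_show() is updated below; the example_* names are illustrative only:

#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <linux/sched.h>

static int example_print_task_cgroup(struct task_struct *tsk)
{
        char *buf;
        int ret;

        buf = kmalloc(PATH_MAX, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        ret = task_cgroup_path(tsk, buf, PATH_MAX);
        if (ret >= PATH_MAX)
                ret = -ENAMETOOLONG;    /* path did not fit in the buffer */
        if (ret < 0)
                goto out;

        pr_info("task %d: cgroup %s\n", task_pid_nr(tsk), buf);
        ret = 0;
out:
        kfree(buf);
        return ret;
}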
*/ if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { - path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, + retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, current->nsproxy->cgroup_ns); - if (!path) { + if (retval >= PATH_MAX) retval = -ENAMETOOLONG; + if (retval < 0) goto out_unlock; - } + + seq_puts(m, buf); } else { - path = "/"; + seq_puts(m, "/"); } - seq_puts(m, path); - if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp)) seq_puts(m, " (deleted)\n"); else @@ -6062,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work) { struct cgroup *cgrp = container_of(work, struct cgroup, release_agent_work); - char *pathbuf = NULL, *agentbuf = NULL, *path; + char *pathbuf = NULL, *agentbuf = NULL; char *argv[3], *envp[3]; + int ret; mutex_lock(&cgroup_mutex); @@ -6073,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work) goto out; spin_lock_irq(&css_set_lock); - path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); + ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); spin_unlock_irq(&css_set_lock); - if (!path) + if (ret < 0 || ret >= PATH_MAX) goto out; argv[0] = agentbuf; - argv[1] = path; + argv[1] = pathbuf; argv[2] = NULL; /* minimal command environment */ diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index 9f748ed7bea8..1a8f34f63601 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -11,7 +11,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y CONFIG_ARMV8_DEPRECATED=y CONFIG_ASHMEM=y CONFIG_AUDIT=y -CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_INITRD=y CONFIG_CGROUPS=y CONFIG_CGROUP_CPUACCT=y @@ -19,9 +18,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_CP15_BARRIER_EMULATION=y -CONFIG_DM_CRYPT=y -CONFIG_DM_VERITY=y -CONFIG_DM_VERITY_FEC=y +CONFIG_DEFAULT_SECURITY_SELINUX=y CONFIG_EMBEDDED=y CONFIG_FB=y CONFIG_HIGH_RES_TIMERS=y @@ -41,7 +38,6 @@ CONFIG_IPV6=y CONFIG_IPV6_MIP6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IP_ADVANCED_ROUTER=y @@ -135,6 +131,7 @@ CONFIG_PREEMPT=y CONFIG_QUOTA=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y +CONFIG_SECCOMP=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index e3b953e966d2..297756be369c 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -6,12 +6,16 @@ # CONFIG_PM_WAKELOCKS_GC is not set # CONFIG_VT is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_COMPACTION=y CONFIG_DEBUG_RODATA=y +CONFIG_DM_CRYPT=y CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y CONFIG_DRAGONRISE_FF=y CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_EXT4_FS=y diff --git a/kernel/cpu.c b/kernel/cpu.c index 5df20d6d1520..29de1a9352c0 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -228,7 +228,7 @@ static struct { .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), #ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = {.name = "cpu_hotplug.lock" }, + .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map), #endif }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2b4c20ab5bbe..29f815d2ef7e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2715,7 +2715,7 @@ void 
__cpuset_memory_pressure_bump(void) int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { - char *buf, *p; + char *buf; struct cgroup_subsys_state *css; int retval; @@ -2724,14 +2724,15 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, if (!buf) goto out; - retval = -ENAMETOOLONG; css = task_get_css(tsk, cpuset_cgrp_id); - p = cgroup_path_ns(css->cgroup, buf, PATH_MAX, - current->nsproxy->cgroup_ns); + retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, + current->nsproxy->cgroup_ns); css_put(css); - if (!p) + if (retval >= PATH_MAX) + retval = -ENAMETOOLONG; + if (retval < 0) goto out_free; - seq_puts(m, p); + seq_puts(m, buf); seq_putc(m, '\n'); retval = 0; out_free: diff --git a/kernel/exit.c b/kernel/exit.c index 1e1d913914c0..9d68c45ebbe3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -511,7 +511,7 @@ static void exit_mm(struct task_struct *tsk) mm_update_next_owner(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) - exit_oom_victim(tsk); + exit_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index 9a05bd93f8e7..623259fc794d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -359,6 +359,12 @@ static inline void free_signal_struct(struct signal_struct *sig) { taskstats_tgid_free(sig); sched_autogroup_exit(sig); + /* + * __mmdrop is not safe to call from softirq context on x86 due to + * pgd_dtor so postpone it to the async context + */ + if (sig->oom_mm) + mmdrop_async(sig->oom_mm); kmem_cache_free(signal_cachep, sig); } @@ -541,7 +547,8 @@ free_tsk: } #ifdef CONFIG_MMU -static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +static __latent_entropy int dup_mmap(struct mm_struct *mm, + struct mm_struct *oldmm) { struct vm_area_struct *mpnt, *tmp, *prev, **pprev; struct rb_node **rb_link, *rb_parent; @@ -848,6 +855,7 @@ static inline void __mmput(struct mm_struct *mm) ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); + mm_put_huge_zero_page(mm); set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); @@ -856,6 +864,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + set_bit(MMF_OOM_SKIP, &mm->flags); mmdrop(mm); } @@ -1433,7 +1442,8 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ -static struct task_struct *copy_process(unsigned long clone_flags, +static __latent_entropy struct task_struct *copy_process( + unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, @@ -1918,6 +1928,7 @@ long _do_fork(unsigned long clone_flags, p = copy_process(clone_flags, stack_start, stack_size, child_tidptr, NULL, trace, tls, NUMA_NO_NODE); + add_latent_entropy(); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. 
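The __latent_entropy annotations on dup_mmap() and copy_process(), and the add_latent_entropy() call in _do_fork(), come from the GCC latent_entropy plugin; with CONFIG_GCC_PLUGIN_LATENT_ENTROPY disabled both expand to nothing. A hedged sketch of how the annotation is applied elsewhere (the example function is hypothetical):

#include <linux/compiler.h>
#include <linux/random.h>

/*
 * With the latent_entropy plugin enabled, the compiler instruments this
 * function so that its control flow perturbs a per-cpu latent_entropy
 * variable; without the plugin the attribute is empty.
 */
static __latent_entropy void example_unpredictable_init(void)
{
        /* ... work whose timing and values are hard to predict ... */

        /*
         * Fold the accumulated latent entropy into the input pool, as
         * _do_fork() now does right after copy_process().
         */
        add_latent_entropy();
}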
diff --git a/kernel/groups.c b/kernel/groups.c index 74d431d25251..2fcadd66a8fd 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -7,55 +7,31 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/user_namespace.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> struct group_info *groups_alloc(int gidsetsize) { - struct group_info *group_info; - int nblocks; - int i; - - nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK; - /* Make sure we always allocate at least one indirect block pointer */ - nblocks = nblocks ? : 1; - group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER); - if (!group_info) + struct group_info *gi; + unsigned int len; + + len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize; + gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY); + if (!gi) + gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL); + if (!gi) return NULL; - group_info->ngroups = gidsetsize; - group_info->nblocks = nblocks; - atomic_set(&group_info->usage, 1); - - if (gidsetsize <= NGROUPS_SMALL) - group_info->blocks[0] = group_info->small_block; - else { - for (i = 0; i < nblocks; i++) { - kgid_t *b; - b = (void *)__get_free_page(GFP_USER); - if (!b) - goto out_undo_partial_alloc; - group_info->blocks[i] = b; - } - } - return group_info; -out_undo_partial_alloc: - while (--i >= 0) { - free_page((unsigned long)group_info->blocks[i]); - } - kfree(group_info); - return NULL; + atomic_set(&gi->usage, 1); + gi->ngroups = gidsetsize; + return gi; } EXPORT_SYMBOL(groups_alloc); void groups_free(struct group_info *group_info) { - if (group_info->blocks[0] != group_info->small_block) { - int i; - for (i = 0; i < group_info->nblocks; i++) - free_page((unsigned long)group_info->blocks[i]); - } - kfree(group_info); + kvfree(group_info); } EXPORT_SYMBOL(groups_free); @@ -70,7 +46,7 @@ static int groups_to_user(gid_t __user *grouplist, for (i = 0; i < count; i++) { gid_t gid; - gid = from_kgid_munged(user_ns, GROUP_AT(group_info, i)); + gid = from_kgid_munged(user_ns, group_info->gid[i]); if (put_user(gid, grouplist+i)) return -EFAULT; } @@ -95,7 +71,7 @@ static int groups_from_user(struct group_info *group_info, if (!gid_valid(kgid)) return -EINVAL; - GROUP_AT(group_info, i) = kgid; + group_info->gid[i] = kgid; } return 0; } @@ -115,15 +91,14 @@ static void groups_sort(struct group_info *group_info) for (base = 0; base < max; base++) { int left = base; int right = left + stride; - kgid_t tmp = GROUP_AT(group_info, right); + kgid_t tmp = group_info->gid[right]; - while (left >= 0 && gid_gt(GROUP_AT(group_info, left), tmp)) { - GROUP_AT(group_info, right) = - GROUP_AT(group_info, left); + while (left >= 0 && gid_gt(group_info->gid[left], tmp)) { + group_info->gid[right] = group_info->gid[left]; right = left; left -= stride; } - GROUP_AT(group_info, right) = tmp; + group_info->gid[right] = tmp; } stride /= 3; } @@ -141,9 +116,9 @@ int groups_search(const struct group_info *group_info, kgid_t grp) right = group_info->ngroups; while (left < right) { unsigned int mid = (left+right)/2; - if (gid_gt(grp, GROUP_AT(group_info, mid))) + if (gid_gt(grp, group_info->gid[mid])) left = mid + 1; - else if (gid_lt(grp, GROUP_AT(group_info, mid))) + else if (gid_lt(grp, group_info->gid[mid])) right = mid; else return 1; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 432c3d71d195..2b59c82cc3e1 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -98,26 +98,26 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout) trace_sched_process_hang(t); - if (!sysctl_hung_task_warnings) + if (!sysctl_hung_task_warnings && !sysctl_hung_task_panic) return; - if (sysctl_hung_task_warnings > 0) - sysctl_hung_task_warnings--; - /* * Ok, the task did not get scheduled for more than 2 minutes, * complain: */ - pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", - t->comm, t->pid, timeout); - pr_err(" %s %s %.*s\n", - print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" - " disables this message.\n"); - sched_show_task(t); - debug_show_all_locks(); + if (sysctl_hung_task_warnings) { + sysctl_hung_task_warnings--; + pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", + t->comm, t->pid, timeout); + pr_err(" %s %s %.*s\n", + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" + " disables this message.\n"); + sched_show_task(t); + debug_show_all_locks(); + } touch_nmi_watchdog(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d10ab6b9b5e0..d63095472ea9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -49,7 +49,7 @@ #include <linux/cpu.h> #include <linux/jump_label.h> -#include <asm-generic/sections.h> +#include <asm/sections.h> #include <asm/cacheflush.h> #include <asm/errno.h> #include <asm/uaccess.h> diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ab4c3766a80..be2cc1f9dd57 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -138,7 +138,7 @@ void *kthread_data(struct task_struct *task) } /** - * probe_kthread_data - speculative version of kthread_data() + * kthread_probe_data - speculative version of kthread_data() * @task: possible kthread task in question * * @task could be a kthread task. Return the data value specified when it @@ -146,7 +146,7 @@ void *kthread_data(struct task_struct *task) * inaccessible for any reason, %NULL is returned. This function requires * that @task itself is safe to dereference. */ -void *probe_kthread_data(struct task_struct *task) +void *kthread_probe_data(struct task_struct *task) { struct kthread *kthread = to_kthread(task); void *data = NULL; @@ -244,33 +244,10 @@ static void create_kthread(struct kthread_create_info *create) } } -/** - * kthread_create_on_node - create a kthread. - * @threadfn: the function to run until signal_pending(current). - * @data: data ptr for @threadfn. - * @node: task and thread structures for the thread are allocated on this node - * @namefmt: printf-style name for the thread. - * - * Description: This helper function creates and names a kernel - * thread. The thread will be stopped: use wake_up_process() to start - * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and - * is affine to all CPUs. - * - * If thread is going to be bound on a particular cpu, give its node - * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. - * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn() can either call do_exit() directly if it is a - * standalone thread for which no one will call kthread_stop(), or - * return when 'kthread_should_stop()' is true (which means - * kthread_stop() has been called). The return value should be zero - * or a negative error number; it will be passed to kthread_stop(). - * - * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR). 
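The kthread_create_on_node() kerneldoc above is only being moved below the new __kthread_create_on_node() helper, not changed. For reference, a minimal user of the documented contract (illustrative names; the thread is created stopped and must be woken explicitly):

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/numa.h>

static int example_thread_fn(void *data)
{
        while (!kthread_should_stop()) {
                /* periodic work ... */
                msleep_interruptible(1000);
        }
        return 0;
}

static struct task_struct *example_start_thread(void)
{
        struct task_struct *tsk;

        tsk = kthread_create_on_node(example_thread_fn, NULL, NUMA_NO_NODE,
                                     "example/%d", 0);
        if (IS_ERR(tsk))
                return tsk;

        wake_up_process(tsk);   /* see also kthread_run() */
        return tsk;
}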
- */ -struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), - void *data, int node, - const char namefmt[], - ...) +static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), + void *data, int node, + const char namefmt[], + va_list args) { DECLARE_COMPLETION_ONSTACK(done); struct task_struct *task; @@ -311,11 +288,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), task = create->result; if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; - va_list args; - va_start(args, namefmt); vsnprintf(task->comm, sizeof(task->comm), namefmt, args); - va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. @@ -326,6 +300,44 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), kfree(create); return task; } + +/** + * kthread_create_on_node - create a kthread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @node: task and thread structures for the thread are allocated on this node + * @namefmt: printf-style name for the thread. + * + * Description: This helper function creates and names a kernel + * thread. The thread will be stopped: use wake_up_process() to start + * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and + * is affine to all CPUs. + * + * If thread is going to be bound on a particular cpu, give its node + * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. + * When woken, the thread will run @threadfn() with @data as its + * argument. @threadfn() can either call do_exit() directly if it is a + * standalone thread for which no one will call kthread_stop(), or + * return when 'kthread_should_stop()' is true (which means + * kthread_stop() has been called). The return value should be zero + * or a negative error number; it will be passed to kthread_stop(). + * + * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR). + */ +struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, int node, + const char namefmt[], + ...) +{ + struct task_struct *task; + va_list args; + + va_start(args, namefmt); + task = __kthread_create_on_node(threadfn, data, node, namefmt, args); + va_end(args); + + return task; +} EXPORT_SYMBOL(kthread_create_on_node); static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state) @@ -390,10 +402,10 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), cpu); if (IS_ERR(p)) return p; + kthread_bind(p, cpu); + /* CPU hotplug need to bind once again when unparking the thread. */ set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); to_kthread(p)->cpu = cpu; - /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */ - kthread_park(p); return p; } @@ -407,6 +419,10 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) * which might be about to be cleared. */ if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + /* + * Newly created kthread was parked when the CPU was offline. + * The binding was lost and we need to set it again. 
+ */ if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) __kthread_bind(k, kthread->cpu, TASK_PARKED); wake_up_state(k, TASK_PARKED); @@ -540,39 +556,48 @@ int kthreadd(void *unused) return 0; } -void __init_kthread_worker(struct kthread_worker *worker, +void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key) { + memset(worker, 0, sizeof(struct kthread_worker)); spin_lock_init(&worker->lock); lockdep_set_class_and_name(&worker->lock, key, name); INIT_LIST_HEAD(&worker->work_list); - worker->task = NULL; + INIT_LIST_HEAD(&worker->delayed_work_list); } -EXPORT_SYMBOL_GPL(__init_kthread_worker); +EXPORT_SYMBOL_GPL(__kthread_init_worker); /** * kthread_worker_fn - kthread function to process kthread_worker * @worker_ptr: pointer to initialized kthread_worker * - * This function can be used as @threadfn to kthread_create() or - * kthread_run() with @worker_ptr argument pointing to an initialized - * kthread_worker. The started kthread will process work_list until - * the it is stopped with kthread_stop(). A kthread can also call - * this function directly after extra initialization. + * This function implements the main cycle of kthread worker. It processes + * work_list until it is stopped with kthread_stop(). It sleeps when the queue + * is empty. + * + * The works are not allowed to keep any locks, disable preemption or interrupts + * when they finish. There is defined a safe point for freezing when one work + * finishes and before a new one is started. * - * Different kthreads can be used for the same kthread_worker as long - * as there's only one kthread attached to it at any given time. A - * kthread_worker without an attached kthread simply collects queued - * kthread_works. + * Also the works must not be handled by more than one worker at the same time, + * see also kthread_queue_work(). */ int kthread_worker_fn(void *worker_ptr) { struct kthread_worker *worker = worker_ptr; struct kthread_work *work; - WARN_ON(worker->task); + /* + * FIXME: Update the check and remove the assignment when all kthread + * worker users are created using kthread_create_worker*() functions. + */ + WARN_ON(worker->task && worker->task != current); worker->task = current; + + if (worker->flags & KTW_FREEZABLE) + set_freezable(); + repeat: set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ @@ -605,12 +630,131 @@ repeat: } EXPORT_SYMBOL_GPL(kthread_worker_fn); -/* insert @work before @pos in @worker */ -static void insert_kthread_work(struct kthread_worker *worker, - struct kthread_work *work, - struct list_head *pos) +static struct kthread_worker * +__kthread_create_worker(int cpu, unsigned int flags, + const char namefmt[], va_list args) +{ + struct kthread_worker *worker; + struct task_struct *task; + + worker = kzalloc(sizeof(*worker), GFP_KERNEL); + if (!worker) + return ERR_PTR(-ENOMEM); + + kthread_init_worker(worker); + + if (cpu >= 0) { + char name[TASK_COMM_LEN]; + + /* + * kthread_create_worker_on_cpu() allows to pass a generic + * namefmt in compare with kthread_create_on_cpu. We need + * to format it here. 
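The updated kthread_worker_fn() kerneldoc (and the FIXME in its body) still allows the "manual" setup where the caller provides the thread; the new kthread_create_worker*() helpers below just wrap it. A sketch of the manual style, with illustrative example_* names and assuming the kthread_init_worker()/kthread_init_work() macros from the matching include/linux/kthread.h change (not visible in this kernel/-only diffstat):

#include <linux/kthread.h>

static struct kthread_worker example_worker;
static struct kthread_work example_work;

static void example_work_fn(struct kthread_work *work)
{
        /* runs in the worker thread's context, one work at a time */
}

static int example_setup(void)
{
        struct task_struct *tsk;

        kthread_init_worker(&example_worker);
        kthread_init_work(&example_work, example_work_fn);

        tsk = kthread_run(kthread_worker_fn, &example_worker, "example_worker");
        if (IS_ERR(tsk))
                return PTR_ERR(tsk);

        kthread_queue_work(&example_worker, &example_work);
        return 0;
}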
+ */ + vsnprintf(name, sizeof(name), namefmt, args); + task = kthread_create_on_cpu(kthread_worker_fn, worker, + cpu, name); + } else { + task = __kthread_create_on_node(kthread_worker_fn, worker, + -1, namefmt, args); + } + + if (IS_ERR(task)) + goto fail_task; + + worker->flags = flags; + worker->task = task; + wake_up_process(task); + return worker; + +fail_task: + kfree(worker); + return ERR_CAST(task); +} + +/** + * kthread_create_worker - create a kthread worker + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the kthread worker (task). + * + * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) + * when the needed structures could not get allocated, and ERR_PTR(-EINTR) + * when the worker was SIGKILLed. + */ +struct kthread_worker * +kthread_create_worker(unsigned int flags, const char namefmt[], ...) +{ + struct kthread_worker *worker; + va_list args; + + va_start(args, namefmt); + worker = __kthread_create_worker(-1, flags, namefmt, args); + va_end(args); + + return worker; +} +EXPORT_SYMBOL(kthread_create_worker); + +/** + * kthread_create_worker_on_cpu - create a kthread worker and bind it + * it to a given CPU and the associated NUMA node. + * @cpu: CPU number + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the kthread worker (task). + * + * Use a valid CPU number if you want to bind the kthread worker + * to the given CPU and the associated NUMA node. + * + * A good practice is to add the cpu number also into the worker name. + * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu). + * + * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) + * when the needed structures could not get allocated, and ERR_PTR(-EINTR) + * when the worker was SIGKILLed. + */ +struct kthread_worker * +kthread_create_worker_on_cpu(int cpu, unsigned int flags, + const char namefmt[], ...) +{ + struct kthread_worker *worker; + va_list args; + + va_start(args, namefmt); + worker = __kthread_create_worker(cpu, flags, namefmt, args); + va_end(args); + + return worker; +} +EXPORT_SYMBOL(kthread_create_worker_on_cpu); + +/* + * Returns true when the work could not be queued at the moment. + * It happens when it is already pending in a worker list + * or when it is being cancelled. + */ +static inline bool queuing_blocked(struct kthread_worker *worker, + struct kthread_work *work) +{ + lockdep_assert_held(&worker->lock); + + return !list_empty(&work->node) || work->canceling; +} + +static void kthread_insert_work_sanity_check(struct kthread_worker *worker, + struct kthread_work *work) { lockdep_assert_held(&worker->lock); + WARN_ON_ONCE(!list_empty(&work->node)); + /* Do not use a work with >1 worker, see kthread_queue_work() */ + WARN_ON_ONCE(work->worker && work->worker != worker); +} + +/* insert @work before @pos in @worker */ +static void kthread_insert_work(struct kthread_worker *worker, + struct kthread_work *work, + struct list_head *pos) +{ + kthread_insert_work_sanity_check(worker, work); list_add_tail(&work->node, pos); work->worker = worker; @@ -619,29 +763,133 @@ static void insert_kthread_work(struct kthread_worker *worker, } /** - * queue_kthread_work - queue a kthread_work + * kthread_queue_work - queue a kthread_work * @worker: target kthread_worker * @work: kthread_work to queue * * Queue @work to work processor @task for async execution. @task * must have been created with kthread_worker_create(). 
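Taken together with kthread_destroy_worker() further down, the new kthread_create_worker() API gives a complete worker lifecycle. A minimal sketch under the same assumptions as above (example_* names are illustrative):

#include <linux/kthread.h>
#include <linux/err.h>

static struct kthread_worker *example_worker;
static struct kthread_work example_work;

static void example_work_fn(struct kthread_work *work)
{
        /* executed in example_worker's dedicated thread */
}

static int example_init(void)
{
        example_worker = kthread_create_worker(0, "example_worker");
        if (IS_ERR(example_worker))
                return PTR_ERR(example_worker);

        kthread_init_work(&example_work, example_work_fn);
        kthread_queue_work(example_worker, &example_work);
        return 0;
}

static void example_exit(void)
{
        /* flushes queued work and stops the worker thread */
        kthread_destroy_worker(example_worker);
}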
Returns %true * if @work was successfully queued, %false if it was already pending. + * + * Reinitialize the work if it needs to be used by another worker. + * For example, when the worker was stopped and started again. */ -bool queue_kthread_work(struct kthread_worker *worker, +bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work) { bool ret = false; unsigned long flags; spin_lock_irqsave(&worker->lock, flags); - if (list_empty(&work->node)) { - insert_kthread_work(worker, work, &worker->work_list); + if (!queuing_blocked(worker, work)) { + kthread_insert_work(worker, work, &worker->work_list); + ret = true; + } + spin_unlock_irqrestore(&worker->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(kthread_queue_work); + +/** + * kthread_delayed_work_timer_fn - callback that queues the associated kthread + * delayed work when the timer expires. + * @__data: pointer to the data associated with the timer + * + * The format of the function is defined by struct timer_list. + * It should have been called from irqsafe timer with irq already off. + */ +void kthread_delayed_work_timer_fn(unsigned long __data) +{ + struct kthread_delayed_work *dwork = + (struct kthread_delayed_work *)__data; + struct kthread_work *work = &dwork->work; + struct kthread_worker *worker = work->worker; + + /* + * This might happen when a pending work is reinitialized. + * It means that it is used a wrong way. + */ + if (WARN_ON_ONCE(!worker)) + return; + + spin_lock(&worker->lock); + /* Work must not be used with >1 worker, see kthread_queue_work(). */ + WARN_ON_ONCE(work->worker != worker); + + /* Move the work from worker->delayed_work_list. */ + WARN_ON_ONCE(list_empty(&work->node)); + list_del_init(&work->node); + kthread_insert_work(worker, work, &worker->work_list); + + spin_unlock(&worker->lock); +} +EXPORT_SYMBOL(kthread_delayed_work_timer_fn); + +void __kthread_queue_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay) +{ + struct timer_list *timer = &dwork->timer; + struct kthread_work *work = &dwork->work; + + WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn || + timer->data != (unsigned long)dwork); + + /* + * If @delay is 0, queue @dwork->work immediately. This is for + * both optimization and correctness. The earliest @timer can + * expire is on the closest next tick and delayed_work users depend + * on that there's no such delay when @delay is 0. + */ + if (!delay) { + kthread_insert_work(worker, work, &worker->work_list); + return; + } + + /* Be paranoid and try to detect possible races already now. */ + kthread_insert_work_sanity_check(worker, work); + + list_add(&work->node, &worker->delayed_work_list); + work->worker = worker; + timer_stats_timer_set_start_info(&dwork->timer); + timer->expires = jiffies + delay; + add_timer(timer); +} + +/** + * kthread_queue_delayed_work - queue the associated kthread work + * after a delay. + * @worker: target kthread_worker + * @dwork: kthread_delayed_work to queue + * @delay: number of jiffies to wait before queuing + * + * If the work has not been pending it starts a timer that will queue + * the work after the given @delay. If @delay is zero, it queues the + * work immediately. + * + * Return: %false if the @work has already been pending. It means that + * either the timer was running or the work was queued. It returns %true + * otherwise. 
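A sketch of how the delayed-work side is meant to be used, assuming the kthread_init_delayed_work() initializer from the corresponding include/linux/kthread.h change (which sets dwork->timer.function to kthread_delayed_work_timer_fn, as the WARN_ON_ONCE above expects); names are illustrative:

#include <linux/kthread.h>
#include <linux/jiffies.h>

static struct kthread_worker *example_worker;   /* from kthread_create_worker() */
static struct kthread_delayed_work example_dwork;

static void example_dwork_fn(struct kthread_work *work)
{
        /* runs in the worker thread roughly one second after queuing */
}

static void example_arm(void)
{
        kthread_init_delayed_work(&example_dwork, example_dwork_fn);
        kthread_queue_delayed_work(example_worker, &example_dwork,
                                   msecs_to_jiffies(1000));
}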
+ */ +bool kthread_queue_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay) +{ + struct kthread_work *work = &dwork->work; + unsigned long flags; + bool ret = false; + + spin_lock_irqsave(&worker->lock, flags); + + if (!queuing_blocked(worker, work)) { + __kthread_queue_delayed_work(worker, dwork, delay); ret = true; } + spin_unlock_irqrestore(&worker->lock, flags); return ret; } -EXPORT_SYMBOL_GPL(queue_kthread_work); +EXPORT_SYMBOL_GPL(kthread_queue_delayed_work); struct kthread_flush_work { struct kthread_work work; @@ -656,12 +904,12 @@ static void kthread_flush_work_fn(struct kthread_work *work) } /** - * flush_kthread_work - flush a kthread_work + * kthread_flush_work - flush a kthread_work * @work: work to flush * * If @work is queued or executing, wait for it to finish execution. */ -void flush_kthread_work(struct kthread_work *work) +void kthread_flush_work(struct kthread_work *work) { struct kthread_flush_work fwork = { KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), @@ -670,21 +918,19 @@ void flush_kthread_work(struct kthread_work *work) struct kthread_worker *worker; bool noop = false; -retry: worker = work->worker; if (!worker) return; spin_lock_irq(&worker->lock); - if (work->worker != worker) { - spin_unlock_irq(&worker->lock); - goto retry; - } + /* Work must not be used with >1 worker, see kthread_queue_work(). */ + WARN_ON_ONCE(work->worker != worker); if (!list_empty(&work->node)) - insert_kthread_work(worker, &fwork.work, work->node.next); + kthread_insert_work(worker, &fwork.work, work->node.next); else if (worker->current_work == work) - insert_kthread_work(worker, &fwork.work, worker->work_list.next); + kthread_insert_work(worker, &fwork.work, + worker->work_list.next); else noop = true; @@ -693,23 +939,214 @@ retry: if (!noop) wait_for_completion(&fwork.done); } -EXPORT_SYMBOL_GPL(flush_kthread_work); +EXPORT_SYMBOL_GPL(kthread_flush_work); + +/* + * This function removes the work from the worker queue. Also it makes sure + * that it won't get queued later via the delayed work's timer. + * + * The work might still be in use when this function finishes. See the + * current_work proceed by the worker. + * + * Return: %true if @work was pending and successfully canceled, + * %false if @work was not pending + */ +static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, + unsigned long *flags) +{ + /* Try to cancel the timer if exists. */ + if (is_dwork) { + struct kthread_delayed_work *dwork = + container_of(work, struct kthread_delayed_work, work); + struct kthread_worker *worker = work->worker; + + /* + * del_timer_sync() must be called to make sure that the timer + * callback is not running. The lock must be temporary released + * to avoid a deadlock with the callback. In the meantime, + * any queuing is blocked by setting the canceling counter. + */ + work->canceling++; + spin_unlock_irqrestore(&worker->lock, *flags); + del_timer_sync(&dwork->timer); + spin_lock_irqsave(&worker->lock, *flags); + work->canceling--; + } + + /* + * Try to remove the work from a worker list. It might either + * be from worker->work_list or from worker->delayed_work_list. 
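kthread_flush_work() only waits, it does not cancel; a typical (illustrative) use is making sure a work embedded in an object has finished before the object is freed:

#include <linux/kthread.h>
#include <linux/slab.h>

struct example_obj {
        struct kthread_work work;
        /* ... payload ... */
};

static void example_obj_free(struct example_obj *obj)
{
        /* Wait for a queued or running instance to complete first. */
        kthread_flush_work(&obj->work);
        kfree(obj);
}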
+ */ + if (!list_empty(&work->node)) { + list_del_init(&work->node); + return true; + } + + return false; +} + +/** + * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work + * @worker: kthread worker to use + * @dwork: kthread delayed work to queue + * @delay: number of jiffies to wait before queuing + * + * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise, + * modify @dwork's timer so that it expires after @delay. If @delay is zero, + * @work is guaranteed to be queued immediately. + * + * Return: %true if @dwork was pending and its timer was modified, + * %false otherwise. + * + * A special case is when the work is being canceled in parallel. + * It might be caused either by the real kthread_cancel_delayed_work_sync() + * or yet another kthread_mod_delayed_work() call. We let the other command + * win and return %false here. The caller is supposed to synchronize these + * operations a reasonable way. + * + * This function is safe to call from any context including IRQ handler. + * See __kthread_cancel_work() and kthread_delayed_work_timer_fn() + * for details. + */ +bool kthread_mod_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay) +{ + struct kthread_work *work = &dwork->work; + unsigned long flags; + int ret = false; + + spin_lock_irqsave(&worker->lock, flags); + + /* Do not bother with canceling when never queued. */ + if (!work->worker) + goto fast_queue; + + /* Work must not be used with >1 worker, see kthread_queue_work() */ + WARN_ON_ONCE(work->worker != worker); + + /* Do not fight with another command that is canceling this work. */ + if (work->canceling) + goto out; + + ret = __kthread_cancel_work(work, true, &flags); +fast_queue: + __kthread_queue_delayed_work(worker, dwork, delay); +out: + spin_unlock_irqrestore(&worker->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(kthread_mod_delayed_work); + +static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) +{ + struct kthread_worker *worker = work->worker; + unsigned long flags; + int ret = false; + + if (!worker) + goto out; + + spin_lock_irqsave(&worker->lock, flags); + /* Work must not be used with >1 worker, see kthread_queue_work(). */ + WARN_ON_ONCE(work->worker != worker); + + ret = __kthread_cancel_work(work, is_dwork, &flags); + + if (worker->current_work != work) + goto out_fast; + + /* + * The work is in progress and we need to wait with the lock released. + * In the meantime, block any queuing by setting the canceling counter. + */ + work->canceling++; + spin_unlock_irqrestore(&worker->lock, flags); + kthread_flush_work(work); + spin_lock_irqsave(&worker->lock, flags); + work->canceling--; + +out_fast: + spin_unlock_irqrestore(&worker->lock, flags); +out: + return ret; +} + +/** + * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish + * @work: the kthread work to cancel + * + * Cancel @work and wait for its execution to finish. This function + * can be used even if the work re-queues itself. On return from this + * function, @work is guaranteed to be not pending or executing on any CPU. + * + * kthread_cancel_work_sync(&delayed_work->work) must not be used for + * delayed_work's. Use kthread_cancel_delayed_work_sync() instead. + * + * The caller must ensure that the worker on which @work was last + * queued can't be destroyed before this function returns. + * + * Return: %true if @work was pending, %false otherwise. 
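A short sketch of the watchdog-style pattern these two entry points enable: kthread_mod_delayed_work() re-arms the deadline from any context, and kthread_cancel_delayed_work_sync() tears it down (illustrative names, worker assumed created as above):

#include <linux/kthread.h>
#include <linux/jiffies.h>

static struct kthread_worker *example_worker;
static struct kthread_delayed_work example_watchdog;

/* Push the expiry out by five seconds whenever activity is seen. */
static void example_touch_watchdog(void)
{
        kthread_mod_delayed_work(example_worker, &example_watchdog,
                                 msecs_to_jiffies(5000));
}

static void example_shutdown(void)
{
        /* On return the work is neither pending nor running anywhere. */
        kthread_cancel_delayed_work_sync(&example_watchdog);
}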
+ */ +bool kthread_cancel_work_sync(struct kthread_work *work) +{ + return __kthread_cancel_work_sync(work, false); +} +EXPORT_SYMBOL_GPL(kthread_cancel_work_sync); + +/** + * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and + * wait for it to finish. + * @dwork: the kthread delayed work to cancel + * + * This is kthread_cancel_work_sync() for delayed works. + * + * Return: %true if @dwork was pending, %false otherwise. + */ +bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork) +{ + return __kthread_cancel_work_sync(&dwork->work, true); +} +EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync); /** - * flush_kthread_worker - flush all current works on a kthread_worker + * kthread_flush_worker - flush all current works on a kthread_worker * @worker: worker to flush * * Wait until all currently executing or pending works on @worker are * finished. */ -void flush_kthread_worker(struct kthread_worker *worker) +void kthread_flush_worker(struct kthread_worker *worker) { struct kthread_flush_work fwork = { KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), COMPLETION_INITIALIZER_ONSTACK(fwork.done), }; - queue_kthread_work(worker, &fwork.work); + kthread_queue_work(worker, &fwork.work); wait_for_completion(&fwork.done); } -EXPORT_SYMBOL_GPL(flush_kthread_worker); +EXPORT_SYMBOL_GPL(kthread_flush_worker); + +/** + * kthread_destroy_worker - destroy a kthread worker + * @worker: worker to be destroyed + * + * Flush and destroy @worker. The simple flush is enough because the kthread + * worker API is used only in trivial scenarios. There are no multi-step state + * machines needed. + */ +void kthread_destroy_worker(struct kthread_worker *worker) +{ + struct task_struct *task; + + task = worker->task; + if (WARN_ON(!task)) + return; + + kthread_flush_worker(worker); + kthread_stop(task); + WARN_ON(!list_empty(&worker->work_list)); + kfree(worker); +} +EXPORT_SYMBOL(kthread_destroy_worker); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 8bbe50704621..af4643873e71 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -274,7 +274,6 @@ static int klp_write_object_relocations(struct module *pmod, objname = klp_is_module(obj) ? obj->name : "vmlinux"; - module_disable_ro(pmod); /* For each klp relocation section */ for (i = 1; i < pmod->klp_info->hdr.e_shnum; i++) { sec = pmod->klp_info->sechdrs + i; @@ -309,7 +308,6 @@ static int klp_write_object_relocations(struct module *pmod, break; } - module_enable_ro(pmod, true); return ret; } @@ -547,9 +545,6 @@ static int __klp_enable_patch(struct klp_patch *patch) list_prev_entry(patch, list)->state == KLP_DISABLED) return -EBUSY; - pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n"); - add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK); - pr_notice("enabling patch '%s'\n", patch->mod->name); klp_for_each_object(patch, obj) { @@ -763,6 +758,12 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) func->old_sympos ? 
func->old_sympos : 1); } +/* Arches may override this to finish any remaining arch-specific tasks */ +void __weak arch_klp_init_object_loaded(struct klp_patch *patch, + struct klp_object *obj) +{ +} + /* parts of the initialization that is done only when the object is loaded */ static int klp_init_object_loaded(struct klp_patch *patch, struct klp_object *obj) @@ -770,9 +771,15 @@ static int klp_init_object_loaded(struct klp_patch *patch, struct klp_func *func; int ret; + module_disable_ro(patch->mod); ret = klp_write_object_relocations(patch->mod, obj); - if (ret) + if (ret) { + module_enable_ro(patch->mod, true); return ret; + } + + arch_klp_init_object_loaded(patch, obj); + module_enable_ro(patch->mod, true); klp_for_each_func(obj, func) { ret = klp_find_object_symbol(obj->name, func->old_name, diff --git a/kernel/module.c b/kernel/module.c index 529efae9f481..f57dd63186e6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1149,6 +1149,8 @@ static size_t module_flags_taint(struct module *mod, char *buf) buf[l++] = 'C'; if (mod->taints & (1 << TAINT_UNSIGNED_MODULE)) buf[l++] = 'E'; + if (mod->taints & (1 << TAINT_LIVEPATCH)) + buf[l++] = 'K'; /* * TAINT_FORCED_RMMOD: could be added. * TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't @@ -2792,14 +2794,17 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l } #ifdef CONFIG_LIVEPATCH -static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo_livepatch(struct module *mod, struct load_info *info) { - mod->klp = get_modinfo(info, "livepatch") ? true : false; + if (get_modinfo(info, "livepatch")) { + mod->klp = true; + add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK); + } return 0; } #else /* !CONFIG_LIVEPATCH */ -static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo_livepatch(struct module *mod, struct load_info *info) { if (get_modinfo(info, "livepatch")) { pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", @@ -2969,7 +2974,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) "is unknown, you have been warned.\n", mod->name); } - err = find_livepatch_modinfo(mod, info); + err = check_modinfo_livepatch(mod, info); if (err) return err; diff --git a/kernel/panic.c b/kernel/panic.c index ca8cea1ef673..e6480e20379e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -71,6 +71,32 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs) panic_smp_self_stop(); } +/* + * Stop other CPUs in panic. Architecture dependent code may override this + * with more suitable version. For example, if the architecture supports + * crash dump, it should save registers of each stopped CPU and disable + * per-CPU features such as virtualization extensions. + */ +void __weak crash_smp_send_stop(void) +{ + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a panic + * situation. + */ + smp_send_stop(); + cpus_stopped = 1; +} + atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); /* @@ -164,14 +190,21 @@ void panic(const char *fmt, ...) 
if (!_crash_kexec_post_notifiers) { printk_nmi_flush_on_panic(); __crash_kexec(NULL); - } - /* - * Note smp_send_stop is the usual smp shutdown function, which - * unfortunately means it may not be hardened to work in a panic - * situation. - */ - smp_send_stop(); + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a + * panic situation. + */ + smp_send_stop(); + } else { + /* + * If we want to do crash dump after notifier calls and + * kmsg_dump, we will need architecture dependent extra + * works in addition to stopping other CPUs. + */ + crash_smp_send_stop(); + } /* * Run any panic handlers, including those that might need to diff --git a/kernel/power/process.c b/kernel/power/process.c index 8f27d5a8adf6..2fba066e125f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -144,23 +144,12 @@ int freeze_processes(void) /* * Now that the whole userspace is frozen we need to disbale * the OOM killer to disallow any further interference with - * killable tasks. + * killable tasks. There is no guarantee oom victims will + * ever reach a point they go away we have to wait with a timeout. */ - if (!error && !oom_killer_disable()) + if (!error && !oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs))) error = -EBUSY; - /* - * There is a hard to fix race between oom_reaper kernel thread - * and oom_killer_disable. oom_reaper calls exit_oom_victim - * before the victim reaches exit_mm so try to freeze all the tasks - * again and catch such a left over task. - */ - if (!error) { - pr_info("Double checking all user space processes after OOM killer disable... "); - error = try_to_freeze_tasks(true); - pr_cont("\n"); - } - if (error) thaw_processes(); return error; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index eea6dbc2d8cf..d5e397315473 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -253,6 +253,17 @@ static int preferred_console = -1; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); +#ifdef CONFIG_OF +static bool of_specified_console; + +void console_set_by_of(void) +{ + of_specified_console = true; +} +#else +# define of_specified_console false +#endif + /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -655,11 +666,8 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, * better readable output. 'c' in the record flags mark the first * fragment of a line, '+' the following. */ - if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT)) - cont = 'c'; - else if ((msg->flags & LOG_CONT) || - ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX))) - cont = '+'; + if (msg->flags & LOG_CONT) + cont = (prev_flags & LOG_CONT) ? 
'+' : 'c'; return scnprintf(buf, size, "%u,%llu,%llu,%c;", (msg->facility << 3) | msg->level, seq, ts_usec, cont); @@ -786,6 +794,8 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } +static void cont_flush(void); + static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -801,6 +811,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; raw_spin_lock_irq(&logbuf_lock); + cont_flush(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; @@ -863,6 +874,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) return -ESPIPE; raw_spin_lock_irq(&logbuf_lock); + cont_flush(); switch (whence) { case SEEK_SET: /* the first record */ @@ -901,6 +913,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); raw_spin_lock_irq(&logbuf_lock); + cont_flush(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -1287,6 +1300,7 @@ static int syslog_print(char __user *buf, int size) size_t skip; raw_spin_lock_irq(&logbuf_lock); + cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1346,6 +1360,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; raw_spin_lock_irq(&logbuf_lock); + cont_flush(); if (buf) { u64 next_seq; u64 seq; @@ -1507,6 +1522,7 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: raw_spin_lock_irq(&logbuf_lock); + cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1643,35 +1659,33 @@ static struct cont { bool flushed:1; /* buffer sealed and committed */ } cont; -static void cont_flush(enum log_flags flags) +static void cont_flush(void) { if (cont.flushed) return; if (cont.len == 0) return; - if (cont.cons) { /* * If a fragment of this line was directly flushed to the * console; wait for the console to pick up the rest of the * line. LOG_NOCONS suppresses a duplicated output. */ - log_store(cont.facility, cont.level, flags | LOG_NOCONS, + log_store(cont.facility, cont.level, cont.flags | LOG_NOCONS, cont.ts_nsec, NULL, 0, cont.buf, cont.len); - cont.flags = flags; cont.flushed = true; } else { /* * If no fragment of this line ever reached the console, * just submit it to the store and free the buffer. */ - log_store(cont.facility, cont.level, flags, 0, + log_store(cont.facility, cont.level, cont.flags, 0, NULL, 0, cont.buf, cont.len); cont.len = 0; } } -static bool cont_add(int facility, int level, const char *text, size_t len) +static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len) { if (cont.len && cont.flushed) return false; @@ -1682,7 +1696,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len) * the line gets too long, split it up in separate records. 
*/ if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { - cont_flush(LOG_CONT); + cont_flush(); return false; } @@ -1691,7 +1705,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len) cont.level = level; cont.owner = current; cont.ts_nsec = local_clock(); - cont.flags = 0; + cont.flags = flags; cont.cons = 0; cont.flushed = false; } @@ -1699,8 +1713,15 @@ static bool cont_add(int facility, int level, const char *text, size_t len) memcpy(cont.buf + cont.len, text, len); cont.len += len; + // The original flags come from the first line, + // but later continuations can add a newline. + if (flags & LOG_NEWLINE) { + cont.flags |= LOG_NEWLINE; + cont_flush(); + } + if (cont.len > (sizeof(cont.buf) * 80) / 100) - cont_flush(LOG_CONT); + cont_flush(); return true; } @@ -1733,6 +1754,31 @@ static size_t cont_print_text(char *text, size_t size) return textlen; } +static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) +{ + /* + * If an earlier line was buffered, and we're a continuation + * write from the same process, try to add it to the buffer. + */ + if (cont.len) { + if (cont.owner == current && (lflags & LOG_CONT)) { + if (cont_add(facility, level, lflags, text, text_len)) + return text_len; + } + /* Otherwise, make sure it's flushed */ + cont_flush(); + } + + /* If it doesn't end in a newline, try to buffer the current line */ + if (!(lflags & LOG_NEWLINE)) { + if (cont_add(facility, level, lflags, text, text_len)) + return text_len; + } + + /* Store it in the record log */ + return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len); +} + asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) @@ -1819,10 +1865,9 @@ asmlinkage int vprintk_emit(int facility, int level, /* strip kernel syslog prefix and extract log level or control flags */ if (facility == 0) { - int kern_level = printk_get_level(text); + int kern_level; - if (kern_level) { - const char *end_of_header = printk_skip_level(text); + while ((kern_level = printk_get_level(text)) != 0) { switch (kern_level) { case '0' ... '7': if (level == LOGLEVEL_DEFAULT) @@ -1830,14 +1875,13 @@ asmlinkage int vprintk_emit(int facility, int level, /* fallthrough */ case 'd': /* KERN_DEFAULT */ lflags |= LOG_PREFIX; + break; + case 'c': /* KERN_CONT */ + lflags |= LOG_CONT; } - /* - * No need to check length here because vscnprintf - * put '\0' at the end of the string. Only valid and - * newly printed level is detected. - */ - text_len -= end_of_header - text; - text = (char *)end_of_header; + + text_len -= 2; + text += 2; } } @@ -1847,45 +1891,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (dict) lflags |= LOG_PREFIX|LOG_NEWLINE; - if (!(lflags & LOG_NEWLINE)) { - /* - * Flush the conflicting buffer. An earlier newline was missing, - * or another task also prints continuation lines. 
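The LOG_CONT handling that log_output() and the reworked prefix-parsing loop implement is what keeps printk()/pr_cont() fragments together as one record. A typical (illustrative) caller that relies on it:

#include <linux/printk.h>

static void example_report(const int *vals, int n)
{
        int i;

        printk(KERN_INFO "example: values:");
        for (i = 0; i < n; i++)
                pr_cont(" %d", vals[i]);        /* KERN_CONT fragments */
        pr_cont("\n");                          /* newline closes the record */
}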
- */ - if (cont.len && (lflags & LOG_PREFIX || cont.owner != current)) - cont_flush(LOG_NEWLINE); - - /* buffer line if possible, otherwise store it right away */ - if (cont_add(facility, level, text, text_len)) - printed_len += text_len; - else - printed_len += log_store(facility, level, - lflags | LOG_CONT, 0, - dict, dictlen, text, text_len); - } else { - bool stored = false; - - /* - * If an earlier newline was missing and it was the same task, - * either merge it with the current buffer and flush, or if - * there was a race with interrupts (prefix == true) then just - * flush it out and store this line separately. - * If the preceding printk was from a different task and missed - * a newline, flush and append the newline. - */ - if (cont.len) { - if (cont.owner == current && !(lflags & LOG_PREFIX)) - stored = cont_add(facility, level, text, - text_len); - cont_flush(LOG_NEWLINE); - } - - if (stored) - printed_len += text_len; - else - printed_len += log_store(facility, level, lflags, 0, - dict, dictlen, text, text_len); - } + printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); @@ -2647,7 +2653,7 @@ void register_console(struct console *newcon) * didn't select a console we take the first one * that registers here. */ - if (preferred_console < 0) { + if (preferred_console < 0 && !of_specified_console) { if (newcon->index < 0) newcon->index = 0; if (newcon->setup == NULL || @@ -3029,6 +3035,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) dumper->active = true; raw_spin_lock_irqsave(&logbuf_lock, flags); + cont_flush(); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; @@ -3119,6 +3126,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, bool ret; raw_spin_lock_irqsave(&logbuf_lock, flags); + cont_flush(); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); raw_spin_unlock_irqrestore(&logbuf_lock, flags); @@ -3161,6 +3169,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; raw_spin_lock_irqsave(&logbuf_lock, flags); + cont_flush(); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1d3b7665d0be..2a99027312a6 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -73,6 +73,8 @@ void __ptrace_unlink(struct task_struct *child) { BUG_ON(!child->ptrace); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->parent = child->real_parent; list_del_init(&child->ptrace_entry); @@ -489,7 +491,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) /* Architecture-specific hardware disable .. 
*/ ptrace_disable(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); write_lock_irq(&tasklist_lock); /* diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 944b1b491ed8..1898559e6b60 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -170,7 +170,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) false)); } -static void rcu_process_callbacks(struct softirq_action *unused) +static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7e2e03879c2e..69a5611a7e7c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3013,7 +3013,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) /* * Do RCU core processing for the current CPU. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) { struct rcu_state *rsp; diff --git a/kernel/relay.c b/kernel/relay.c index fc9b4a4af463..da79a109dbeb 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -328,13 +328,15 @@ static struct rchan_callbacks default_channel_callbacks = { /** * wakeup_readers - wake up readers waiting on a channel - * @data: contains the channel buffer + * @work: contains the channel buffer * - * This is the timer function used to defer reader waking. + * This is the function used to defer reader waking */ -static void wakeup_readers(unsigned long data) +static void wakeup_readers(struct irq_work *work) { - struct rchan_buf *buf = (struct rchan_buf *)data; + struct rchan_buf *buf; + + buf = container_of(work, struct rchan_buf, wakeup_work); wake_up_interruptible(&buf->read_wait); } @@ -352,9 +354,10 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) if (init) { init_waitqueue_head(&buf->read_wait); kref_init(&buf->kref); - setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); - } else - del_timer_sync(&buf->timer); + init_irq_work(&buf->wakeup_work, wakeup_readers); + } else { + irq_work_sync(&buf->wakeup_work); + } buf->subbufs_produced = 0; buf->subbufs_consumed = 0; @@ -487,7 +490,7 @@ free_buf: static void relay_close_buf(struct rchan_buf *buf) { buf->finalized = 1; - del_timer_sync(&buf->timer); + irq_work_sync(&buf->wakeup_work); buf->chan->cb->remove_buf_file(buf->dentry); kref_put(&buf->kref, relay_remove_buf); } @@ -754,14 +757,15 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) buf->early_bytes += buf->chan->subbuf_size - buf->padding[old_subbuf]; smp_mb(); - if (waitqueue_active(&buf->read_wait)) + if (waitqueue_active(&buf->read_wait)) { /* * Calling wake_up_interruptible() from here * will deadlock if we happen to be logging * from the scheduler (trying to re-grab * rq->lock), so defer it. 
*/ - mod_timer(&buf->timer, jiffies + 1); + irq_work_queue(&buf->wakeup_work); + } } old = buf->data; @@ -1108,51 +1112,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, return end_pos; } -/* - * subbuf_read_actor - read up to one subbuf's worth of data - */ -static int subbuf_read_actor(size_t read_start, - struct rchan_buf *buf, - size_t avail, - read_descriptor_t *desc) -{ - void *from; - int ret = 0; - - from = buf->start + read_start; - ret = avail; - if (copy_to_user(desc->arg.buf, from, avail)) { - desc->error = -EFAULT; - ret = 0; - } - desc->arg.data += ret; - desc->written += ret; - desc->count -= ret; - - return ret; -} - -typedef int (*subbuf_actor_t) (size_t read_start, - struct rchan_buf *buf, - size_t avail, - read_descriptor_t *desc); - -/* - * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries - */ -static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, - subbuf_actor_t subbuf_actor, - read_descriptor_t *desc) +static ssize_t relay_file_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; + size_t written = 0; int ret; - if (!desc->count) + if (!count) return 0; inode_lock(file_inode(filp)); do { + void *from; + if (!relay_file_read_avail(buf, *ppos)) break; @@ -1161,32 +1137,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, if (!avail) break; - avail = min(desc->count, avail); - ret = subbuf_actor(read_start, buf, avail, desc); - if (desc->error < 0) + avail = min(count, avail); + from = buf->start + read_start; + ret = avail; + if (copy_to_user(buffer, from, avail)) break; - if (ret) { - relay_file_read_consume(buf, read_start, ret); - *ppos = relay_file_read_end_pos(buf, read_start, ret); - } - } while (desc->count && ret); - inode_unlock(file_inode(filp)); + buffer += ret; + written += ret; + count -= ret; - return desc->written; -} + relay_file_read_consume(buf, read_start, ret); + *ppos = relay_file_read_end_pos(buf, read_start, ret); + } while (count); + inode_unlock(file_inode(filp)); -static ssize_t relay_file_read(struct file *filp, - char __user *buffer, - size_t count, - loff_t *ppos) -{ - read_descriptor_t desc; - desc.written = 0; - desc.count = count; - desc.arg.buf = buffer; - desc.error = 0; - return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); + return written; } static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 13935886a471..fa178b62ea79 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -415,7 +415,8 @@ static char *task_group_path(struct task_group *tg) if (autogroup_path(tg, group_path, PATH_MAX)) return group_path; - return cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + return group_path; } #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 502e95a6e927..76ee7de1859d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5471,13 +5471,18 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd */ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) { - struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); - u64 avg_idle = this_rq()->avg_idle; - u64 avg_cost = this_sd->avg_scan_cost; + struct sched_domain *this_sd; + u64 avg_cost, avg_idle = this_rq()->avg_idle; u64 time, cost; s64 delta; int cpu, 
wrap; + this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); + if (!this_sd) + return -1; + + avg_cost = this_sd->avg_scan_cost; + /* * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. @@ -8522,7 +8527,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { } * run_rebalance_domains is triggered when needed from the scheduler tick. * Also triggered for nohz idle balancing (with nohz_balancing_kick set). */ -static void run_rebalance_domains(struct softirq_action *h) +static __latent_entropy void run_rebalance_domains(struct softirq_action *h) { struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance ? diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 9fb873cfc75c..1d8718d5300d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -16,6 +16,9 @@ #include "sched.h" +/* Linker adds these: start and end of __cpuidle functions */ +extern char __cpuidle_text_start[], __cpuidle_text_end[]; + /** * sched_idle_set_state - Record idle state for the current CPU. * @idle_state: State to record. @@ -53,7 +56,7 @@ static int __init cpu_idle_nopoll_setup(char *__unused) __setup("hlt", cpu_idle_nopoll_setup); #endif -static inline int cpu_idle_poll(void) +static noinline int __cpuidle cpu_idle_poll(void) { rcu_idle_enter(); trace_cpu_idle_rcuidle(0, smp_processor_id()); @@ -84,7 +87,7 @@ void __weak arch_cpu_idle(void) * * To use when the cpuidle framework cannot be used. */ -void default_idle_call(void) +void __cpuidle default_idle_call(void) { if (current_clr_polling_and_test()) { local_irq_enable(); @@ -271,6 +274,12 @@ static void cpu_idle_loop(void) } } +bool cpu_in_idle(unsigned long pc) +{ + return pc >= (unsigned long)__cpuidle_text_start && + pc < (unsigned long)__cpuidle_text_end; +} + void cpu_startup_entry(enum cpuhp_state state) { /* diff --git a/kernel/smpboot.c b/kernel/smpboot.c index fc0d8270f69e..4a5c6e73ecd4 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -122,12 +122,12 @@ static int smpboot_thread_fn(void *data) if (kthread_should_park()) { __set_current_state(TASK_RUNNING); + preempt_enable(); if (ht->park && td->status == HP_THREAD_ACTIVE) { BUG_ON(td->cpu != smp_processor_id()); ht->park(td->cpu); td->status = HP_THREAD_PARKED; } - preempt_enable(); kthread_parkme(); /* We might have been woken for stop */ continue; @@ -186,6 +186,11 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) kfree(td); return PTR_ERR(tsk); } + /* + * Park the thread so that it could start right on the CPU + * when it is available. 
+ */ + kthread_park(tsk); get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; if (ht->create) { diff --git a/kernel/softirq.c b/kernel/softirq.c index 66762645f9e8..1bf81ef91375 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -496,7 +496,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule_first); -static void tasklet_action(struct softirq_action *a) +static __latent_entropy void tasklet_action(struct softirq_action *a) { struct tasklet_struct *list; @@ -532,7 +532,7 @@ static void tasklet_action(struct softirq_action *a) } } -static void tasklet_hi_action(struct softirq_action *a) +static __latent_entropy void tasklet_hi_action(struct softirq_action *a) { struct tasklet_struct *list; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 2c5e3a8e00d7..635482e60ca3 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -250,3 +250,8 @@ cond_syscall(sys_execveat); /* membarrier */ cond_syscall(sys_membarrier); + +/* memory protection keys */ +cond_syscall(sys_pkey_mprotect); +cond_syscall(sys_pkey_alloc); +cond_syscall(sys_pkey_free); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a43775c6646c..706309f9ed84 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -107,9 +107,8 @@ extern unsigned int core_pipe_limit; extern int pid_max; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; -extern int compat_log; extern int latencytop_enabled; -extern int sysctl_nr_open_min, sysctl_nr_open_max; +extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; #ifndef CONFIG_MMU extern int sysctl_nr_trim_pages; #endif @@ -1085,15 +1084,6 @@ static struct ctl_table kern_table[] = { .extra1 = &neg_one, }, #endif -#ifdef CONFIG_COMPAT - { - .procname = "compat-log", - .data = &compat_log, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_RT_MUTEXES { .procname = "max_lock_depth", @@ -1693,7 +1683,7 @@ static struct ctl_table fs_table[] = { { .procname = "nr_open", .data = &sysctl_nr_open, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &sysctl_nr_open_min, diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c3aad685bbc0..12dd190634ab 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -542,7 +542,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) static int alarm_timer_create(struct k_itimer *new_timer) { enum alarmtimer_type type; - struct alarm_base *base; if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; @@ -551,7 +550,6 @@ static int alarm_timer_create(struct k_itimer *new_timer) return -EPERM; type = clock2alarm(new_timer->it_clock); - base = &alarm_bases[type]; alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); return 0; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e07fb093f819..37dec7e3db43 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -403,8 +403,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); - now += clocksource_delta(tkr->read(tkr->clock), - tkr->cycle_last, tkr->mask); + now += timekeeping_delta_to_ns(tkr, + clocksource_delta( + tkr->read(tkr->clock), + tkr->cycle_last, + tkr->mask)); } while (read_seqcount_retry(&tkf->seq, seq)); return now; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 32bf6f75a8fe..2d47980a1bc4 100644 --- a/kernel/time/timer.c 
+++ b/kernel/time/timer.c @@ -1633,7 +1633,7 @@ static inline void __run_timers(struct timer_base *base) /* * This function runs timers and the timer-tq in bottom half context. */ -static void run_timer_softirq(struct softirq_action *h) +static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 992ab9d99f35..e57980845549 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,8 +1,4 @@ -# We are fully aware of the dangers of __builtin_return_address() -FRAME_CFLAGS := $(call cc-disable-warning,frame-address) -KBUILD_CFLAGS += $(FRAME_CFLAGS) - # Do not instrument the tracer itself: ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b2b6efc083a4..5e10395da88e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -610,8 +610,7 @@ static int perf_sysenter_enable(struct trace_event_call *call) if (!sys_perf_refcount_enter) ret = register_trace_sys_enter(perf_syscall_enter, NULL); if (ret) { - pr_info("event trace: Could not activate" - "syscall entry trace point"); + pr_info("event trace: Could not activate syscall entry trace point"); } else { set_bit(num, enabled_perf_enter_syscalls); sys_perf_refcount_enter++; @@ -682,8 +681,7 @@ static int perf_sysexit_enable(struct trace_event_call *call) if (!sys_perf_refcount_exit) ret = register_trace_sys_exit(perf_syscall_exit, NULL); if (ret) { - pr_info("event trace: Could not activate" - "syscall exit trace point"); + pr_info("event trace: Could not activate syscall exit trace point"); } else { set_bit(num, enabled_perf_exit_syscalls); sys_perf_refcount_exit++; diff --git a/kernel/uid16.c b/kernel/uid16.c index d58cc4d8f0d1..cc40793464e3 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -117,7 +117,7 @@ static int groups16_to_user(old_gid_t __user *grouplist, kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - kgid = GROUP_AT(group_info, i); + kgid = group_info->gid[i]; group = high2lowgid(from_kgid_munged(user_ns, kgid)); if (put_user(group, grouplist+i)) return -EFAULT; @@ -142,7 +142,7 @@ static int groups16_from_user(struct group_info *group_info, if (!gid_valid(kgid)) return -EINVAL; - GROUP_AT(group_info, i) = kgid; + group_info->gid[i] = kgid; } return 0; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ef071ca73fc3..479d840db286 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2974,6 +2974,31 @@ bool flush_delayed_work(struct delayed_work *dwork) } EXPORT_SYMBOL(flush_delayed_work); +static bool __cancel_work(struct work_struct *work, bool is_dwork) +{ + unsigned long flags; + int ret; + + do { + ret = try_to_grab_pending(work, is_dwork, &flags); + } while (unlikely(ret == -EAGAIN)); + + if (unlikely(ret < 0)) + return false; + + set_work_pool_and_clear_pending(work, get_work_pool_id(work)); + local_irq_restore(flags); + return ret; +} + +/* + * See cancel_delayed_work() + */ +bool cancel_work(struct work_struct *work) +{ + return __cancel_work(work, false); +} + /** * cancel_delayed_work - cancel a delayed work * @dwork: delayed_work to cancel @@ -2992,20 +3017,7 @@ EXPORT_SYMBOL(flush_delayed_work); */ bool cancel_delayed_work(struct delayed_work *dwork) { - unsigned long flags; - int ret; - - do { - ret = try_to_grab_pending(&dwork->work, true, &flags); - } while (unlikely(ret == -EAGAIN)); - - if (unlikely(ret < 0)) - return false; - - 
set_work_pool_and_clear_pending(&dwork->work, - get_work_pool_id(&dwork->work)); - local_irq_restore(flags); - return ret; + return __cancel_work(&dwork->work, true); } EXPORT_SYMBOL(cancel_delayed_work); @@ -4249,7 +4261,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) * This function is called without any synchronization and @task * could be in any state. Be careful with dereferences. */ - worker = probe_kthread_data(task); + worker = kthread_probe_data(task); /* * Carefully copy the associated workqueue's workfn and name. Keep |
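
For context on the workqueue hunks above: __cancel_work() folds the try_to_grab_pending()/clear-PENDING sequence into a single helper, cancel_delayed_work() becomes a thin wrapper around it, and a plain cancel_work() is added with the same non-waiting semantics for a bare work_struct. Below is a minimal usage sketch, assuming cancel_work() is declared where the caller can see it; the demo_* structure and callbacks are hypothetical illustrations, not part of the patch.

	#include <linux/workqueue.h>

	/* Hypothetical driver context: one plain and one delayed work item. */
	struct demo_dev {
		struct work_struct	io_done_work;
		struct delayed_work	poll_work;
	};

	static void demo_io_done(struct work_struct *work)
	{
		/* handle completed I/O */
	}

	static void demo_poll(struct work_struct *work)
	{
		/* periodic polling; may re-arm itself with schedule_delayed_work() */
	}

	static void demo_init(struct demo_dev *dev)
	{
		INIT_WORK(&dev->io_done_work, demo_io_done);
		INIT_DELAYED_WORK(&dev->poll_work, demo_poll);
	}

	/*
	 * Teardown sketch: both helpers only clear a still-pending item and
	 * return without waiting, so they may return while the callback is
	 * still executing elsewhere.
	 */
	static void demo_stop(struct demo_dev *dev)
	{
		cancel_work(&dev->io_done_work);	/* new helper added above */
		cancel_delayed_work(&dev->poll_work);	/* now wraps __cancel_work() */
	}

The non-waiting variants are what such a path would use where sleeping is not allowed; teardown code that must guarantee the handler has finished still needs cancel_work_sync()/cancel_delayed_work_sync().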