diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-10-16 17:30:11 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-10-16 17:30:11 +0200 |
commit | ec57e2f0acb01710cd465bc04495ed03a9e0fea1 (patch) | |
tree | 9f45889ba31b750f99fd3d0b625684d9b4cd4f17 /kernel | |
parent | 4766ab5677a2842834f9bc4a21587256a811531c (diff) | |
parent | 72a9c673636b779e370983fea08e40f97039b981 (diff) | |
download | linux-ec57e2f0acb01710cd465bc04495ed03a9e0fea1.tar.bz2 |
Merge branch 'x86/build' into locking/core, to pick up dependent patches and unify jump-label work
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/btf.c | 2 | ||||
-rw-r--r-- | kernel/bpf/sockmap.c | 91 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 2 | ||||
-rw-r--r-- | kernel/dma/Kconfig | 3 | ||||
-rw-r--r-- | kernel/events/core.c | 21 | ||||
-rw-r--r-- | kernel/events/hw_breakpoint.c | 13 | ||||
-rw-r--r-- | kernel/jump_label.c | 100 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 1 | ||||
-rw-r--r-- | kernel/locking/test-ww_mutex.c | 2 | ||||
-rw-r--r-- | kernel/module.c | 9 | ||||
-rw-r--r-- | kernel/pid.c | 2 | ||||
-rw-r--r-- | kernel/printk/printk.c | 12 | ||||
-rw-r--r-- | kernel/sched/debug.c | 6 | ||||
-rw-r--r-- | kernel/sched/fair.c | 26 | ||||
-rw-r--r-- | kernel/sched/topology.c | 5 | ||||
-rw-r--r-- | kernel/sys.c | 3 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 2 |
17 files changed, 180 insertions, 120 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2590700237c1..138f0302692e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1844,7 +1844,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env) hdr = &btf->hdr; cur = btf->nohdr_data + hdr->type_off; - end = btf->nohdr_data + hdr->type_len; + end = cur + hdr->type_len; env->log_type_id = 1; while (cur < end) { diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 488ef9663c01..0a0f2ec75370 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -132,6 +132,7 @@ struct smap_psock { struct work_struct gc_work; struct proto *sk_proto; + void (*save_unhash)(struct sock *sk); void (*save_close)(struct sock *sk, long timeout); void (*save_data_ready)(struct sock *sk); void (*save_write_space)(struct sock *sk); @@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +static void bpf_tcp_unhash(struct sock *sk); static void bpf_tcp_close(struct sock *sk, long timeout); static inline struct smap_psock *smap_psock_sk(const struct sock *sk) @@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], struct proto *base) { prot[SOCKMAP_BASE] = *base; + prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash; prot[SOCKMAP_BASE].close = bpf_tcp_close; prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; @@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk) return -EBUSY; } + psock->save_unhash = sk->sk_prot->unhash; psock->save_close = sk->sk_prot->close; psock->sk_proto = sk->sk_prot; @@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk, return e; } -static void bpf_tcp_close(struct sock *sk, long timeout) +static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock) { - void (*close_fun)(struct sock *sk, long timeout); struct smap_psock_map_entry *e; struct sk_msg_buff *md, *mtmp; - struct smap_psock *psock; struct sock *osk; - lock_sock(sk); - rcu_read_lock(); - psock = smap_psock_sk(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - /* The psock may be destroyed anytime after exiting the RCU critial - * section so by the time we use close_fun the psock may no longer - * be valid. However, bpf_tcp_close is called with the sock lock - * held so the close hook and sk are still valid. - */ - close_fun = psock->save_close; - if (psock->cork) { free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); @@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout) kfree(e); e = psock_map_pop(sk, psock); } +} + +static void bpf_tcp_unhash(struct sock *sk) +{ + void (*unhash_fun)(struct sock *sk); + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + return; + } + unhash_fun = psock->save_unhash; + bpf_tcp_remove(sk, psock); + rcu_read_unlock(); + unhash_fun(sk); +} + +static void bpf_tcp_close(struct sock *sk, long timeout) +{ + void (*close_fun)(struct sock *sk, long timeout); + struct smap_psock *psock; + + lock_sock(sk); + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); + return sk->sk_prot->close(sk, timeout); + } + close_fun = psock->save_close; + bpf_tcp_remove(sk, psock); rcu_read_unlock(); release_sock(sk); close_fun(sk, timeout); @@ -2097,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ if (skops.sk->sk_type != SOCK_STREAM || - skops.sk->sk_protocol != IPPROTO_TCP) { + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { fput(socket->file); return -EOPNOTSUPP; } @@ -2453,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { + fput(socket->file); + return -EOPNOTSUPP; + } + lock_sock(skops.sk); preempt_disable(); rcu_read_lock(); @@ -2543,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = { .map_check_btf = map_check_no_btf, }; +static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops) +{ + return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB || + ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB; +} BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. This checks that the sock ops triggering the update is + * one indicating we are (or will be soon) in an ESTABLISHED state. + */ + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_map_ctx_update_elem(bpf_sock, map, key, flags); } @@ -2565,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_hash_ctx_update_elem(bpf_sock, map, key, flags); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 92246117d2b0..bb07e74b34a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3163,7 +3163,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, * an arbitrary scalar. Disallow all math except * pointer subtraction */ - if (opcode == BPF_SUB){ + if (opcode == BPF_SUB && env->allow_ptr_leaks) { mark_reg_unknown(env, regs, insn->dst_reg); return 0; } diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 9bd54304446f..1b1d63b3634b 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -23,6 +23,9 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU bool select NEED_DMA_MAP_STATE +config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL + bool + config DMA_DIRECT_OPS bool depends on HAS_DMA diff --git a/kernel/events/core.c b/kernel/events/core.c index 2a62b96600ad..dcb093e7b377 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2867,16 +2867,11 @@ static int perf_event_modify_breakpoint(struct perf_event *bp, _perf_event_disable(bp); err = modify_user_hw_breakpoint_check(bp, attr, true); - if (err) { - if (!bp->attr.disabled) - _perf_event_enable(bp); - return err; - } - - if (!attr->disabled) + if (!bp->attr.disabled) _perf_event_enable(bp); - return 0; + + return err; } static int perf_event_modify_attr(struct perf_event *event, @@ -3940,6 +3935,12 @@ int perf_event_read_local(struct perf_event *event, u64 *value, goto out; } + /* If this is a pinned event it must be running on this CPU */ + if (event->attr.pinned && event->oncpu != smp_processor_id()) { + ret = -EBUSY; + goto out; + } + /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise @@ -5948,6 +5949,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, unsigned long sp; unsigned int rem; u64 dyn_size; + mm_segment_t fs; /* * We dump: @@ -5965,7 +5967,10 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, /* Data. */ sp = perf_user_stack_pointer(regs); + fs = get_fs(); + set_fs(USER_DS); rem = __output_copy_user(handle, (void *) sp, dump_size); + set_fs(fs); dyn_size = dump_size - rem; perf_output_skip(handle, rem); diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index b3814fce5ecb..d6b56180827c 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -509,6 +509,8 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *a */ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { + int err; + /* * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it * will not be possible to raise IPIs that invoke __perf_event_disable. @@ -520,15 +522,12 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att else perf_event_disable(bp); - if (!attr->disabled) { - int err = modify_user_hw_breakpoint_check(bp, attr, false); + err = modify_user_hw_breakpoint_check(bp, attr, false); - if (err) - return err; + if (!bp->attr.disabled) perf_event_enable(bp); - bp->attr.disabled = 0; - } - return 0; + + return err; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 966a9e9c0f04..b28028b08d44 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -38,23 +38,43 @@ static int jump_label_cmp(const void *a, const void *b) const struct jump_entry *jea = a; const struct jump_entry *jeb = b; - if (jea->key < jeb->key) + if (jump_entry_key(jea) < jump_entry_key(jeb)) return -1; - if (jea->key > jeb->key) + if (jump_entry_key(jea) > jump_entry_key(jeb)) return 1; return 0; } +static void jump_label_swap(void *a, void *b, int size) +{ + long delta = (unsigned long)a - (unsigned long)b; + struct jump_entry *jea = a; + struct jump_entry *jeb = b; + struct jump_entry tmp = *jea; + + jea->code = jeb->code - delta; + jea->target = jeb->target - delta; + jea->key = jeb->key - delta; + + jeb->code = tmp.code + delta; + jeb->target = tmp.target + delta; + jeb->key = tmp.key + delta; +} + static void jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) { unsigned long size; + void *swapfn = NULL; + + if (IS_ENABLED(CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE)) + swapfn = jump_label_swap; size = (((unsigned long)stop - (unsigned long)start) / sizeof(struct jump_entry)); - sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); + sort(start, size, sizeof(struct jump_entry), jump_label_cmp, swapfn); } static void jump_label_update(struct static_key *key); @@ -266,8 +286,8 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit); static int addr_conflict(struct jump_entry *entry, void *start, void *end) { - if (entry->code <= (unsigned long)end && - entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start) + if (jump_entry_code(entry) <= (unsigned long)end && + jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start) return 1; return 0; @@ -326,16 +346,6 @@ static inline void static_key_set_linked(struct static_key *key) key->type |= JUMP_TYPE_LINKED; } -static inline struct static_key *jump_entry_key(struct jump_entry *entry) -{ - return (struct static_key *)((unsigned long)entry->key & ~1UL); -} - -static bool jump_entry_branch(struct jump_entry *entry) -{ - return (unsigned long)entry->key & 1UL; -} - /*** * A 'struct static_key' uses a union such that it either points directly * to a table of 'struct jump_entry' or to a linked list of modules which in @@ -360,7 +370,7 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool enabled = static_key_enabled(key); - bool branch = jump_entry_branch(entry); + bool branch = jump_entry_is_branch(entry); /* See the comment in linux/jump_label.h */ return enabled ^ branch; @@ -368,19 +378,20 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry) static void __jump_label_update(struct static_key *key, struct jump_entry *entry, - struct jump_entry *stop) + struct jump_entry *stop, + bool init) { for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { /* * An entry->code of 0 indicates an entry which has been * disabled because it was in an init text area. */ - if (entry->code) { - if (kernel_text_address(entry->code)) + if (init || !jump_entry_is_init(entry)) { + if (kernel_text_address(jump_entry_code(entry))) arch_jump_label_transform(entry, jump_label_type(entry)); else WARN_ONCE(1, "can't patch jump_label at %pS", - (void *)(unsigned long)entry->code); + (void *)jump_entry_code(entry)); } } } @@ -415,6 +426,9 @@ void __init jump_label_init(void) if (jump_label_type(iter) == JUMP_LABEL_NOP) arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + if (init_section_contains((void *)jump_entry_code(iter), 1)) + jump_entry_set_init(iter); + iterk = jump_entry_key(iter); if (iterk == key) continue; @@ -427,26 +441,13 @@ void __init jump_label_init(void) cpus_read_unlock(); } -/* Disable any jump label entries in __init/__exit code */ -void __init jump_label_invalidate_initmem(void) -{ - struct jump_entry *iter_start = __start___jump_table; - struct jump_entry *iter_stop = __stop___jump_table; - struct jump_entry *iter; - - for (iter = iter_start; iter < iter_stop; iter++) { - if (init_section_contains((void *)(unsigned long)iter->code, 1)) - iter->code = 0; - } -} - #ifdef CONFIG_MODULES static enum jump_label_type jump_label_init_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool type = static_key_type(key); - bool branch = jump_entry_branch(entry); + bool branch = jump_entry_is_branch(entry); /* See the comment in linux/jump_label.h */ return type ^ branch; @@ -519,7 +520,8 @@ static void __jump_label_mod_update(struct static_key *key) stop = __stop___jump_table; else stop = m->jump_entries + m->num_jump_entries; - __jump_label_update(key, mod->entries, stop); + __jump_label_update(key, mod->entries, stop, + m && m->state == MODULE_STATE_COMING); } } @@ -565,12 +567,15 @@ static int jump_label_add_module(struct module *mod) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; + if (within_module_init(jump_entry_code(iter), mod)) + jump_entry_set_init(iter); + iterk = jump_entry_key(iter); if (iterk == key) continue; key = iterk; - if (within_module(iter->key, mod)) { + if (within_module((unsigned long)key, mod)) { static_key_set_entries(key, iter); continue; } @@ -600,7 +605,7 @@ static int jump_label_add_module(struct module *mod) /* Only update if we've changed from our initial state */ if (jump_label_type(iter) != jump_label_init_type(iter)) - __jump_label_update(key, iter, iter_stop); + __jump_label_update(key, iter, iter_stop, true); } return 0; @@ -620,7 +625,7 @@ static void jump_label_del_module(struct module *mod) key = jump_entry_key(iter); - if (within_module(iter->key, mod)) + if (within_module((unsigned long)key, mod)) continue; /* No memory during module load */ @@ -656,19 +661,6 @@ static void jump_label_del_module(struct module *mod) } } -/* Disable any jump label entries in module init code */ -static void jump_label_invalidate_module_init(struct module *mod) -{ - struct jump_entry *iter_start = mod->jump_entries; - struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; - struct jump_entry *iter; - - for (iter = iter_start; iter < iter_stop; iter++) { - if (within_module_init(iter->code, mod)) - iter->code = 0; - } -} - static int jump_label_module_notify(struct notifier_block *self, unsigned long val, void *data) @@ -690,9 +682,6 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, case MODULE_STATE_GOING: jump_label_del_module(mod); break; - case MODULE_STATE_LIVE: - jump_label_invalidate_module_init(mod); - break; } jump_label_unlock(); @@ -762,7 +751,8 @@ static void jump_label_update(struct static_key *key) entry = static_key_entries(key); /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, stop); + __jump_label_update(key, entry, stop, + system_state < SYSTEM_RUNNING); } #ifdef CONFIG_STATIC_KEYS_SELFTEST diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8a732c856624..be76f476c63f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -55,7 +55,6 @@ #include "lockdep_internals.h" -#include <trace/events/preemptirq.h> #define CREATE_TRACE_POINTS #include <trace/events/lock.h> diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 5b915b370d5a..0be047dbd897 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -324,7 +324,7 @@ static int __test_cycle(unsigned int nthreads) if (!cycle->result) continue; - pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n", + pr_err("cyclic deadlock not resolved, ret[%d/%d] = %d\n", n, nthreads, cycle->result); ret = -EINVAL; break; diff --git a/kernel/module.c b/kernel/module.c index 6746c85511fe..49a405891587 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3317,6 +3317,15 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) ndx = find_sec(info, ".data..ro_after_init"); if (ndx) info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; + /* + * Mark the __jump_table section as ro_after_init as well: these data + * structures are never modified, with the exception of entries that + * refer to code in the __init section, which are annotated as such + * at module load time. + */ + ndx = find_sec(info, "__jump_table"); + if (ndx) + info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any diff --git a/kernel/pid.c b/kernel/pid.c index de1cfc4f75a2..cdf63e53a014 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -195,7 +195,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) idr_preload_end(); if (nr < 0) { - retval = nr; + retval = (nr == -ENOSPC) ? -EAGAIN : nr; goto out_free; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fd6f8ed28e01..9bf5404397e0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -351,7 +351,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; */ enum log_flags { - LOG_NOCONS = 1, /* suppress print, do not print to console */ LOG_NEWLINE = 2, /* text ended with a newline */ LOG_PREFIX = 4, /* text started with a prefix */ LOG_CONT = 8, /* text is a fragment of a continuation line */ @@ -1881,9 +1880,6 @@ int vprintk_store(int facility, int level, if (dict) lflags |= LOG_PREFIX|LOG_NEWLINE; - if (suppress_message_printing(level)) - lflags |= LOG_NOCONS; - return log_output(facility, level, lflags, dict, dictlen, text, text_len); } @@ -2032,6 +2028,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } +static bool suppress_message_printing(int level) { return false; } #endif /* CONFIG_PRINTK */ @@ -2368,10 +2365,11 @@ skip: break; msg = log_from_idx(console_idx); - if (msg->flags & LOG_NOCONS) { + if (suppress_message_printing(msg->level)) { /* - * Skip record if !ignore_loglevel, and - * record has level above the console loglevel. + * Skip record we have buffered and already printed + * directly to the console when we received it, and + * record that has level above the console loglevel. */ console_idx = log_next(console_idx); console_seq++; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 60caf1fb94e0..6383aa6a60ca 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -89,12 +89,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - static_key_disable(&sched_feat_keys[i]); + static_key_disable_cpuslocked(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - static_key_enable(&sched_feat_keys[i]); + static_key_enable_cpuslocked(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -146,9 +146,11 @@ sched_feat_write(struct file *filp, const char __user *ubuf, /* Ensure the static_key remains in a consistent state */ inode = file_inode(filp); + cpus_read_lock(); inode_lock(inode); ret = sched_feat_set(cmp); inode_unlock(inode); + cpus_read_unlock(); if (ret < 0) return ret; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b39fb596f6c1..f808ddf2a868 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3362,6 +3362,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) * attach_entity_load_avg - attach this entity to its cfs_rq load avg * @cfs_rq: cfs_rq to attach to * @se: sched_entity to attach + * @flags: migration hints * * Must call update_cfs_rq_load_avg() before this, since we rely on * cfs_rq->avg.last_update_time being current. @@ -7263,6 +7264,7 @@ static void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq, *pos; + const struct sched_class *curr_class; struct rq_flags rf; bool done = true; @@ -7299,8 +7301,10 @@ static void update_blocked_averages(int cpu) if (cfs_rq_has_blocked(cfs_rq)) done = false; } - update_rt_rq_load_avg(rq_clock_task(rq), rq, 0); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 0); + + curr_class = rq->curr->sched_class; + update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); /* Don't need periodic decay once load/util_avg are null */ if (others_have_blocked(rq)) @@ -7365,13 +7369,16 @@ static inline void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq = &rq->cfs; + const struct sched_class *curr_class; struct rq_flags rf; rq_lock_irqsave(rq, &rf); update_rq_clock(rq); update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq); - update_rt_rq_load_avg(rq_clock_task(rq), rq, 0); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 0); + + curr_class = rq->curr->sched_class; + update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); #ifdef CONFIG_NO_HZ_COMMON rq->last_blocked_load_update_tick = jiffies; @@ -7482,10 +7489,10 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } -static unsigned long scale_rt_capacity(int cpu) +static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long max = arch_scale_cpu_capacity(NULL, cpu); + unsigned long max = arch_scale_cpu_capacity(sd, cpu); unsigned long used, free; unsigned long irq; @@ -7507,7 +7514,7 @@ static unsigned long scale_rt_capacity(int cpu) static void update_cpu_capacity(struct sched_domain *sd, int cpu) { - unsigned long capacity = scale_rt_capacity(cpu); + unsigned long capacity = scale_rt_capacity(sd, cpu); struct sched_group *sdg = sd->groups; cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu); @@ -8269,7 +8276,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) force_balance: /* Looks like there is an imbalance. Compute it */ calculate_imbalance(env, &sds); - return sds.busiest; + return env->imbalance ? sds.busiest : NULL; out_balanced: env->imbalance = 0; @@ -9638,7 +9645,8 @@ static inline bool vruntime_normalized(struct task_struct *p) * - A task which has been woken up by try_to_wake_up() and * waiting for actually being woken up by sched_ttwu_pending(). */ - if (!se->sum_exec_runtime || p->state == TASK_WAKING) + if (!se->sum_exec_runtime || + (p->state == TASK_WAKING && p->sched_remote_wakeup)) return true; return false; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 56a0fed30c0a..505a41c42b96 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1295,7 +1295,7 @@ static void init_numa_topology_type(void) n = sched_max_numa_distance; - if (sched_domains_numa_levels <= 1) { + if (sched_domains_numa_levels <= 2) { sched_numa_topology_type = NUMA_DIRECT; return; } @@ -1380,9 +1380,6 @@ void sched_init_numa(void) break; } - if (!level) - return; - /* * 'level' contains the number of unique distances * diff --git a/kernel/sys.c b/kernel/sys.c index cf5c67533ff1..123bd73046ec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -71,9 +71,6 @@ #include <asm/io.h> #include <asm/unistd.h> -/* Hardening for Spectre-v1 */ -#include <linux/nospec.h> - #include "uid16.h" #ifndef SET_UNALIGN_CTL diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d92d4a982fd..65bd4616220d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1546,6 +1546,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) tmp_iter_page = first_page; do { + cond_resched(); + to_remove_page = tmp_iter_page; rb_inc_page(cpu_buffer, &tmp_iter_page); |