From 115284d89a436e9b66da0c6c4f6efded806874b2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:41 +0200 Subject: static_call: Add basic static call infrastructure Static calls are a replacement for global function pointers. They use code patching to allow direct calls to be used instead of indirect calls. They give the flexibility of function pointers, but with improved performance. This is especially important for cases where retpolines would otherwise be used, as retpolines can significantly impact performance. The concept and code are an extension of previous work done by Ard Biesheuvel and Steven Rostedt: https://lkml.kernel.org/r/20181005081333.15018-1-ard.biesheuvel@linaro.org https://lkml.kernel.org/r/20181006015110.653946300@goodmis.org There are two implementations, depending on arch support: 1) out-of-line: patched trampolines (CONFIG_HAVE_STATIC_CALL) 2) basic function pointers For more details, see the comments in include/linux/static_call.h. [peterz: simplified interface] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.623259796@infradead.org --- arch/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..806e6df7b47c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -975,6 +975,9 @@ config HAVE_SPARSE_SYSCALL_NR config ARCH_HAS_VDSO_DATA bool +config HAVE_STATIC_CALL + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" -- cgit v1.2.3 From 9183c3f9ed710a8edf1a61e8a96d497258d26e08 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:42 +0200 Subject: static_call: Add inline static call infrastructure Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE option, which is a faster version of CONFIG_HAVE_STATIC_CALL. At runtime, the static call sites are patched directly, rather than using the out-of-line trampolines. Compared to out-of-line static calls, the performance benefits are more modest, but still measurable. Steven Rostedt did some tracepoint measurements: https://lkml.kernel.org/r/20181126155405.72b4f718@gandalf.local.home This code is heavily inspired by the jump label code (aka "static jumps"), as some of the concepts are very similar. For more details, see the comments in include/linux/static_call.h. 
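To make the interface concrete, a rough usage sketch based on the macros this series introduces (the names sc_demo, demo_a and demo_b are invented for the example; DECLARE_STATIC_CALL() would normally live in a header and DEFINE_STATIC_CALL() in a .c file, and DEFINE_STATIC_CALL() already expands to the declaration):

	#include <linux/static_call.h>

	static int demo_a(int x)
	{
		return x + 1;
	}

	static int demo_b(int x)
	{
		return x + 2;
	}

	/* Defines the key and (if supported) the arch trampoline, initially demo_a(). */
	DEFINE_STATIC_CALL(sc_demo, demo_a);

	static int demo(void)
	{
		int r;

		r = static_call(sc_demo)(4);		/* direct call to demo_a(), r == 5 */

		/* Patch the trampoline (and, with the inline variant, all call sites). */
		static_call_update(sc_demo, demo_b);

		r = static_call(sc_demo)(4);		/* now calls demo_b(), r == 6 */

		return r;
	}
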
[peterz: simplified interface; merged trampolines] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.684334440@infradead.org --- arch/Kconfig | 4 + include/asm-generic/vmlinux.lds.h | 7 + include/linux/module.h | 5 + include/linux/static_call.h | 36 ++++- include/linux/static_call_types.h | 13 ++ kernel/Makefile | 1 + kernel/module.c | 5 + kernel/static_call.c | 303 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 kernel/static_call.c (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 806e6df7b47c..2c4936ac620a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -978,6 +978,10 @@ config ARCH_HAS_VDSO_DATA config HAVE_STATIC_CALL bool +config HAVE_STATIC_CALL_INLINE + bool + depends on HAVE_STATIC_CALL + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5430febd34be..0088a5cd6a40 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -388,6 +388,12 @@ KEEP(*(__jump_table)) \ __stop___jump_table = .; +#define STATIC_CALL_DATA \ + . = ALIGN(8); \ + __start_static_call_sites = .; \ + KEEP(*(.static_call_sites)) \ + __stop_static_call_sites = .; + /* * Allow architectures to handle ro_after_init data on their * own by defining an empty RO_AFTER_INIT_DATA. @@ -398,6 +404,7 @@ __start_ro_after_init = .; \ *(.data..ro_after_init) \ JUMP_TABLE_DATA \ + STATIC_CALL_DATA \ __end_ro_after_init = .; #endif diff --git a/include/linux/module.h b/include/linux/module.h index e30ed5fa33a7..a29187f7c360 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -498,6 +499,10 @@ struct module { unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + int num_static_call_sites; + struct static_call_site *static_call_sites; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? 
*/ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index d8892dff2e91..0d7f9efaa3b2 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -95,7 +95,41 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func); STATIC_CALL_TRAMP_ADDR(name), func); \ }) -#if defined(CONFIG_HAVE_STATIC_CALL) +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +struct static_call_mod { + struct static_call_mod *next; + struct module *mod; /* for vmlinux, mod == NULL */ + struct static_call_site *sites; +}; + +struct static_call_key { + void *func; + struct static_call_mod *mods; +}; + +extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); +extern int static_call_mod_init(struct module *mod); + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + .mods = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + +#define static_call(name) __static_call(name) + +#define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) + +#define EXPORT_STATIC_CALL_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + +#elif defined(CONFIG_HAVE_STATIC_CALL) struct static_call_key { void *func; diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 5ed249dc47d3..408d345d83e1 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -2,14 +2,27 @@ #ifndef _STATIC_CALL_TYPES_H #define _STATIC_CALL_TYPES_H +#include #include #define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). 
+ */ +struct static_call_site { + s32 addr; + s32 key; +}; + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 9a20016d4900..b74820d8b264 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/module.c b/kernel/module.c index 3c465cf31d08..c075a18103fb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3274,6 +3274,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", sizeof(unsigned long), &mod->num_kprobe_blacklist); +#endif +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + mod->static_call_sites = section_objs(info, ".static_call_sites", + sizeof(*mod->static_call_sites), + &mod->num_static_call_sites); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/kernel/static_call.c b/kernel/static_call.c new file mode 100644 index 000000000000..d24349244675 --- /dev/null +++ b/kernel/static_call.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct static_call_site __start_static_call_sites[], + __stop_static_call_sites[]; + +static bool static_call_initialized; + +#define STATIC_CALL_INIT 1UL + +/* mutex to protect key modules/sites */ +static DEFINE_MUTEX(static_call_mutex); + +static void static_call_lock(void) +{ + mutex_lock(&static_call_mutex); +} + +static void static_call_unlock(void) +{ + mutex_unlock(&static_call_mutex); +} + +static inline void *static_call_addr(struct static_call_site *site) +{ + return (void *)((long)site->addr + (long)&site->addr); +} + + +static inline struct static_call_key *static_call_key(const struct static_call_site *site) +{ + return (struct static_call_key *) + (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT); +} + +/* These assume the key is word-aligned. 
*/ +static inline bool static_call_is_init(struct static_call_site *site) +{ + return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT; +} + +static inline void static_call_set_init(struct static_call_site *site) +{ + site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) - + (long)&site->key; +} + +static int static_call_site_cmp(const void *_a, const void *_b) +{ + const struct static_call_site *a = _a; + const struct static_call_site *b = _b; + const struct static_call_key *key_a = static_call_key(a); + const struct static_call_key *key_b = static_call_key(b); + + if (key_a < key_b) + return -1; + + if (key_a > key_b) + return 1; + + return 0; +} + +static void static_call_site_swap(void *_a, void *_b, int size) +{ + long delta = (unsigned long)_a - (unsigned long)_b; + struct static_call_site *a = _a; + struct static_call_site *b = _b; + struct static_call_site tmp = *a; + + a->addr = b->addr - delta; + a->key = b->key - delta; + + b->addr = tmp.addr + delta; + b->key = tmp.key + delta; +} + +static inline void static_call_sort_entries(struct static_call_site *start, + struct static_call_site *stop) +{ + sort(start, stop - start, sizeof(struct static_call_site), + static_call_site_cmp, static_call_site_swap); +} + +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + struct static_call_site *site, *stop; + struct static_call_mod *site_mod; + + cpus_read_lock(); + static_call_lock(); + + if (key->func == func) + goto done; + + key->func = func; + + arch_static_call_transform(NULL, tramp, func); + + /* + * If uninitialized, we'll not update the callsites, but they still + * point to the trampoline and we just patched that. + */ + if (WARN_ON_ONCE(!static_call_initialized)) + goto done; + + for (site_mod = key->mods; site_mod; site_mod = site_mod->next) { + struct module *mod = site_mod->mod; + + if (!site_mod->sites) { + /* + * This can happen if the static call key is defined in + * a module which doesn't use it. + */ + continue; + } + + stop = __stop_static_call_sites; + +#ifdef CONFIG_MODULES + if (mod) { + stop = mod->static_call_sites + + mod->num_static_call_sites; + } +#endif + + for (site = site_mod->sites; + site < stop && static_call_key(site) == key; site++) { + void *site_addr = static_call_addr(site); + + if (static_call_is_init(site)) { + /* + * Don't write to call sites which were in + * initmem and have since been freed. 
+ */ + if (!mod && system_state >= SYSTEM_RUNNING) + continue; + if (mod && !within_module_init((unsigned long)site_addr, mod)) + continue; + } + + if (!kernel_text_address((unsigned long)site_addr)) { + WARN_ONCE(1, "can't patch static call site at %pS", + site_addr); + continue; + } + + arch_static_call_transform(site_addr, NULL, func); + } + } + +done: + static_call_unlock(); + cpus_read_unlock(); +} +EXPORT_SYMBOL_GPL(__static_call_update); + +static int __static_call_init(struct module *mod, + struct static_call_site *start, + struct static_call_site *stop) +{ + struct static_call_site *site; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod; + + if (start == stop) + return 0; + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + void *site_addr = static_call_addr(site); + + if ((mod && within_module_init((unsigned long)site_addr, mod)) || + (!mod && init_section_contains(site_addr, 1))) + static_call_set_init(site); + + key = static_call_key(site); + if (key != prev_key) { + prev_key = key; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + + site_mod->mod = mod; + site_mod->sites = site; + site_mod->next = key->mods; + key->mods = site_mod; + } + + arch_static_call_transform(site_addr, NULL, key->func); + } + + return 0; +} + +#ifdef CONFIG_MODULES + +static int static_call_add_module(struct module *mod) +{ + return __static_call_init(mod, mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites); +} + +static void static_call_del_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = mod->static_call_sites + + mod->num_static_call_sites; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod, **prev; + struct static_call_site *site; + + for (site = start; site < stop; site++) { + key = static_call_key(site); + if (key == prev_key) + continue; + + prev_key = key; + + for (prev = &key->mods, site_mod = key->mods; + site_mod && site_mod->mod != mod; + prev = &site_mod->next, site_mod = site_mod->next) + ; + + if (!site_mod) + continue; + + *prev = site_mod->next; + kfree(site_mod); + } +} + +static int static_call_module_notify(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + cpus_read_lock(); + static_call_lock(); + + switch (val) { + case MODULE_STATE_COMING: + ret = static_call_add_module(mod); + if (ret) { + WARN(1, "Failed to allocate memory for static calls"); + static_call_del_module(mod); + } + break; + case MODULE_STATE_GOING: + static_call_del_module(mod); + break; + } + + static_call_unlock(); + cpus_read_unlock(); + + return notifier_from_errno(ret); +} + +static struct notifier_block static_call_module_nb = { + .notifier_call = static_call_module_notify, +}; + +#endif /* CONFIG_MODULES */ + +static void __init static_call_init(void) +{ + int ret; + + if (static_call_initialized) + return; + + cpus_read_lock(); + static_call_lock(); + ret = __static_call_init(NULL, __start_static_call_sites, + __stop_static_call_sites); + static_call_unlock(); + cpus_read_unlock(); + + if (ret) { + pr_err("Failed to allocate memory for static_call!\n"); + BUG(); + } + + static_call_initialized = true; + +#ifdef CONFIG_MODULES + register_module_notifier(&static_call_module_nb); +#endif +} +early_initcall(static_call_init); -- cgit v1.2.3 From 6333e8f73b834f54e395a056e6002403f0862c51 Mon Sep 17 00:00:00 
2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:43 +0200 Subject: static_call: Avoid kprobes on inline static_call()s Similar to how we disallow kprobes on any other dynamic text (ftrace/jump_label) also disallow kprobes on inline static_call()s. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135804.744920586@infradead.org --- arch/x86/kernel/kprobes/opt.c | 4 ++- include/linux/static_call.h | 11 +++++++ kernel/kprobes.c | 2 ++ kernel/static_call.c | 68 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 40f380461e6d..c068e21c2c40 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -210,7 +211,8 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) + jump_label_text_reserved(src, src + len - 1) || + static_call_text_reserved(src, src + len - 1)) return -EBUSY; return len; diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0d7f9efaa3b2..6f62ceda7dd9 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -110,6 +110,7 @@ struct static_call_key { extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); extern int static_call_mod_init(struct module *mod); +extern int static_call_text_reserved(void *start, void *end); #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -153,6 +154,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) cpus_read_unlock(); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) @@ -182,6 +188,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) WRITE_ONCE(key->func, func); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name)) #define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..67e6a8c18007 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1634,6 +1635,7 @@ static int check_kprobe_address_safe(struct kprobe *p, if (!kernel_text_address((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || + static_call_text_reserved(p->addr, p->addr) || find_bug((unsigned long)p->addr)) { ret = -EINVAL; goto out; diff --git a/kernel/static_call.c b/kernel/static_call.c index d24349244675..753b2f1b4fb8 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -204,8 +204,58 @@ static int __static_call_init(struct module *mod, return 0; } +static int addr_conflict(struct static_call_site *site, void *start, void *end) +{ + unsigned long addr = (unsigned long)static_call_addr(site); + + if (addr <= (unsigned long)end && + addr + CALL_INSN_SIZE > (unsigned long)start) + return 1; + + return 0; +} + 
+static int __static_call_text_reserved(struct static_call_site *iter_start, + struct static_call_site *iter_stop, + void *start, void *end) +{ + struct static_call_site *iter = iter_start; + + while (iter < iter_stop) { + if (addr_conflict(iter, start, end)) + return 1; + iter++; + } + + return 0; +} + #ifdef CONFIG_MODULES +static int __static_call_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + int ret; + + preempt_disable(); + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + preempt_enable(); + + if (!mod) + return 0; + + ret = __static_call_text_reserved(mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites, + start, end); + + module_put(mod); + + return ret; +} + static int static_call_add_module(struct module *mod) { return __static_call_init(mod, mod->static_call_sites, @@ -273,8 +323,26 @@ static struct notifier_block static_call_module_nb = { .notifier_call = static_call_module_notify, }; +#else + +static inline int __static_call_mod_text_reserved(void *start, void *end) +{ + return 0; +} + #endif /* CONFIG_MODULES */ +int static_call_text_reserved(void *start, void *end) +{ + int ret = __static_call_text_reserved(__start_static_call_sites, + __stop_static_call_sites, start, end); + + if (ret) + return ret; + + return __static_call_mod_text_reserved(start, end); +} + static void __init static_call_init(void) { int ret; -- cgit v1.2.3 From e6d6c071f22de29e4993784fc00cd2202b7ba149 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:44 +0200 Subject: x86/static_call: Add out-of-line static call implementation Add the x86 out-of-line static call implementation. For each key, a permanent trampoline is created which is the destination for all static calls for the given key. The trampoline has a direct jump which gets patched by static_call_update() when the destination function changes. [peterz: fixed trampoline, rewrote patching code] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.804315175@infradead.org --- arch/x86/Kconfig | 1 + arch/x86/include/asm/static_call.h | 23 +++++++++++++++++++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/static_call.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 arch/x86/include/asm/static_call.h create mode 100644 arch/x86/kernel/static_call.c (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7101ac64bb20..595c06b32b3a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -215,6 +215,7 @@ config X86 select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 + select HAVE_STATIC_CALL select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h new file mode 100644 index 000000000000..07aa8791cbfe --- /dev/null +++ b/arch/x86/include/asm/static_call.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_STATIC_CALL_H +#define _ASM_STATIC_CALL_H + +#include + +/* + * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which + * does a direct jump to the function. The direct jump gets patched by + * static_call_update(). 
+ */ +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + asm(".pushsection .text, \"ax\" \n" \ + ".align 4 \n" \ + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + " .byte 0xe9 # jmp.d32 \n" \ + " .long " #func " - (. + 4) \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") + +#endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e77261db2391..de09af019e23 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -68,6 +68,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o obj-y += irqflags.o +obj-y += static_call.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c new file mode 100644 index 000000000000..0565825970af --- /dev/null +++ b/arch/x86/kernel/static_call.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +static void __static_call_transform(void *insn, u8 opcode, void *func) +{ + const void *code = text_gen_insn(opcode, insn, func); + + if (WARN_ONCE(*(u8 *)insn != opcode, + "unexpected static call insn opcode 0x%x at %pS\n", + opcode, insn)) + return; + + if (memcmp(insn, code, CALL_INSN_SIZE) == 0) + return; + + text_poke_bp(insn, code, CALL_INSN_SIZE, NULL); +} + +void arch_static_call_transform(void *site, void *tramp, void *func) +{ + mutex_lock(&text_mutex); + + if (tramp) + __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + + mutex_unlock(&text_mutex); +} +EXPORT_SYMBOL_GPL(arch_static_call_transform); -- cgit v1.2.3 From 1e7e47883830aae5e8246a22ca2fc6883c61acdf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:45 +0200 Subject: x86/static_call: Add inline static call implementation for x86-64 Add the inline static call implementation for x86-64. The generated code is identical to the out-of-line case, except we move the trampoline into it's own section. Objtool uses the trampoline naming convention to detect all the call sites. It then annotates those call sites in the .static_call_sites section. During boot (and module init), the call sites are patched to call directly into the destination function. The temporary trampoline is then no longer used. 
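A simplified sketch of the boot-time patching this describes (patch_vmlinux_call_sites is an invented name; the real code, including site sorting, module handling and init-section checks, is __static_call_init() in kernel/static_call.c added earlier in the series):

	static void __init patch_vmlinux_call_sites(void)
	{
		struct static_call_site *site;

		for (site = __start_static_call_sites;
		     site < __stop_static_call_sites; site++) {
			/* Both fields are PC-relative, see struct static_call_site. */
			void *insn = (void *)((long)site->addr + (long)&site->addr);
			struct static_call_key *key =
				(void *)(((long)site->key + (long)&site->key) & ~1L);
				/* ~1L strips the init flag, see static_call_key() */

			/* Rewrite "call __SCT__name" into "call key->func". */
			arch_static_call_transform(insn, NULL, key->func);
		}
	}
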
[peterz: merged trampolines, put trampoline in section] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.864271425@infradead.org --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/static_call.h | 13 +++- arch/x86/kernel/static_call.c | 3 + arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 6 ++ tools/include/linux/static_call_types.h | 28 +++++++ tools/objtool/check.c | 130 ++++++++++++++++++++++++++++++++ tools/objtool/check.h | 1 + tools/objtool/elf.c | 8 +- tools/objtool/elf.h | 3 +- tools/objtool/objtool.h | 1 + tools/objtool/orc_gen.c | 4 +- tools/objtool/sync-check.sh | 1 + 13 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 tools/include/linux/static_call_types.h (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 595c06b32b3a..8a48d3eedb84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -216,6 +216,7 @@ config X86 select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL + select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK @@ -231,6 +232,7 @@ config X86 select RTC_MC146818_LIB select SPARSE_IRQ select SRCU + select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE) select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT @@ -452,7 +454,6 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y - select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 07aa8791cbfe..33469ae3612c 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -5,12 +5,23 @@ #include /* + * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which + * uses the current value of the key->func pointer to do an indirect jump to + * the function. This trampoline is only used during boot, before the call + * sites get patched by static_call_update(). The name of this trampoline has + * a magical aspect: objtool uses it to find static call sites so it can create + * the .static_call_sites section. + * * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which * does a direct jump to the function. The direct jump gets patched by * static_call_update(). + * + * Having the trampoline in a special section forces GCC to emit a JMP.d32 when + * it does tail-call optimization on the call; since you cannot compute the + * relative displacement across sections. 
*/ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ - asm(".pushsection .text, \"ax\" \n" \ + asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 0565825970af..5ff2b639a1a6 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -26,6 +26,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func) if (tramp) __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) + __static_call_transform(site, CALL_INSN_OPCODE, func); + mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9a03e5b23135..2568f4cdcbd1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -136,6 +136,7 @@ SECTIONS ENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT + STATIC_CALL_TEXT *(.fixup) *(.gnu.warning) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0088a5cd6a40..0502087654d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -642,6 +642,12 @@ *(.softirqentry.text) \ __softirqentry_text_end = .; +#define STATIC_CALL_TEXT \ + ALIGN_FUNCTION(); \ + __static_call_text_start = .; \ + *(.static_call.text) \ + __static_call_text_end = .; + /* Section used for early init (in .S files) */ #define HEAD_TEXT KEEP(*(.head.text)) diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h new file mode 100644 index 000000000000..408d345d83e1 --- /dev/null +++ b/tools/include/linux/static_call_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include +#include + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). 
+ */ +struct static_call_site { + s32 addr; + s32 key; +}; + +#endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..f8f7a40c6ef3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -16,6 +16,7 @@ #include #include +#include #define FAKE_JUMP_OFFSET -1 @@ -433,6 +434,103 @@ reachable: return 0; } +static int create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. 
*/ @@ -1522,6 +1620,23 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +static int read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1601,6 +1716,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + return 0; } @@ -2432,6 +2551,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; + if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + break; case INSN_JUMP_CONDITIONAL: @@ -2791,6 +2915,7 @@ int check(const char *_objname, bool orc) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; @@ -2838,6 +2963,11 @@ int check(const char *_objname, bool orc) warnings += ret; } + ret = create_static_call_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + if (orc) { ret = create_orc(&file); if (ret < 0) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 061aa96e15d3..36d38b9153ac 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head static_call_node; struct section *sec; unsigned long offset; unsigned int len; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ddbd66f1a37..4e1d7460574b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -652,7 +652,7 @@ err: } struct section *elf_create_section(struct elf *elf, const char *name, - size_t entsize, int nr) + unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; @@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_entsize = entsize; sec->sh.sh_type = SHT_PROGBITS; sec->sh.sh_addralign = 1; - sec->sh.sh_flags = SHF_ALLOC; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; /* Add section name to .shstrtab (or .strtab for Clang) */ @@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect strcpy(relocname, ".rel"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0); free(relocname); if (!sec) return NULL; @@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 6cc80a075166..807f8c670097 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -56,6 +56,7 @@ struct symbol { unsigned int len; struct symbol *pfunc, *cfunc, *alias; 
bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc) } struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); void elf_add_reloc(struct elf *elf, struct reloc *reloc); int elf_write_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 528028a66816..9a7cd0b88bd8 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -16,6 +16,7 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; bool ignore_unreachables, c_file, hints, rodata; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 968f55e6dd94..e6b2363c2e03 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); if (!sec) return -1; @@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file) return -1; /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", + u_sec = elf_create_section(file->elf, ".orc_unwind", 0, sizeof(struct orc_entry), idx); /* populate sections */ diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 2a1261bfbb62..aa099b21dffa 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk +include/linux/static_call_types.h ' check_2 () { -- cgit v1.2.3 From f03c412915f5f69f2d17bcd20ecdd69320bcbf7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:46 +0200 Subject: static_call: Add simple self-test for static calls Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135804.922581202@infradead.org --- arch/Kconfig | 6 ++++++ kernel/static_call.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 2c4936ac620a..76ec3395b843 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -106,6 +106,12 @@ config STATIC_KEYS_SELFTEST help Boot time self-test of the branch patching code. +config STATIC_CALL_SELFTEST + bool "Static call selftest" + depends on HAVE_STATIC_CALL + help + Boot time self-test of the call patching code. 
+ config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/kernel/static_call.c b/kernel/static_call.c index 753b2f1b4fb8..97142cb6bfa6 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -369,3 +369,46 @@ static void __init static_call_init(void) #endif } early_initcall(static_call_init); + +#ifdef CONFIG_STATIC_CALL_SELFTEST + +static int func_a(int x) +{ + return x+1; +} + +static int func_b(int x) +{ + return x+2; +} + +DEFINE_STATIC_CALL(sc_selftest, func_a); + +static struct static_call_data { + int (*func)(int); + int val; + int expect; +} static_call_data [] __initdata = { + { NULL, 2, 3 }, + { func_b, 2, 4 }, + { func_a, 2, 3 } +}; + +static int __init test_static_call_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { + struct static_call_data *scd = &static_call_data[i]; + + if (scd->func) + static_call_update(sc_selftest, scd->func); + + WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); + } + + return 0; +} +early_initcall(test_static_call_init); + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ -- cgit v1.2.3 From c43a43e439e00ad2a4d98716895d961ade6bbbfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:47 +0200 Subject: x86/alternatives: Teach text_poke_bp() to emulate RET Future patches will need to poke a RET instruction, provide the infrastructure required for this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20200818135804.982214828@infradead.org --- arch/x86/include/asm/text-patching.h | 19 +++++++++++++++++++ arch/x86/kernel/alternative.c | 5 +++++ 2 files changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 6593b42cb379..b7421780e4e9 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -53,6 +53,9 @@ extern void text_poke_finish(void); #define INT3_INSN_SIZE 1 #define INT3_INSN_OPCODE 0xCC +#define RET_INSN_SIZE 1 +#define RET_INSN_OPCODE 0xC3 + #define CALL_INSN_SIZE 5 #define CALL_INSN_OPCODE 0xE8 @@ -73,6 +76,7 @@ static __always_inline int text_opcode_size(u8 opcode) switch(opcode) { __CASE(INT3); + __CASE(RET); __CASE(CALL); __CASE(JMP32); __CASE(JMP8); @@ -140,12 +144,27 @@ void int3_emulate_push(struct pt_regs *regs, unsigned long val) *(unsigned long *)regs->sp = val; } +static __always_inline +unsigned long int3_emulate_pop(struct pt_regs *regs) +{ + unsigned long val = *(unsigned long *)regs->sp; + regs->sp += sizeof(unsigned long); + return val; +} + static __always_inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) { int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } + +static __always_inline +void int3_emulate_ret(struct pt_regs *regs) +{ + unsigned long ip = int3_emulate_pop(regs); + int3_emulate_jmp(regs, ip); +} #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cdaab30880b9..4adbe65afe23 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1103,6 +1103,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs) */ goto out_put; + case RET_INSN_OPCODE: + int3_emulate_ret(regs); + break; + case CALL_INSN_OPCODE: int3_emulate_call(regs, (long)ip + tp->rel32); break; @@ -1277,6 +1281,7 @@ static void text_poke_loc_init(struct 
text_poke_loc *tp, void *addr, switch (tp->opcode) { case INT3_INSN_OPCODE: + case RET_INSN_OPCODE: break; case CALL_INSN_OPCODE: -- cgit v1.2.3 From 452cddbff74b6a15b9354505671011700fe03710 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:48 +0200 Subject: static_call: Add static_call_cond() Extend the static_call infrastructure to optimize the following common pattern: if (func_ptr) func_ptr(args...) For the trampoline (which is in effect a tail-call), we patch the JMP.d32 into a RET, which then directly consumes the trampoline call. For the in-line sites we replace the CALL with a NOP5. NOTE: this is 'obviously' limited to functions with a 'void' return type. NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed to a full function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org --- arch/x86/include/asm/static_call.h | 12 ++++-- arch/x86/kernel/static_call.c | 42 ++++++++++++++----- include/linux/static_call.h | 86 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 33469ae3612c..c37f11999d0c 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -20,15 +20,21 @@ * it does tail-call optimization on the call; since you cannot compute the * relative displacement across sections. */ -#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + +#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ - " .byte 0xe9 # jmp.d32 \n" \ - " .long " #func " - (. + 4) \n" \ + insns " \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. 
+ 4)") + +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 5ff2b639a1a6..ead6726fb06d 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -4,19 +4,41 @@ #include #include -static void __static_call_transform(void *insn, u8 opcode, void *func) +enum insn_type { + CALL = 0, /* site call */ + NOP = 1, /* site cond-call */ + JMP = 2, /* tramp / site tail-call */ + RET = 3, /* tramp / site cond-tail-call */ +}; + +static void __static_call_transform(void *insn, enum insn_type type, void *func) { - const void *code = text_gen_insn(opcode, insn, func); + int size = CALL_INSN_SIZE; + const void *code; - if (WARN_ONCE(*(u8 *)insn != opcode, - "unexpected static call insn opcode 0x%x at %pS\n", - opcode, insn)) - return; + switch (type) { + case CALL: + code = text_gen_insn(CALL_INSN_OPCODE, insn, func); + break; + + case NOP: + code = ideal_nops[NOP_ATOMIC5]; + break; + + case JMP: + code = text_gen_insn(JMP32_INSN_OPCODE, insn, func); + break; + + case RET: + code = text_gen_insn(RET_INSN_OPCODE, insn, func); + size = RET_INSN_SIZE; + break; + } - if (memcmp(insn, code, CALL_INSN_SIZE) == 0) + if (memcmp(insn, code, size) == 0) return; - text_poke_bp(insn, code, CALL_INSN_SIZE, NULL); + text_poke_bp(insn, code, size, NULL); } void arch_static_call_transform(void *site, void *tramp, void *func) @@ -24,10 +46,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func) mutex_lock(&text_mutex); if (tramp) - __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + __static_call_transform(tramp, func ? JMP : RET, func); if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) - __static_call_transform(site, CALL_INSN_OPCODE, func); + __static_call_transform(site, func ? CALL : NOP, func); mutex_unlock(&text_mutex); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 6f62ceda7dd9..0f74581e0e2f 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -16,7 +16,9 @@ * * DECLARE_STATIC_CALL(name, func); * DEFINE_STATIC_CALL(name, func); + * DEFINE_STATIC_CALL_NULL(name, typename); * static_call(name)(args...); + * static_call_cond(name)(args...); * static_call_update(name, func); * * Usage example: @@ -52,6 +54,43 @@ * rather than calling through the trampoline. This requires objtool or a * compiler plugin to detect all the static_call() sites and annotate them * in the .static_call_sites section. + * + * + * Notes on NULL function pointers: + * + * Static_call()s support NULL functions, with many of the caveats that + * regular function pointers have. + * + * Clearly calling a NULL function pointer is 'BAD', so too for + * static_call()s (although when HAVE_STATIC_CALL it might not be immediately + * fatal). A NULL static_call can be the result of: + * + * DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int)); + * + * which is equivalent to declaring a NULL function pointer with just a + * typename: + * + * void (*my_func_ptr)(int arg1) = NULL; + * + * or using static_call_update() with a NULL function. In both cases the + * HAVE_STATIC_CALL implementation will patch the trampoline with a RET + * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE + * architectures can patch the trampoline call to a NOP. + * + * In all cases, any argument evaluation is unconditional. 
Unlike a regular + * conditional function pointer call: + * + * if (my_func_ptr) + * my_func_ptr(arg1) + * + * where the argument evaludation also depends on the pointer value. + * + * When calling a static_call that can be NULL, use: + * + * static_call_cond(name)(arg1); + * + * which will include the required value tests to avoid NULL-pointer + * dereferences. */ #include @@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end); }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ @@ -143,7 +191,15 @@ struct static_call_key { }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) @@ -179,9 +235,39 @@ struct static_call_key { .func = _func, \ } +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + } + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) +static inline void __static_call_nop(void) { } + +/* + * This horrific hack takes care of two things: + * + * - it ensures the compiler will only load the function pointer ONCE, + * which avoids a reload race. + * + * - it ensures the argument evaluation is unconditional, similar + * to the HAVE_STATIC_CALL variant. + * + * Sadly current GCC/Clang (10 for both) do not optimize this properly + * and will emit an indirect call for the NULL case :-( + */ +#define __static_call_cond(name) \ +({ \ + void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \ + if (!func) \ + func = &__static_call_nop; \ + (typeof(STATIC_CALL_TRAMP(name))*)func; \ +}) + +#define static_call_cond(name) (void)__static_call_cond(name) + static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) { -- cgit v1.2.3 From 5b06fd3bb9cdce4f3e731c48eb5b74c4acc47997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:49 +0200 Subject: static_call: Handle tail-calls GCC can turn our static_call(name)(args...) into a tail call, in which case we get a JMP.d32 into the trampoline (which then does a further tail-call). Teach objtool to recognise and mark these in .static_call_sites and adjust the code patching to deal with this. 
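For illustration (not part of the patch), the kind of code that triggers this, reusing the sc_demo example from above; demo_tail is an invented name:

	/*
	 * A static_call() in tail position: with optimization the compiler
	 * may emit "jmp __SCT__sc_demo" here instead of "call ...; ret",
	 * so the site must be patched with a JMP (or RET for a NULL target)
	 * rather than a CALL (or NOP).
	 */
	static int demo_tail(int x)
	{
		return static_call(sc_demo)(x);
	}
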
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.101186767@infradead.org --- arch/x86/kernel/static_call.c | 21 ++++++++++++++++++--- include/linux/static_call.h | 4 ++-- include/linux/static_call_types.h | 7 +++++++ kernel/static_call.c | 21 +++++++++++++-------- tools/include/linux/static_call_types.h | 7 +++++++ tools/objtool/check.c | 18 +++++++++++++----- 6 files changed, 60 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ead6726fb06d..60a325c731df 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -41,15 +41,30 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) text_poke_bp(insn, code, size, NULL); } -void arch_static_call_transform(void *site, void *tramp, void *func) +static inline enum insn_type __sc_insn(bool null, bool tail) +{ + /* + * Encode the following table without branches: + * + * tail null insn + * -----+-------+------ + * 0 | 0 | CALL + * 0 | 1 | NOP + * 1 | 0 | JMP + * 1 | 1 | RET + */ + return 2*tail + null; +} + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); if (tramp) - __static_call_transform(tramp, func ? JMP : RET, func); + __static_call_transform(tramp, __sc_insn(!func, true), func); if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) - __static_call_transform(site, func ? CALL : NOP, func); + __static_call_transform(site, __sc_insn(!func, tail), func); mutex_unlock(&text_mutex); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0f74581e0e2f..519bd666e096 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -103,7 +103,7 @@ /* * Either @site or @tramp can be NULL. */ -extern void arch_static_call_transform(void *site, void *tramp, void *func); +extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail); #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) @@ -206,7 +206,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) { cpus_read_lock(); WRITE_ONCE(key->func, func); - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); cpus_read_unlock(); } diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). 
diff --git a/kernel/static_call.c b/kernel/static_call.c index 97142cb6bfa6..d98e0e4272c1 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -15,8 +15,6 @@ extern struct static_call_site __start_static_call_sites[], static bool static_call_initialized; -#define STATIC_CALL_INIT 1UL - /* mutex to protect key modules/sites */ static DEFINE_MUTEX(static_call_mutex); @@ -39,18 +37,23 @@ static inline void *static_call_addr(struct static_call_site *site) static inline struct static_call_key *static_call_key(const struct static_call_site *site) { return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT); + (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT; + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) - + site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -104,7 +107,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) key->func = func; - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); /* * If uninitialized, we'll not update the callsites, but they still @@ -154,7 +157,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; } - arch_static_call_transform(site_addr, NULL, func); + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); } } @@ -198,7 +202,8 @@ static int __static_call_init(struct module *mod, key->mods = site_mod; } - arch_static_call_transform(site_addr, NULL, key->func); + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); } return 0; diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f8f7a40c6ef3..75d0cd2f9044 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -516,7 +516,7 @@ static int create_static_call_sections(struct objtool_file *file) } memset(reloc, 0, sizeof(*reloc)); reloc->sym = key_sym; - reloc->addend = 0; + reloc->addend = is_sibling_call(insn) ? 
STATIC_CALL_SITE_TAIL : 0; reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site) + 4; reloc->sec = reloc_sec; @@ -747,6 +747,10 @@ static int add_jump_destinations(struct objtool_file *file) } else { /* external sibling call */ insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } continue; } @@ -798,6 +802,10 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call */ insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } } } } @@ -1684,6 +1692,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -1716,10 +1728,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_static_call_tramps(file); - if (ret) - return ret; - return 0; } -- cgit v1.2.3 From 6c3fce794e9d2a5ce3a948962d0808a459c40a84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:50 +0200 Subject: static_call: Add some validation Verify the text we're about to change is as we expect it to be. Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135805.161974981@infradead.org --- arch/x86/kernel/static_call.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 60a325c731df..55140d8db106 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -41,6 +41,26 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) text_poke_bp(insn, code, size, NULL); } +static void __static_call_validate(void *insn, bool tail) +{ + u8 opcode = *(u8 *)insn; + + if (tail) { + if (opcode == JMP32_INSN_OPCODE || + opcode == RET_INSN_OPCODE) + return; + } else { + if (opcode == CALL_INSN_OPCODE || + !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5)) + return; + } + + /* + * If we ever trigger this, our text is corrupt, we'll probably not live long. + */ + WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); +} + static inline enum insn_type __sc_insn(bool null, bool tail) { /* @@ -60,11 +80,15 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); - if (tramp) + if (tramp) { + __static_call_validate(tramp, true); __static_call_transform(tramp, __sc_insn(!func, true), func); + } - if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail); __static_call_transform(site, __sc_insn(!func, tail), func); + } mutex_unlock(&text_mutex); } -- cgit v1.2.3 From a945c8345ec0decb2f1a7f19a8c5e60bcb1dd1eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:51 +0200 Subject: static_call: Allow early init In order to use static_call() to wire up x86_pmu, we need to initialize earlier, specifically before memory allocation works; copy some of the tricks from jump_label to enable this. Primarily we overload key->next to store a sites pointer when there are no modules, this avoids having to use kmalloc() to initialize the sites and allows us to run much earlier. 
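A rough user-space sketch of the pointer-tagging trick described above (the patch itself stores the tag in bit 0 of key->type, relying on word alignment; the names tagged_key, key_mods() and key_sites() below are invented for illustration only):

/*
 * User-space sketch, not kernel code: because both pointer types are at
 * least word aligned, bit 0 of the stored word is free to say which type
 * it currently holds, avoiding any allocation for the single-entry case.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct mod_entry  { int dummy; };	/* stands in for struct static_call_mod  */
struct site_entry { int dummy; };	/* stands in for struct static_call_site */

struct tagged_key {
	uintptr_t type;		/* bit 0: 0 = mods pointer, 1 = sites pointer */
};

static void key_set_mods(struct tagged_key *k, struct mod_entry *m)
{
	k->type = (uintptr_t)m;			/* bit 0 stays clear */
}

static void key_set_sites(struct tagged_key *k, struct site_entry *s)
{
	k->type = (uintptr_t)s | 1;		/* mark as a sites pointer */
}

static struct mod_entry *key_mods(struct tagged_key *k)
{
	return (k->type & 1) ? NULL : (struct mod_entry *)k->type;
}

static struct site_entry *key_sites(struct tagged_key *k)
{
	return (k->type & 1) ? (struct site_entry *)(k->type & ~(uintptr_t)1) : NULL;
}

int main(void)
{
	static struct mod_entry m;
	static struct site_entry s;
	struct tagged_key k;

	key_set_sites(&k, &s);
	assert(key_sites(&k) == &s && !key_mods(&k));

	key_set_mods(&k, &m);
	assert(key_mods(&k) == &m && !key_sites(&k));
	return 0;
}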
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20200818135805.220737930@infradead.org --- arch/x86/kernel/setup.c | 2 ++ arch/x86/kernel/static_call.c | 5 +++- include/linux/static_call.h | 15 ++++++++-- kernel/static_call.c | 70 ++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 85 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3511736fbc74..799a6de439ea 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -849,6 +850,7 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); arch_init_ideal_nops(); jump_label_init(); + static_call_init(); early_ioremap_init(); setup_olpc_ofw_pgd(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 55140d8db106..ca9a380d9c0b 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,7 +11,7 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; -static void __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) { int size = CALL_INSN_SIZE; const void *code; @@ -38,6 +38,9 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) if (memcmp(insn, code, size) == 0) return; + if (unlikely(system_state == SYSTEM_BOOTING)) + return text_poke_early(insn, code, size); + text_poke_bp(insn, code, size, NULL); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 519bd666e096..bfa2ba39be57 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -136,6 +136,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #ifdef CONFIG_HAVE_STATIC_CALL_INLINE +extern void __init static_call_init(void); + struct static_call_mod { struct static_call_mod *next; struct module *mod; /* for vmlinux, mod == NULL */ @@ -144,7 +146,12 @@ struct static_call_mod { struct static_call_key { void *func; - struct static_call_mod *mods; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; }; extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); @@ -155,7 +162,7 @@ extern int static_call_text_reserved(void *start, void *end); DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ - .mods = NULL, \ + .type = 1, \ }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) @@ -180,6 +187,8 @@ extern int static_call_text_reserved(void *start, void *end); #elif defined(CONFIG_HAVE_STATIC_CALL) +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; @@ -225,6 +234,8 @@ static inline int static_call_text_reserved(void *start, void *end) #else /* Generic implementation */ +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; diff --git a/kernel/static_call.c b/kernel/static_call.c index d98e0e4272c1..f8362b3f8fd5 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -94,10 +94,31 @@ static inline void static_call_sort_entries(struct static_call_site *start, static_call_site_cmp, static_call_site_swap); } +static inline bool static_call_key_has_mods(struct static_call_key *key) +{ + return !(key->type & 1); +} + +static inline struct static_call_mod 
*static_call_key_next(struct static_call_key *key) +{ + if (!static_call_key_has_mods(key)) + return NULL; + + return key->mods; +} + +static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) +{ + if (static_call_key_has_mods(key)) + return NULL; + + return (struct static_call_site *)(key->type & ~1); +} + void __static_call_update(struct static_call_key *key, void *tramp, void *func) { struct static_call_site *site, *stop; - struct static_call_mod *site_mod; + struct static_call_mod *site_mod, first; cpus_read_lock(); static_call_lock(); @@ -116,13 +137,22 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) if (WARN_ON_ONCE(!static_call_initialized)) goto done; - for (site_mod = key->mods; site_mod; site_mod = site_mod->next) { + first = (struct static_call_mod){ + .next = static_call_key_next(key), + .mod = NULL, + .sites = static_call_key_sites(key), + }; + + for (site_mod = &first; site_mod; site_mod = site_mod->next) { struct module *mod = site_mod->mod; if (!site_mod->sites) { /* * This can happen if the static call key is defined in * a module which doesn't use it. + * + * It also happens in the has_mods case, where the + * 'first' entry has no sites associated with it. */ continue; } @@ -192,16 +222,48 @@ static int __static_call_init(struct module *mod, if (key != prev_key) { prev_key = key; + /* + * For vmlinux (!mod) avoid the allocation by storing + * the sites pointer in the key itself. Also see + * __static_call_update()'s @first. + * + * This allows architectures (eg. x86) to call + * static_call_init() before memory allocation works. + */ + if (!mod) { + key->sites = site; + key->type |= 1; + goto do_transform; + } + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); if (!site_mod) return -ENOMEM; + /* + * When the key has a direct sites pointer, extract + * that into an explicit struct static_call_mod, so we + * can have a list of modules. + */ + if (static_call_key_sites(key)) { + site_mod->mod = NULL; + site_mod->next = NULL; + site_mod->sites = static_call_key_sites(key); + + key->mods = site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + } + site_mod->mod = mod; site_mod->sites = site; - site_mod->next = key->mods; + site_mod->next = static_call_key_next(key); key->mods = site_mod; } +do_transform: arch_static_call_transform(site_addr, NULL, key->func, static_call_is_tail(site)); } @@ -348,7 +410,7 @@ int static_call_text_reserved(void *start, void *end) return __static_call_mod_text_reserved(start, end); } -static void __init static_call_init(void) +void __init static_call_init(void) { int ret; -- cgit v1.2.3 From 7c9903c9bf716d89b34f96cc2ed64e28dabf570b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:53 +0200 Subject: x86/perf, static_call: Optimize x86_pmu methods Replace many of the indirect calls with static_call(). The average PMI time, as measured by perf_sample_event_took()*: PRE: 3283.03 [ns] POST: 3145.12 [ns] Which is a ~138 [ns] win per PMI, or a ~4.2% decrease. 
[*] on an IVB-EP, using: 'perf record -a -e cycles -- make O=defconfig-build/ -j80' Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.338001015@infradead.org --- arch/x86/events/core.c | 134 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1cbf57dc2ac8..360c395d51d0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,34 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); +/* + * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined + * from just a typename, as opposed to an actual function. + */ +DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable); + +DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); +DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); +DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); + +DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); +DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); +DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); + +DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); + +DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); +DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); + +DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); + u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -660,7 +689,7 @@ static void x86_pmu_disable(struct pmu *pmu) cpuc->enabled = 0; barrier(); - x86_pmu.disable_all(); + static_call(x86_pmu_disable_all)(); } void x86_pmu_enable_all(int added) @@ -907,8 +936,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) n0 -= cpuc->n_txn; - if (x86_pmu.start_scheduling) - x86_pmu.start_scheduling(cpuc); + static_call_cond(x86_pmu_start_scheduling)(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = cpuc->event_constraint[i]; @@ -925,7 +953,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * change due to external factors (sibling state, allow_tfa). 
*/ if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { - c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); + c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]); cpuc->event_constraint[i] = c; } @@ -1008,8 +1036,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!unsched && assign) { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; - if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, i, assign[i]); + static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); } } else { for (i = n0; i < n; i++) { @@ -1018,15 +1045,13 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* * release events that failed scheduling */ - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, e); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, e); cpuc->event_constraint[i] = NULL; } } - if (x86_pmu.stop_scheduling) - x86_pmu.stop_scheduling(cpuc); + static_call_cond(x86_pmu_stop_scheduling)(cpuc); return unsched ? -EINVAL : 0; } @@ -1226,7 +1251,7 @@ static void x86_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - x86_pmu.enable_all(added); + static_call(x86_pmu_enable_all)(added); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -1347,7 +1372,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) goto out; /* @@ -1365,13 +1390,11 @@ done_collect: cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - if (x86_pmu.add) { - /* - * This is before x86_pmu_enable() will call x86_pmu_start(), - * so we enable LBRs before an event needs them etc.. - */ - x86_pmu.add(event); - } + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + static_call_cond(x86_pmu_add)(event); ret = 0; out: @@ -1399,7 +1422,7 @@ static void x86_pmu_start(struct perf_event *event, int flags) cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); __set_bit(idx, cpuc->running); - x86_pmu.enable(event); + static_call(x86_pmu_enable)(event); perf_event_update_userpage(event); } @@ -1469,7 +1492,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; if (test_bit(hwc->idx, cpuc->active_mask)) { - x86_pmu.disable(event); + static_call(x86_pmu_disable)(event); __clear_bit(hwc->idx, cpuc->active_mask); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); @@ -1519,8 +1542,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (i >= cpuc->n_events - cpuc->n_added) --cpuc->n_added; - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, event); /* Delete the array entry. */ while (++i < cpuc->n_events) { @@ -1533,13 +1555,12 @@ static void x86_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); do_del: - if (x86_pmu.del) { - /* - * This is after x86_pmu_stop(); so we disable LBRs after any - * event can need them etc.. - */ - x86_pmu.del(event); - } + + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. 
+ */ + static_call_cond(x86_pmu_del)(event); } int x86_pmu_handle_irq(struct pt_regs *regs) @@ -1617,7 +1638,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; start_clock = sched_clock(); - ret = x86_pmu.handle_irq(regs); + ret = static_call(x86_pmu_handle_irq)(regs); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -1830,6 +1851,38 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) static struct attribute_group x86_pmu_attr_group; static struct attribute_group x86_pmu_caps_group; +static void x86_pmu_static_call_update(void) +{ + static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq); + static_call_update(x86_pmu_disable_all, x86_pmu.disable_all); + static_call_update(x86_pmu_enable_all, x86_pmu.enable_all); + static_call_update(x86_pmu_enable, x86_pmu.enable); + static_call_update(x86_pmu_disable, x86_pmu.disable); + + static_call_update(x86_pmu_add, x86_pmu.add); + static_call_update(x86_pmu_del, x86_pmu.del); + static_call_update(x86_pmu_read, x86_pmu.read); + + static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); + static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); + static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); + + static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling); + static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling); + static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); + + static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); + static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); + + static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); + static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); +} + +static void _x86_pmu_read(struct perf_event *event) +{ + x86_perf_event_update(event); +} + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1898,6 +1951,11 @@ static int __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + if (!x86_pmu.read) + x86_pmu.read = _x86_pmu_read; + + x86_pmu_static_call_update(); + /* * Install callbacks. Core will call them for each online * cpu. @@ -1934,11 +1992,9 @@ out: } early_initcall(init_hw_perf_events); -static inline void x86_pmu_read(struct perf_event *event) +static void x86_pmu_read(struct perf_event *event) { - if (x86_pmu.read) - return x86_pmu.read(event); - x86_perf_event_update(event); + static_call(x86_pmu_read)(event); } /* @@ -2015,7 +2071,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) if (!x86_pmu_initialized()) return -EAGAIN; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) return ret; @@ -2308,15 +2364,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (x86_pmu.sched_task) - x86_pmu.sched_task(ctx, sched_in); + static_call_cond(x86_pmu_sched_task)(ctx, sched_in); } static void x86_pmu_swap_task_ctx(struct perf_event_context *prev, struct perf_event_context *next) { - if (x86_pmu.swap_task_ctx) - x86_pmu.swap_task_ctx(prev, next); + static_call_cond(x86_pmu_swap_task_ctx)(prev, next); } void perf_check_microcode(void) -- cgit v1.2.3
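The x86_pmu conversion above repeats one pattern many times. A condensed, hypothetical sketch of that pattern follows; the struct, field and handler names (my_pmu, my_pmu_ops and friends) are invented, and only DEFINE_STATIC_CALL_NULL(), static_call_update(), static_call() and static_call_cond() are the real interfaces the patch uses. It needs a kernel build environment with static call support; it is not stand-alone user-space code.

/*
 * Sketch of the x86_pmu-style conversion: declare one static call per
 * ops member, re-patch them once the ops are known, then call through
 * static_call() for mandatory ops and static_call_cond() for optional
 * ones (a NULL target degrades to a NOP instead of a crash).
 */
#include <linux/static_call.h>
#include <linux/types.h>

struct my_pmu_ops {
	int  (*handle_irq)(void *regs);			/* always provided */
	void (*sched_task)(void *ctx, bool sched_in);	/* optional, may stay NULL */
};

static struct my_pmu_ops my_pmu;

/* Typed from the struct members, exactly like the x86_pmu definitions above. */
DEFINE_STATIC_CALL_NULL(my_pmu_handle_irq, *my_pmu.handle_irq);
DEFINE_STATIC_CALL_NULL(my_pmu_sched_task, *my_pmu.sched_task);

/* Re-patch all call sites once ->handle_irq / ->sched_task are filled in. */
static void my_pmu_static_call_update(void)
{
	static_call_update(my_pmu_handle_irq, my_pmu.handle_irq);
	static_call_update(my_pmu_sched_task, my_pmu.sched_task);
}

static int my_pmu_irq(void *regs)
{
	/* Mandatory op: plain static_call(), the target must not be NULL. */
	return static_call(my_pmu_handle_irq)(regs);
}

static void my_pmu_sched(void *ctx, bool sched_in)
{
	/* Optional op: replaces the removed "if (ops->sched_task)" checks. */
	static_call_cond(my_pmu_sched_task)(ctx, sched_in);
}

The net effect is the same as the old "if (x86_pmu.foo) x86_pmu.foo(...)" guards, but each call site becomes a patched direct call instead of an indirect call, which is where the measured ~4.2% PMI improvement comes from.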