summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:41:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:41:38 -0800
commit76d4acf22b4847f6c7b2f9042366fbdc3d20f578 (patch)
tree635466360e3716372dd91ad8c724b40d15c05753 /kernel
parent8a8ca83ec3cf7ffc69020c189e3d368b1d4ba98a (diff)
parenta70a04b3844f59c29573a8581d5c263225060dd6 (diff)
downloadlinux-76d4acf22b4847f6c7b2f9042366fbdc3d20f578.tar.bz2
Merge tag 'perf-kprobes-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf/kprobes updates from Thomas Gleixner: "Make kretprobes lockless to avoid the rp->lock performance and potential lock ordering issues" * tag 'perf-kprobes-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: locking/atomics: Regenerate the atomics-check SHA1's kprobes: Replace rp->free_instance with freelist freelist: Implement lockless freelist asm-generic/atomic: Add try_cmpxchg() fallbacks kprobes: Remove kretprobe hash llist: Add nonatomic __llist_add() and __llist_dell_all()
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/kprobes.c314
-rw-r--r--kernel/trace/trace_kprobe.c3
3 files changed, 98 insertions, 223 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 09be1be28cde..1513832de220 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2183,6 +2183,10 @@ static __latent_entropy struct task_struct *copy_process(
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
+#ifdef CONFIG_KRETPROBES
+ p->kretprobe_instances.first = NULL;
+#endif
+
/*
* Ensure that the cgroup subsystem policies allow the new process to be
* forked. It should be noted that the new process's css_set can be changed
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 41fdbb7953c6..f7fb5d135930 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -36,7 +36,6 @@
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/perf_event.h>
-#include <linux/static_call.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
@@ -54,7 +53,6 @@ static int kprobes_initialized;
* - RCU hlist traversal under disabling preempt (breakpoint handlers)
*/
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
-static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;
@@ -62,9 +60,6 @@ static bool kprobes_all_disarmed;
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
-static struct {
- raw_spinlock_t lock ____cacheline_aligned_in_smp;
-} kretprobe_table_locks[KPROBE_TABLE_SIZE];
kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
unsigned int __unused)
@@ -72,11 +67,6 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
}
-static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
-{
- return &(kretprobe_table_locks[hash].lock);
-}
-
/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);
@@ -1224,76 +1214,26 @@ void kprobes_inc_nmissed_count(struct kprobe *p)
}
NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
-static void recycle_rp_inst(struct kretprobe_instance *ri)
+static void free_rp_inst_rcu(struct rcu_head *head)
{
- struct kretprobe *rp = ri->rp;
+ struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
- /* remove rp inst off the rprobe_inst_table */
- hlist_del(&ri->hlist);
- INIT_HLIST_NODE(&ri->hlist);
- if (likely(rp)) {
- raw_spin_lock(&rp->lock);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock(&rp->lock);
- } else
- kfree_rcu(ri, rcu);
+ if (refcount_dec_and_test(&ri->rph->ref))
+ kfree(ri->rph);
+ kfree(ri);
}
-NOKPROBE_SYMBOL(recycle_rp_inst);
-
-static void kretprobe_hash_lock(struct task_struct *tsk,
- struct hlist_head **head, unsigned long *flags)
-__acquires(hlist_lock)
-{
- unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- raw_spinlock_t *hlist_lock;
+NOKPROBE_SYMBOL(free_rp_inst_rcu);
- *head = &kretprobe_inst_table[hash];
- hlist_lock = kretprobe_table_lock_ptr(hash);
- /*
- * Nested is a workaround that will soon not be needed.
- * There's other protections that make sure the same lock
- * is not taken on the same CPU that lockdep is unaware of.
- * Differentiate when it is taken in NMI context.
- */
- raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
-}
-NOKPROBE_SYMBOL(kretprobe_hash_lock);
-
-static void kretprobe_table_lock(unsigned long hash,
- unsigned long *flags)
-__acquires(hlist_lock)
-{
- raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- /*
- * Nested is a workaround that will soon not be needed.
- * There's other protections that make sure the same lock
- * is not taken on the same CPU that lockdep is unaware of.
- * Differentiate when it is taken in NMI context.
- */
- raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
-}
-NOKPROBE_SYMBOL(kretprobe_table_lock);
-
-static void kretprobe_hash_unlock(struct task_struct *tsk,
- unsigned long *flags)
-__releases(hlist_lock)
+static void recycle_rp_inst(struct kretprobe_instance *ri)
{
- unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- raw_spinlock_t *hlist_lock;
-
- hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_unlock_irqrestore(hlist_lock, *flags);
-}
-NOKPROBE_SYMBOL(kretprobe_hash_unlock);
+ struct kretprobe *rp = get_kretprobe(ri);
-static void kretprobe_table_unlock(unsigned long hash,
- unsigned long *flags)
-__releases(hlist_lock)
-{
- raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_unlock_irqrestore(hlist_lock, *flags);
+ if (likely(rp)) {
+ freelist_add(&ri->freelist, &rp->freelist);
+ } else
+ call_rcu(&ri->rcu, free_rp_inst_rcu);
}
-NOKPROBE_SYMBOL(kretprobe_table_unlock);
+NOKPROBE_SYMBOL(recycle_rp_inst);
static struct kprobe kprobe_busy = {
.addr = (void *) get_kprobe,
@@ -1324,24 +1264,21 @@ void kprobe_busy_end(void)
void kprobe_flush_task(struct task_struct *tk)
{
struct kretprobe_instance *ri;
- struct hlist_head *head;
- struct hlist_node *tmp;
- unsigned long hash, flags = 0;
+ struct llist_node *node;
+ /* Early boot, not yet initialized. */
if (unlikely(!kprobes_initialized))
- /* Early boot. kretprobe_table_locks not yet initialized. */
return;
kprobe_busy_begin();
- hash = hash_ptr(tk, KPROBE_HASH_BITS);
- head = &kretprobe_inst_table[hash];
- kretprobe_table_lock(hash, &flags);
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task == tk)
- recycle_rp_inst(ri);
+ node = __llist_del_all(&tk->kretprobe_instances);
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
+ node = node->next;
+
+ recycle_rp_inst(ri);
}
- kretprobe_table_unlock(hash, &flags);
kprobe_busy_end();
}
@@ -1350,37 +1287,23 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
static inline void free_rp_inst(struct kretprobe *rp)
{
struct kretprobe_instance *ri;
- struct hlist_node *next;
+ struct freelist_node *node;
+ int count = 0;
+
+ node = rp->freelist.head;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, freelist);
+ node = node->next;
- hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
- hlist_del(&ri->hlist);
kfree(ri);
+ count++;
}
-}
-
-static void cleanup_rp_inst(struct kretprobe *rp)
-{
- unsigned long flags, hash;
- struct kretprobe_instance *ri;
- struct hlist_node *next;
- struct hlist_head *head;
- /* To avoid recursive kretprobe by NMI, set kprobe busy here */
- kprobe_busy_begin();
- for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
- kretprobe_table_lock(hash, &flags);
- head = &kretprobe_inst_table[hash];
- hlist_for_each_entry_safe(ri, next, head, hlist) {
- if (ri->rp == rp)
- ri->rp = NULL;
- }
- kretprobe_table_unlock(hash, &flags);
+ if (refcount_sub_and_test(count, &rp->rph->ref)) {
+ kfree(rp->rph);
+ rp->rph = NULL;
}
- kprobe_busy_end();
-
- free_rp_inst(rp);
}
-NOKPROBE_SYMBOL(cleanup_rp_inst);
/* Add the new probe to ap->list */
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
@@ -1643,7 +1566,6 @@ static int check_kprobe_address_safe(struct kprobe *p,
if (!kernel_text_address((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
- static_call_text_reserved(p->addr, p->addr) ||
find_bug((unsigned long)p->addr)) {
ret = -EINVAL;
goto out;
@@ -1942,88 +1864,56 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
void *trampoline_address,
void *frame_pointer)
{
- struct kretprobe_instance *ri = NULL, *last = NULL;
- struct hlist_head *head;
- struct hlist_node *tmp;
- unsigned long flags;
kprobe_opcode_t *correct_ret_addr = NULL;
- bool skipped = false;
+ struct kretprobe_instance *ri = NULL;
+ struct llist_node *first, *node;
+ struct kretprobe *rp;
- kretprobe_hash_lock(current, &head, &flags);
+ /* Find all nodes for this frame. */
+ first = node = current->kretprobe_instances.first;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
- /*
- * It is possible to have multiple instances associated with a given
- * task either because multiple functions in the call path have
- * return probes installed on them, and/or more than one
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always pushed into the head of the list
- * - when multiple return probes are registered for the same
- * function, the (chronologically) first instance's ret_addr
- * will be the real return address, and all the rest will
- * point to kretprobe_trampoline.
- */
- hlist_for_each_entry(ri, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- /*
- * Return probes must be pushed on this hash list correct
- * order (same as return order) so that it can be popped
- * correctly. However, if we find it is pushed it incorrect
- * order, this means we find a function which should not be
- * probed, because the wrong order entry is pushed on the
- * path of processing other kretprobe itself.
- */
- if (ri->fp != frame_pointer) {
- if (!skipped)
- pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
- skipped = true;
- continue;
- }
+ BUG_ON(ri->fp != frame_pointer);
- correct_ret_addr = ri->ret_addr;
- if (skipped)
- pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
- ri->rp->kp.addr);
-
- if (correct_ret_addr != trampoline_address)
+ if (ri->ret_addr != trampoline_address) {
+ correct_ret_addr = ri->ret_addr;
/*
* This is the real return address. Any other
* instances associated with this task are for
* other calls deeper on the call stack
*/
- break;
+ goto found;
+ }
+
+ node = node->next;
}
+ pr_err("Oops! Kretprobe fails to find correct return address.\n");
+ BUG_ON(1);
- BUG_ON(!correct_ret_addr || (correct_ret_addr == trampoline_address));
- last = ri;
+found:
+ /* Unlink all nodes for this frame. */
+ current->kretprobe_instances.first = node->next;
+ node->next = NULL;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- if (ri->fp != frame_pointer)
- continue;
+ /* Run them.. */
+ while (first) {
+ ri = container_of(first, struct kretprobe_instance, llist);
+ first = first->next;
- if (ri->rp && ri->rp->handler) {
+ rp = get_kretprobe(ri);
+ if (rp && rp->handler) {
struct kprobe *prev = kprobe_running();
- __this_cpu_write(current_kprobe, &ri->rp->kp);
+ __this_cpu_write(current_kprobe, &rp->kp);
ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
+ rp->handler(ri, regs);
__this_cpu_write(current_kprobe, prev);
}
recycle_rp_inst(ri);
-
- if (ri == last)
- break;
}
- kretprobe_hash_unlock(current, &flags);
-
return (unsigned long)correct_ret_addr;
}
NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
@@ -2035,44 +1925,26 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
- unsigned long hash, flags = 0;
struct kretprobe_instance *ri;
+ struct freelist_node *fn;
- /* TODO: consider to only swap the RA after the last pre_handler fired */
- hash = hash_ptr(current, KPROBE_HASH_BITS);
- /*
- * Nested is a workaround that will soon not be needed.
- * There's other protections that make sure the same lock
- * is not taken on the same CPU that lockdep is unaware of.
- */
- raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
- if (!hlist_empty(&rp->free_instances)) {
- ri = hlist_entry(rp->free_instances.first,
- struct kretprobe_instance, hlist);
- hlist_del(&ri->hlist);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
-
- ri->rp = rp;
- ri->task = current;
-
- if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
- return 0;
- }
+ fn = freelist_try_get(&rp->freelist);
+ if (!fn) {
+ rp->nmissed++;
+ return 0;
+ }
- arch_prepare_kretprobe(ri, regs);
+ ri = container_of(fn, struct kretprobe_instance, freelist);
- /* XXX(hch): why is there no hlist_move_head? */
- INIT_HLIST_NODE(&ri->hlist);
- kretprobe_table_lock(hash, &flags);
- hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
- kretprobe_table_unlock(hash, &flags);
- } else {
- rp->nmissed++;
- raw_spin_unlock_irqrestore(&rp->lock, flags);
+ if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+ freelist_add(&ri->freelist, &rp->freelist);
+ return 0;
}
+
+ arch_prepare_kretprobe(ri, regs);
+
+ __llist_add(&ri->llist, &current->kretprobe_instances);
+
return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
@@ -2129,18 +2001,24 @@ int register_kretprobe(struct kretprobe *rp)
rp->maxactive = num_possible_cpus();
#endif
}
- raw_spin_lock_init(&rp->lock);
- INIT_HLIST_HEAD(&rp->free_instances);
+ rp->freelist.head = NULL;
+ rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
+ if (!rp->rph)
+ return -ENOMEM;
+
+ rp->rph->rp = rp;
for (i = 0; i < rp->maxactive; i++) {
- inst = kmalloc(sizeof(struct kretprobe_instance) +
+ inst = kzalloc(sizeof(struct kretprobe_instance) +
rp->data_size, GFP_KERNEL);
if (inst == NULL) {
+ refcount_set(&rp->rph->ref, i);
free_rp_inst(rp);
return -ENOMEM;
}
- INIT_HLIST_NODE(&inst->hlist);
- hlist_add_head(&inst->hlist, &rp->free_instances);
+ inst->rph = rp->rph;
+ freelist_add(&inst->freelist, &rp->freelist);
}
+ refcount_set(&rp->rph->ref, i);
rp->nmissed = 0;
/* Establish function entry probe point */
@@ -2182,16 +2060,18 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
if (num <= 0)
return;
mutex_lock(&kprobe_mutex);
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
if (__unregister_kprobe_top(&rps[i]->kp) < 0)
rps[i]->kp.addr = NULL;
+ rps[i]->rph->rp = NULL;
+ }
mutex_unlock(&kprobe_mutex);
synchronize_rcu();
for (i = 0; i < num; i++) {
if (rps[i]->kp.addr) {
__unregister_kprobe_bottom(&rps[i]->kp);
- cleanup_rp_inst(rps[i]);
+ free_rp_inst(rps[i]);
}
}
}
@@ -2235,9 +2115,6 @@ static void kill_kprobe(struct kprobe *p)
lockdep_assert_held(&kprobe_mutex);
- if (WARN_ON_ONCE(kprobe_gone(p)))
- return;
-
p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
@@ -2518,10 +2395,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry(p, head, hlist) {
- if (kprobe_gone(p))
- continue;
-
+ hlist_for_each_entry(p, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2538,7 +2412,6 @@ static int kprobes_module_callback(struct notifier_block *nb,
*/
kill_kprobe(p);
}
- }
}
if (val == MODULE_STATE_GOING)
remove_module_kprobe_blacklist(mod);
@@ -2583,11 +2456,8 @@ static int __init init_kprobes(void)
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
- for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&kprobe_table[i]);
- INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
- raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
- }
err = populate_kprobe_blacklist(__start_kprobe_blacklist,
__stop_kprobe_blacklist);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b911e9f6d9f5..97c7a7782db7 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1731,7 +1731,8 @@ NOKPROBE_SYMBOL(kprobe_dispatcher);
static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
+ struct kretprobe *rp = get_kretprobe(ri);
+ struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
raw_cpu_inc(*tk->nhit);