summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c99
-rw-r--r--kernel/events/uprobes.c36
-rw-r--r--kernel/trace/trace.c24
-rw-r--r--kernel/trace/trace_kprobe.c4
-rw-r--r--kernel/trace/trace_probe.c30
-rw-r--r--kernel/trace/trace_probe.h11
-rw-r--r--kernel/trace/trace_uprobe.c4
7 files changed, 144 insertions, 64 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc9bb2225291..7c0d263f6bc5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1475,8 +1475,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (event->group_leader == event) {
struct list_head *list;
- if (is_software_event(event))
- event->group_flags |= PERF_GROUP_SOFTWARE;
+ event->group_caps = event->event_caps;
list = ctx_group_list(event, ctx);
list_add_tail(&event->group_entry, list);
@@ -1630,9 +1629,7 @@ static void perf_group_attach(struct perf_event *event)
WARN_ON_ONCE(group_leader->ctx != event->ctx);
- if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
- !is_software_event(event))
- group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+ group_leader->group_caps &= event->event_caps;
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
@@ -1723,7 +1720,7 @@ static void perf_group_detach(struct perf_event *event)
sibling->group_leader = sibling;
/* Inherit group flags from the previous leader */
- sibling->group_flags = event->group_flags;
+ sibling->group_caps = event->group_caps;
WARN_ON_ONCE(sibling->ctx != event->ctx);
}
@@ -1832,6 +1829,8 @@ group_sched_out(struct perf_event *group_event,
struct perf_event *event;
int state = group_event->state;
+ perf_pmu_disable(ctx->pmu);
+
event_sched_out(group_event, cpuctx, ctx);
/*
@@ -1840,6 +1839,8 @@ group_sched_out(struct perf_event *group_event,
list_for_each_entry(event, &group_event->sibling_list, group_entry)
event_sched_out(event, cpuctx, ctx);
+ perf_pmu_enable(ctx->pmu);
+
if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
@@ -2145,7 +2146,7 @@ static int group_can_go_on(struct perf_event *event,
/*
* Groups consisting entirely of software events can always go on.
*/
- if (event->group_flags & PERF_GROUP_SOFTWARE)
+ if (event->group_caps & PERF_EV_CAP_SOFTWARE)
return 1;
/*
* If an exclusive group is already on, no other hardware
@@ -2491,7 +2492,7 @@ static int __perf_event_stop(void *info)
* while restarting.
*/
if (sd->restart)
- event->pmu->start(event, PERF_EF_START);
+ event->pmu->start(event, 0);
return 0;
}
@@ -2837,19 +2838,36 @@ unlock:
}
}
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
void perf_sched_cb_dec(struct pmu *pmu)
{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
this_cpu_dec(perf_sched_cb_usages);
+
+ if (!--cpuctx->sched_cb_usage)
+ list_del(&cpuctx->sched_cb_entry);
}
+
void perf_sched_cb_inc(struct pmu *pmu)
{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ if (!cpuctx->sched_cb_usage++)
+ list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
this_cpu_inc(perf_sched_cb_usages);
}
/*
* This function provides the context switch callback to the lower code
* layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
*/
static void perf_pmu_sched_task(struct task_struct *prev,
struct task_struct *next,
@@ -2857,34 +2875,24 @@ static void perf_pmu_sched_task(struct task_struct *prev,
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
- unsigned long flags;
if (prev == next)
return;
- local_irq_save(flags);
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- if (pmu->sched_task) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+ pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */
- perf_pmu_disable(pmu);
+ if (WARN_ON_ONCE(!pmu->sched_task))
+ continue;
- pmu->sched_task(cpuctx->task_ctx, sched_in);
+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ perf_pmu_disable(pmu);
- perf_pmu_enable(pmu);
+ pmu->sched_task(cpuctx->task_ctx, sched_in);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
- }
+ perf_pmu_enable(pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
-
- rcu_read_unlock();
-
- local_irq_restore(flags);
}
static void perf_event_switch(struct task_struct *task,
@@ -3416,6 +3424,22 @@ struct perf_read_data {
int ret;
};
+static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+{
+ int event_cpu = event->oncpu;
+ u16 local_pkg, event_pkg;
+
+ if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
+ event_pkg = topology_physical_package_id(event_cpu);
+ local_pkg = topology_physical_package_id(local_cpu);
+
+ if (event_pkg == local_pkg)
+ return local_cpu;
+ }
+
+ return event_cpu;
+}
+
/*
* Cross CPU call to read the hardware event
*/
@@ -3537,7 +3561,7 @@ u64 perf_event_read_local(struct perf_event *event)
static int perf_event_read(struct perf_event *event, bool group)
{
- int ret = 0;
+ int ret = 0, cpu_to_read, local_cpu;
/*
* If event is enabled and currently active on a CPU, update the
@@ -3549,6 +3573,11 @@ static int perf_event_read(struct perf_event *event, bool group)
.group = group,
.ret = 0,
};
+
+ local_cpu = get_cpu();
+ cpu_to_read = find_cpu_to_read(event, local_cpu);
+ put_cpu();
+
/*
* Purposely ignore the smp_call_function_single() return
* value.
@@ -3559,7 +3588,7 @@ static int perf_event_read(struct perf_event *event, bool group)
* Therefore, either way, we'll have an up-to-date event count
* after this.
*/
- (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+ (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
@@ -5350,9 +5379,10 @@ perf_output_sample_regs(struct perf_output_handle *handle,
struct pt_regs *regs, u64 mask)
{
int bit;
+ DECLARE_BITMAP(_mask, 64);
- for_each_set_bit(bit, (const unsigned long *) &mask,
- sizeof(mask) * BITS_PER_BYTE) {
+ bitmap_from_u64(_mask, mask);
+ for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
u64 val;
val = perf_reg_value(regs, bit);
@@ -9505,6 +9535,9 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_alloc;
}
+ if (pmu->task_ctx_nr == perf_sw_context)
+ event->event_caps |= PERF_EV_CAP_SOFTWARE;
+
if (group_leader &&
(is_software_event(event) != is_software_event(group_leader))) {
if (is_software_event(event)) {
@@ -9518,7 +9551,7 @@ SYSCALL_DEFINE5(perf_event_open,
*/
pmu = group_leader->pmu;
} else if (is_software_event(group_leader) &&
- (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+ (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
/*
* In case the group is a pure software group, and we
* try to add a hardware event, move the whole group to
@@ -10453,6 +10486,8 @@ static void __init perf_event_init_all_cpus(void)
INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
+
+ INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c50276b60d1..d4129bb05e5d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -150,7 +150,7 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
* Returns 0 on success, -EFAULT on failure.
*/
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
- struct page *page, struct page *kpage)
+ struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
@@ -161,49 +161,49 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
const unsigned long mmun_end = addr + PAGE_SIZE;
struct mem_cgroup *memcg;
- err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+ err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
false);
if (err)
return err;
/* For try_to_free_swap() and munlock_vma_page() below */
- lock_page(page);
+ lock_page(old_page);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
err = -EAGAIN;
- ptep = page_check_address(page, mm, addr, &ptl, 0);
+ ptep = page_check_address(old_page, mm, addr, &ptl, 0);
if (!ptep) {
- mem_cgroup_cancel_charge(kpage, memcg, false);
+ mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
- get_page(kpage);
- page_add_new_anon_rmap(kpage, vma, addr, false);
- mem_cgroup_commit_charge(kpage, memcg, false, false);
- lru_cache_add_active_or_unevictable(kpage, vma);
+ get_page(new_page);
+ page_add_new_anon_rmap(new_page, vma, addr, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
- if (!PageAnon(page)) {
- dec_mm_counter(mm, mm_counter_file(page));
+ if (!PageAnon(old_page)) {
+ dec_mm_counter(mm, mm_counter_file(old_page));
inc_mm_counter(mm, MM_ANONPAGES);
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+ set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
- page_remove_rmap(page, false);
- if (!page_mapped(page))
- try_to_free_swap(page);
+ page_remove_rmap(old_page, false);
+ if (!page_mapped(old_page))
+ try_to_free_swap(old_page);
pte_unmap_unlock(ptep, ptl);
if (vma->vm_flags & VM_LOCKED)
- munlock_vma_page(page);
- put_page(page);
+ munlock_vma_page(old_page);
+ put_page(old_page);
err = 0;
unlock:
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- unlock_page(page);
+ unlock_page(old_page);
return err;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7bc56762ca35..37824d98ae71 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4123,6 +4123,30 @@ static const char readme_msg[] =
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
+#ifdef CONFIG_KPROBE_EVENT
+ " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+ " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
+#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+ "\t accepts: event-definitions (one definition per line)\n"
+ "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
+ "\t -:[<group>/]<event>\n"
+#ifdef CONFIG_KPROBE_EVENT
+ "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+ "\t place: <path>:<offset>\n"
+#endif
+ "\t args: <name>=fetcharg[:type]\n"
+ "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+ "\t $stack<index>, $stack, $retval, $comm\n"
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>\n"
+#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
" events/<system>/\t- Directory containing all trace events for <system>:\n"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9aedb0b06683..eb6c9f1d3a93 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -253,6 +253,10 @@ static const struct fetch_type kprobes_fetch_type_table[] = {
ASSIGN_FETCH_TYPE(s16, u16, 1),
ASSIGN_FETCH_TYPE(s32, u32, 1),
ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
ASSIGN_FETCH_TYPE_END
};
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 74e80a582c28..8c0553d9afd3 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -36,24 +36,28 @@ const char *reserved_field_names[] = {
};
/* Printing in basic type function template */
-#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
-int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
+#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \
+int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \
void *data, void *ent) \
{ \
trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
return !trace_seq_has_overflowed(s); \
} \
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
-
-DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
+const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname));
+
+DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "%Lu")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s8, s8, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s16, s16, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s32, s32, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s64, s64, "%Ld")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x8, u8, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
/* Print type function for string type */
int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 45400ca5ded1..0c0ae54d44c6 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -149,6 +149,11 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(s8);
DECLARE_BASIC_PRINT_TYPE_FUNC(s16);
DECLARE_BASIC_PRINT_TYPE_FUNC(s32);
DECLARE_BASIC_PRINT_TYPE_FUNC(s64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x8);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x16);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x32);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
+
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
@@ -203,7 +208,7 @@ DEFINE_FETCH_##method(u32) \
DEFINE_FETCH_##method(u64)
/* Default (unsigned long) fetch type */
-#define __DEFAULT_FETCH_TYPE(t) u##t
+#define __DEFAULT_FETCH_TYPE(t) x##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
@@ -234,6 +239,10 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+/* If ptype is an alias of atype, use this macro (show atype in format) */
+#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \
+ __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype)
+
#define ASSIGN_FETCH_TYPE_END {}
#define FETCH_TYPE_STRING 0
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c53485441c88..7a687320f867 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -211,6 +211,10 @@ static const struct fetch_type uprobes_fetch_type_table[] = {
ASSIGN_FETCH_TYPE(s16, u16, 1),
ASSIGN_FETCH_TYPE(s32, u32, 1),
ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
ASSIGN_FETCH_TYPE_END
};