Diffstat (limited to 'kernel')
70 files changed, 3653 insertions, 999 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f2cc0d118a0b..4cb4130ced32 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ KCOV_INSTRUMENT_softirq.o := n
 # and produce insane amounts of uninteresting coverage.
 KCOV_INSTRUMENT_module.o := n
 KCOV_INSTRUMENT_extable.o := n
+KCOV_INSTRUMENT_stacktrace.o := n
 # Don't self-instrument.
 KCOV_INSTRUMENT_kcov.o := n
 KASAN_SANITIZE_kcov.o := n
diff --git a/kernel/acct.c b/kernel/acct.c
index 81f9831a7859..11ff4a596d6b 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -416,6 +416,7 @@ static void fill_ac(acct_t *ac)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
 	u64 elapsed, run_time;
+	time64_t btime;
 	struct tty_struct *tty;
 
 	/*
@@ -448,7 +449,8 @@ static void fill_ac(acct_t *ac)
 	}
 #endif
 	do_div(elapsed, AHZ);
-	ac->ac_btime = get_seconds() - elapsed;
+	btime = ktime_get_real_seconds() - elapsed;
+	ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
 #if ACCT_VERSION==2
 	ac->ac_ahz = AHZ;
 #endif
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 8ad1c9ea26b2..042f95534f86 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -96,12 +96,11 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
 
 static const struct btf_type *module_type;
 
-void bpf_struct_ops_init(struct btf *btf)
+void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
 {
 	s32 type_id, value_id, module_id;
 	const struct btf_member *member;
 	struct bpf_struct_ops *st_ops;
-	struct bpf_verifier_log log = {};
 	const struct btf_type *t;
 	char value_name[128];
 	const char *mname;
@@ -172,7 +171,7 @@ void bpf_struct_ops_init(struct btf *btf)
 						   member->type, NULL);
 		if (func_proto &&
-		    btf_distill_func_proto(&log, btf,
+		    btf_distill_func_proto(log, btf,
 					   func_proto, mname,
 					   &st_ops->func_models[j])) {
 			pr_warn("Error in parsing func ptr %s in struct %s\n",
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b7c1660fb594..805c43b083e9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3643,7 +3643,7 @@ struct btf *btf_parse_vmlinux(void)
 		goto errout;
 	}
 
-	bpf_struct_ops_init(btf);
+	bpf_struct_ops_init(btf, log);
 
 	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
@@ -3931,6 +3931,7 @@ again:
 
 		if (btf_type_is_ptr(mtype)) {
 			const struct btf_type *stype;
+			u32 id;
 
 			if (msize != size || off != moff) {
 				bpf_log(log,
@@ -3939,12 +3940,9 @@ again:
 				return -EACCES;
 			}
 
-			stype = btf_type_by_id(btf_vmlinux, mtype->type);
-			/* skip modifiers */
-			while (btf_type_is_modifier(stype))
-				stype = btf_type_by_id(btf_vmlinux, stype->type);
+			stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id);
 			if (btf_type_is_struct(stype)) {
-				*next_btf_id = mtype->type;
+				*next_btf_id = id;
 				return PTR_TO_BTF_ID;
 			}
 		}
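The btf.c hunk above folds an open-coded modifier-skipping loop into btf_type_skip_modifiers(), which also hands back the resolved type id, so PTR_TO_BTF_ID now refers to the concrete struct rather than to a const/volatile/typedef wrapper. A toy, userspace-compilable sketch of that resolution pattern follows; the type table, kind names and helper here are illustrative stand-ins, not the kernel's BTF layout:

#include <stdio.h>

/* Toy model of a BTF-like type table: modifier kinds point at another
 * type via 'next'; concrete kinds terminate the chain. */
enum kind { KIND_STRUCT, KIND_INT, KIND_CONST, KIND_VOLATILE, KIND_TYPEDEF };

struct toy_type {
	enum kind kind;
	unsigned int next;	/* only meaningful for modifier kinds */
};

static int is_modifier(enum kind k)
{
	return k == KIND_CONST || k == KIND_VOLATILE || k == KIND_TYPEDEF;
}

/* Walk past modifiers and report the id of the concrete type, mirroring
 * what a helper like btf_type_skip_modifiers() does for callers that
 * need both the type and its resolved id. */
static const struct toy_type *skip_modifiers(const struct toy_type *tab,
					     unsigned int id,
					     unsigned int *res_id)
{
	while (is_modifier(tab[id].kind))
		id = tab[id].next;
	*res_id = id;
	return &tab[id];
}

int main(void)
{
	/* id 0: struct; id 1: const -> 2; id 2: typedef -> 0 */
	const struct toy_type tab[] = {
		{ KIND_STRUCT, 0 }, { KIND_CONST, 2 }, { KIND_TYPEDEF, 0 },
	};
	unsigned int id;

	skip_modifiers(tab, 1, &id);
	printf("resolved id: %u\n", id);	/* prints 0 */
	return 0;
}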
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index bd2fd8eab470..5e40e7fccc21 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -569,16 +569,11 @@ enum {
 	OPT_MODE,
 };
 
-static const struct fs_parameter_spec bpf_param_specs[] = {
+static const struct fs_parameter_spec bpf_fs_parameters[] = {
 	fsparam_u32oct	("mode",			OPT_MODE),
 	{}
 };
 
-static const struct fs_parameter_description bpf_fs_parameters = {
-	.name		= "bpf",
-	.specs		= bpf_param_specs,
-};
-
 struct bpf_mount_opts {
 	umode_t mode;
 };
@@ -589,7 +584,7 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	struct fs_parse_result result;
 	int opt;
 
-	opt = fs_parse(fc, &bpf_fs_parameters, param, &result);
+	opt = fs_parse(fc, bpf_fs_parameters, param, &result);
 	if (opt < 0)
 		/* We might like to report bad mount options here, but
 		 * traditionally we've ignored all mount options, so we'd
@@ -665,7 +660,7 @@ static struct file_system_type bpf_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "bpf",
 	.init_fs_context = bpf_init_fs_context,
-	.parameters	= &bpf_fs_parameters,
+	.parameters	= bpf_fs_parameters,
 	.kill_sb	= kill_litter_super,
 };
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 5b9da0954a27..2c5dc6541ece 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -302,14 +302,14 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 	struct inode *ns_inode;
 	struct path ns_path;
 	char __user *uinsns;
-	void *res;
+	int res;
 	u32 ulen;
 
 	res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
-	if (IS_ERR(res)) {
+	if (res) {
 		if (!info->ifindex)
 			return -ENODEV;
-		return PTR_ERR(res);
+		return res;
 	}
 
 	down_read(&bpf_devs_lock);
@@ -526,13 +526,13 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
 	};
 	struct inode *ns_inode;
 	struct path ns_path;
-	void *res;
+	int res;
 
 	res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args);
-	if (IS_ERR(res)) {
+	if (res) {
 		if (!info->ifindex)
 			return -ENODEV;
-		return PTR_ERR(res);
+		return res;
 	}
 
 	ns_inode = ns_path.dentry->d_inode;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 90d1710fef6c..bfbeabc17a9d 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -7,7 +7,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/refcount.h>
-#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 
 #define TRACE_CGROUP_PATH_LEN 1024
 extern spinlock_t trace_cgroup_path_lock;
@@ -265,7 +265,7 @@ extern const struct proc_ns_operations cgroupns_operations;
 */
 extern struct cftype cgroup1_base_files[];
 extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
-extern const struct fs_parameter_description cgroup1_fs_parameters;
+extern const struct fs_parameter_spec cgroup1_fs_parameters[];
 
 int proc_cgroupstats_show(struct seq_file *m, void *v);
 bool cgroup1_ssid_disabled(int ssid);
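The bpf/inode.c and cgroup-internal.h hunks track a VFS API change: the fs_parameter_description wrapper is gone, and both fs_parse() and file_system_type->parameters now take the fs_parameter_spec array directly. A minimal kernel-style sketch of the new shape for a hypothetical "myfs"; this is only the parameter-parsing plumbing, not a complete filesystem:

#include <linux/fs_context.h>
#include <linux/fs_parser.h>

enum { Opt_mode };

static const struct fs_parameter_spec myfs_fs_parameters[] = {
	fsparam_u32oct("mode", Opt_mode),
	{}
};

static int myfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	int opt;

	/* fs_parse() now takes the spec array itself, not a wrapper. */
	opt = fs_parse(fc, myfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;
	/* ... apply result.uint_32 for Opt_mode ... */
	return 0;
}

static struct file_system_type myfs_type = {
	.name		= "myfs",
	.parameters	= myfs_fs_parameters,	/* array, no '&' wrapper */
	/* .init_fs_context, .kill_sb etc. omitted from this sketch */
};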
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 09f3a413f6f8..be1a1c83cdd1 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -18,8 +18,6 @@
 
 #include <trace/events/cgroup.h>
 
-#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
-
 /*
  * pidlists linger the following amount before being destroyed.  The goal
  * is avoiding frequent destruction in the middle of consecutive read calls
@@ -887,7 +885,7 @@ enum cgroup1_param {
 	Opt_xattr,
 };
 
-static const struct fs_parameter_spec cgroup1_param_specs[] = {
+const struct fs_parameter_spec cgroup1_fs_parameters[] = {
 	fsparam_flag  ("all",		Opt_all),
 	fsparam_flag  ("clone_children", Opt_clone_children),
 	fsparam_flag  ("cpuset_v2_mode", Opt_cpuset_v2_mode),
@@ -899,11 +897,6 @@ static const struct fs_parameter_spec cgroup1_param_specs[] = {
 	{}
 };
 
-const struct fs_parameter_description cgroup1_fs_parameters = {
-	.name		= "cgroup1",
-	.specs		= cgroup1_param_specs,
-};
-
 int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
@@ -911,7 +904,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	struct fs_parse_result result;
 	int opt, i;
 
-	opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result);
+	opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
 	if (opt == -ENOPARAM) {
 		if (strcmp(param->key, "source") == 0) {
 			fc->source = param->string;
@@ -924,7 +917,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			ctx->subsys_mask |= (1 << i);
 			return 0;
 		}
-		return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key);
+		return invalfc(fc, "Unknown subsys name '%s'", param->key);
 	}
 	if (opt < 0)
 		return opt;
@@ -952,7 +945,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_release_agent:
 		/* Specifying two release agents is forbidden */
 		if (ctx->release_agent)
-			return cg_invalf(fc, "cgroup1: release_agent respecified");
+			return invalfc(fc, "release_agent respecified");
 		ctx->release_agent = param->string;
 		param->string = NULL;
 		break;
@@ -962,9 +955,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			return -ENOENT;
 		/* Can't specify an empty name */
 		if (!param->size)
-			return cg_invalf(fc, "cgroup1: Empty name");
+			return invalfc(fc, "Empty name");
 		if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
-			return cg_invalf(fc, "cgroup1: Name too long");
+			return invalfc(fc, "Name too long");
 		/* Must match [\w.-]+ */
 		for (i = 0; i < param->size; i++) {
 			char c = param->string[i];
@@ -972,11 +965,11 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 				continue;
 			if ((c == '.') || (c == '-') || (c == '_'))
 				continue;
-			return cg_invalf(fc, "cgroup1: Invalid name");
+			return invalfc(fc, "Invalid name");
 		}
 		/* Specifying two names is forbidden */
 		if (ctx->name)
-			return cg_invalf(fc, "cgroup1: name respecified");
+			return invalfc(fc, "name respecified");
 		ctx->name = param->string;
 		param->string = NULL;
 		break;
@@ -1011,7 +1004,7 @@ static int check_cgroupfs_options(struct fs_context *fc)
 	if (ctx->all_ss) {
 		/* Mutually exclusive option 'all' + subsystem name */
 		if (ctx->subsys_mask)
-			return cg_invalf(fc, "cgroup1: subsys name conflicts with all");
+			return invalfc(fc, "subsys name conflicts with all");
 		/* 'all' => select all the subsystems */
 		ctx->subsys_mask = enabled;
 	}
@@ -1021,7 +1014,7 @@ static int check_cgroupfs_options(struct fs_context *fc)
 	 * empty hierarchies must have a name).
 	 */
 	if (!ctx->subsys_mask && !ctx->name)
-		return cg_invalf(fc, "cgroup1: Need name or subsystem set");
+		return invalfc(fc, "Need name or subsystem set");
 
 	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
 	if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
-		return cg_invalf(fc, "cgroup1: noprefix used incorrectly");
+		return invalfc(fc, "noprefix used incorrectly");
 
 	/* Can't specify "none" and some subsystems */
 	if (ctx->subsys_mask && ctx->none)
-		return cg_invalf(fc, "cgroup1: none used incorrectly");
+		return invalfc(fc, "none used incorrectly");
 
 	return 0;
 }
@@ -1063,7 +1056,7 @@ int cgroup1_reconfigure(struct fs_context *fc)
 	/* Don't allow flags or name to change at remount */
 	if ((ctx->flags ^ root->flags) ||
 	    (ctx->name && strcmp(ctx->name, root->name))) {
-		cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
+		errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
 		       ctx->flags, ctx->name ?: "", root->flags, root->name);
 		ret = -EINVAL;
 		goto out_unlock;
@@ -1180,7 +1173,7 @@ static int cgroup1_root_to_use(struct fs_context *fc)
 	 * can't create new one without subsys specification.
 	 */
 	if (!ctx->subsys_mask && !ctx->none)
-		return cg_invalf(fc, "cgroup1: No subsys list or none specified");
+		return invalfc(fc, "No subsys list or none specified");
 
 	/* Hierarchies may only be created in the initial cgroup namespace. */
 	if (ctx->ns != &init_cgroup_ns)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index b3744872263e..db552b9f9377 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1816,24 +1816,19 @@ enum cgroup2_param {
 	nr__cgroup2_params
 };
 
-static const struct fs_parameter_spec cgroup2_param_specs[] = {
+static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
 	fsparam_flag("nsdelegate",		Opt_nsdelegate),
 	fsparam_flag("memory_localevents",	Opt_memory_localevents),
 	{}
 };
 
-static const struct fs_parameter_description cgroup2_fs_parameters = {
-	.name		= "cgroup2",
-	.specs		= cgroup2_param_specs,
-};
-
 static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
 	struct fs_parse_result result;
 	int opt;
 
-	opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
+	opt = fs_parse(fc, cgroup2_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
@@ -2156,7 +2151,7 @@ static void cgroup_kill_sb(struct super_block *sb)
 struct file_system_type cgroup_fs_type = {
 	.name = "cgroup",
 	.init_fs_context = cgroup_init_fs_context,
-	.parameters = &cgroup1_fs_parameters,
+	.parameters = cgroup1_fs_parameters,
 	.kill_sb = cgroup_kill_sb,
 	.fs_flags = FS_USERNS_MOUNT,
 };
@@ -2164,7 +2159,7 @@ struct file_system_type cgroup_fs_type = {
 static struct file_system_type cgroup2_fs_type = {
 	.name = "cgroup2",
 	.init_fs_context = cgroup_init_fs_context,
-	.parameters = &cgroup2_fs_parameters,
+	.parameters = cgroup2_fs_parameters,
 	.kill_sb = cgroup_kill_sb,
 	.fs_flags = FS_USERNS_MOUNT,
 };
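The invalfc() strings above are exactly what userspace sees when cgroup1 mount options are rejected; for example, a v1 hierarchy with no controllers is only accepted when it carries a name. A small userspace demo of that rule, with a made-up mount point and hierarchy name (needs root and an existing /mnt/cg1 directory):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* A v1 hierarchy with no controllers must carry a name; dropping
	 * "name=demo" below fails with "Need name or subsystem set". */
	if (mount("none", "/mnt/cg1", "cgroup", 0, "none,name=demo")) {
		perror("mount");
		return 1;
	}
	puts("mounted named cgroup1 hierarchy at /mnt/cg1");
	return 0;
}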
diff --git a/kernel/configs.c b/kernel/configs.c
index c09ea4c995e1..a28c79c5f713 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -47,10 +47,9 @@ ikconfig_read_current(struct file *file, char __user *buf,
 				       &kernel_config_data);
 }
 
-static const struct file_operations ikconfig_file_ops = {
-	.owner = THIS_MODULE,
-	.read = ikconfig_read_current,
-	.llseek = default_llseek,
+static const struct proc_ops config_gz_proc_ops = {
+	.proc_read = ikconfig_read_current,
+	.proc_lseek = default_llseek,
 };
 
 static int __init ikconfig_init(void)
@@ -59,7 +58,7 @@ static int __init ikconfig_init(void)
 
 	/* create the current config file */
 	entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL,
-			    &ikconfig_file_ops);
+			    &config_gz_proc_ops);
 	if (!entry)
 		return -ENOMEM;
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 62c301ad0773..d7ebb2c79cb8 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -412,7 +412,6 @@ static int kdb_bc(int argc, const char **argv)
 	 *	assume that the breakpoint number is desired.
 	 */
 	if (addr < KDB_MAXBPT) {
-		bp = &kdb_breakpoints[addr];
 		lowbp = highbp = addr;
 		highbp++;
 	} else {
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 4af48ac53625..3de0cc780c16 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -119,7 +119,6 @@ kdb_bt_cpu(unsigned long cpu)
 		return;
 	}
 
-	kdb_set_current_task(kdb_tsk);
 	kdb_bt1(kdb_tsk, ~0UL, false);
 }
 
@@ -166,10 +165,8 @@ kdb_bt(int argc, const char **argv)
 		if (diag)
 			return diag;
 		p = find_task_by_pid_ns(pid, &init_pid_ns);
-		if (p) {
-			kdb_set_current_task(p);
+		if (p)
 			return kdb_bt1(p, ~0UL, false);
-		}
 		kdb_printf("No process with pid == %ld found\n", pid);
 		return 0;
 	} else if (strcmp(argv[0], "btt") == 0) {
@@ -178,11 +175,9 @@ kdb_bt(int argc, const char **argv)
 		diag = kdbgetularg((char *)argv[1], &addr);
 		if (diag)
 			return diag;
-		kdb_set_current_task((struct task_struct *)addr);
 		return kdb_bt1((struct task_struct *)addr, ~0UL, false);
 	} else if (strcmp(argv[0], "btc") == 0) {
 		unsigned long cpu = ~0;
-		struct task_struct *save_current_task = kdb_current_task;
 		if (argc > 1)
 			return KDB_ARGCOUNT;
 		if (argc == 1) {
@@ -204,7 +199,6 @@ kdb_bt(int argc, const char **argv)
 				kdb_bt_cpu(cpu);
 				touch_nmi_watchdog();
 			}
-			kdb_set_current_task(save_current_task);
 		}
 		return 0;
 	} else {
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 8bcdded5d61f..924bc9298a42 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -553,7 +553,7 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 	int this_cpu, old_cpu;
 	char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
 	char *moreprompt = "more> ";
-	struct console *c = console_drivers;
+	struct console *c;
 	unsigned long uninitialized_var(flags);
 
 	/* Serialize kdb_printf if multiple cpus try to write at once.
@@ -698,10 +698,9 @@ kdb_printit:
 				cp2++;
 			}
 		}
-		while (c) {
+		for_each_console(c) {
 			c->write(c, cp, retlen - (cp - kdb_buffer));
 			touch_nmi_watchdog();
-			c = c->next;
 		}
 	}
 	if (logging) {
@@ -752,7 +751,6 @@ kdb_printit:
 		moreprompt = "more> ";
 
 	kdb_input_flush();
-	c = console_drivers;
 
 	if (dbg_io_ops && !dbg_io_ops->is_console) {
 		len = strlen(moreprompt);
@@ -762,10 +760,9 @@ kdb_printit:
 			cp++;
 		}
 	}
-	while (c) {
+	for_each_console(c) {
 		c->write(c, moreprompt, strlen(moreprompt));
 		touch_nmi_watchdog();
-		c = c->next;
 	}
 
 	if (logging)
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4567fe998c30..ba12e9f4661e 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -73,7 +73,6 @@ int kdb_nextline = 1;
 int kdb_state;			/* General KDB state */
 
 struct task_struct *kdb_current_task;
-EXPORT_SYMBOL(kdb_current_task);
 struct pt_regs *kdb_current_regs;
 
 const char *kdb_diemsg;
@@ -1139,7 +1138,7 @@ static void kdb_dumpregs(struct pt_regs *regs)
 	console_loglevel = old_lvl;
 }
 
-void kdb_set_current_task(struct task_struct *p)
+static void kdb_set_current_task(struct task_struct *p)
 {
 	kdb_current_task = p;
 
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 55d052061ef9..2e296e4a234c 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -240,8 +240,8 @@ extern void *debug_kmalloc(size_t size, gfp_t flags);
 extern void debug_kfree(void *);
 extern void debug_kusage(void);
 
-extern void kdb_set_current_task(struct task_struct *);
 extern struct task_struct *kdb_current_task;
+extern struct pt_regs *kdb_current_regs;
 
 #ifdef CONFIG_KDB_KEYBOARD
 extern void kdb_kbd_cleanup_state(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fdb7f7ef380c..e453589da97c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4374,7 +4374,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_attach(struct perf_event *event,
-			       struct ring_buffer *rb);
+			       struct perf_buffer *rb);
 
 static void detach_sb_event(struct perf_event *event)
 {
@@ -5055,7 +5055,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static __poll_t perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_event *event = file->private_data;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	__poll_t events = EPOLLHUP;
 
 	poll_wait(file, &event->waitq, wait);
@@ -5297,7 +5297,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 		return perf_event_set_bpf_prog(event, arg);
 
 	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
-		struct ring_buffer *rb;
+		struct perf_buffer *rb;
 
 		rcu_read_lock();
 		rb = rcu_dereference(event->rb);
@@ -5433,7 +5433,7 @@ static void calc_timer_values(struct perf_event *event,
 static void perf_event_init_userpage(struct perf_event *event)
 {
 	struct perf_event_mmap_page *userpg;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
@@ -5465,7 +5465,7 @@ void __weak arch_perf_update_userpage(
 void perf_event_update_userpage(struct perf_event *event)
 {
 	struct perf_event_mmap_page *userpg;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	u64 enabled, running, now;
 
 	rcu_read_lock();
@@ -5516,7 +5516,7 @@ EXPORT_SYMBOL_GPL(perf_event_update_userpage);
 static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
 {
 	struct perf_event *event = vmf->vma->vm_file->private_data;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 
 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -5549,9 +5549,9 @@ unlock:
 }
 
 static void ring_buffer_attach(struct perf_event *event,
-			       struct ring_buffer *rb)
+			       struct perf_buffer *rb)
 {
-	struct ring_buffer *old_rb = NULL;
+	struct perf_buffer *old_rb = NULL;
 	unsigned long flags;
 
 	if (event->rb) {
@@ -5609,7 +5609,7 @@ static void ring_buffer_attach(struct perf_event *event,
 
 static void ring_buffer_wakeup(struct perf_event *event)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
@@ -5620,9 +5620,9 @@ static void ring_buffer_wakeup(struct perf_event *event)
 	rcu_read_unlock();
 }
 
-struct ring_buffer *ring_buffer_get(struct perf_event *event)
+struct perf_buffer *ring_buffer_get(struct perf_event *event)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
@@ -5635,7 +5635,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event)
 	return rb;
 }
 
-void ring_buffer_put(struct ring_buffer *rb)
+void ring_buffer_put(struct perf_buffer *rb)
 {
 	if (!refcount_dec_and_test(&rb->refcount))
 		return;
@@ -5673,7 +5673,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	struct ring_buffer *rb = ring_buffer_get(event);
+	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
@@ -5791,8 +5791,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	struct perf_event *event = file->private_data;
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
+	struct perf_buffer *rb = NULL;
 	unsigned long locked, lock_limit;
-	struct ring_buffer *rb = NULL;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra = 0, extra = 0;
@@ -6275,7 +6275,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event,
 					  size_t size)
 {
 	struct perf_event *sampler = event->aux_event;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 
 	data->aux_size = 0;
 
@@ -6308,7 +6308,7 @@ out:
 	return data->aux_size;
 }
 
-long perf_pmu_snapshot_aux(struct ring_buffer *rb,
+long perf_pmu_snapshot_aux(struct perf_buffer *rb,
 			   struct perf_event *event,
 			   struct perf_output_handle *handle,
 			   unsigned long size)
@@ -6347,8 +6347,8 @@ static void perf_aux_sample_output(struct perf_event *event,
 				   struct perf_sample_data *data)
 {
 	struct perf_event *sampler = event->aux_event;
+	struct perf_buffer *rb;
 	unsigned long pad;
-	struct ring_buffer *rb;
 	long size;
 
 	if (WARN_ON_ONCE(!sampler || !data->aux_size))
@@ -6716,7 +6716,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 		int wakeup_events = event->attr.wakeup_events;
 
 		if (wakeup_events) {
-			struct ring_buffer *rb = handle->rb;
+			struct perf_buffer *rb = handle->rb;
 			int events = local_inc_return(&rb->events);
 
 			if (events >= wakeup_events) {
@@ -7159,7 +7159,7 @@ void perf_event_exec(void)
 }
 
 struct remote_output {
-	struct ring_buffer	*rb;
+	struct perf_buffer	*rb;
 	int			err;
 };
 
@@ -7167,7 +7167,7 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
 {
 	struct perf_event *parent = event->parent;
 	struct remote_output *ro = data;
-	struct ring_buffer *rb = ro->rb;
+	struct perf_buffer *rb = ro->rb;
 	struct stop_event_data sd = {
 		.event	= event,
 	};
@@ -7504,7 +7504,7 @@ static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
 {
 	struct path ns_path;
 	struct inode *ns_inode;
-	void *error;
+	int error;
 
 	error = ns_get_path(&ns_path, task, ns_ops);
 	if (!error) {
@@ -11007,7 +11007,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct ring_buffer *rb = NULL;
+	struct perf_buffer *rb = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 747d67f130cb..f16f66b6b655 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -10,7 +10,7 @@
 
 #define RING_BUFFER_WRITABLE		0x01
 
-struct ring_buffer {
+struct perf_buffer {
 	refcount_t			refcount;
 	struct rcu_head			rcu_head;
 #ifdef CONFIG_PERF_USE_VMALLOC
@@ -58,17 +58,17 @@ struct ring_buffer {
 	void				*data_pages[0];
 };
 
-extern void rb_free(struct ring_buffer *rb);
+extern void rb_free(struct perf_buffer *rb);
 
 static inline void rb_free_rcu(struct rcu_head *rcu_head)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 
-	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
 	rb_free(rb);
 }
 
-static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+static inline void rb_toggle_paused(struct perf_buffer *rb, bool pause)
 {
 	if (!pause && rb->nr_pages)
 		rb->paused = 0;
@@ -76,16 +76,16 @@ static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
 		rb->paused = 1;
 }
 
-extern struct ring_buffer *
+extern struct perf_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
-extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+extern int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 			pgoff_t pgoff, int nr_pages, long watermark, int flags);
-extern void rb_free_aux(struct ring_buffer *rb);
-extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
-extern void ring_buffer_put(struct ring_buffer *rb);
+extern void rb_free_aux(struct perf_buffer *rb);
+extern struct perf_buffer *ring_buffer_get(struct perf_event *event);
+extern void ring_buffer_put(struct perf_buffer *rb);
 
-static inline bool rb_has_aux(struct ring_buffer *rb)
+static inline bool rb_has_aux(struct perf_buffer *rb)
 {
 	return !!rb->aux_nr_pages;
 }
@@ -94,7 +94,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 			  unsigned long size, u64 flags);
 
 extern struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
+perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff);
 
 #ifdef CONFIG_PERF_USE_VMALLOC
 /*
@@ -103,25 +103,25 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
  * Required for architectures that have d-cache aliasing issues.
  */
 
-static inline int page_order(struct ring_buffer *rb)
+static inline int page_order(struct perf_buffer *rb)
 {
 	return rb->page_order;
 }
 
 #else
 
-static inline int page_order(struct ring_buffer *rb)
+static inline int page_order(struct perf_buffer *rb)
 {
 	return 0;
 }
 #endif
 
-static inline unsigned long perf_data_size(struct ring_buffer *rb)
+static inline unsigned long perf_data_size(struct perf_buffer *rb)
 {
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
 
-static inline unsigned long perf_aux_size(struct ring_buffer *rb)
+static inline unsigned long perf_aux_size(struct perf_buffer *rb)
 {
 	return rb->aux_nr_pages << PAGE_SHIFT;
 }
@@ -141,7 +141,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb)
 		buf += written;						\
 		handle->size -= written;				\
 		if (!handle->size) {					\
-			struct ring_buffer *rb = handle->rb;		\
+			struct perf_buffer *rb = handle->rb;		\
 									\
 			handle->page++;					\
 			handle->page &= rb->nr_pages - 1;		\
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 7ffd5c763f93..192b8abc6330 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -35,7 +35,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 */
 static void perf_output_get_handle(struct perf_output_handle *handle)
 {
-	struct ring_buffer *rb = handle->rb;
+	struct perf_buffer *rb = handle->rb;
 
 	preempt_disable();
 
@@ -49,7 +49,7 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
 {
-	struct ring_buffer *rb = handle->rb;
+	struct perf_buffer *rb = handle->rb;
 	unsigned long head;
 	unsigned int nest;
 
@@ -150,7 +150,7 @@ __perf_output_begin(struct perf_output_handle *handle,
 		    struct perf_event *event, unsigned int size,
 		    bool backward)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	unsigned long tail, offset, head;
 	int have_lost, page_shift;
 	struct {
@@ -301,7 +301,7 @@ void perf_output_end(struct perf_output_handle *handle)
 }
 
 static void
-ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
+ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 {
 	long max_size = perf_data_size(rb);
 
@@ -361,7 +361,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 {
 	struct perf_event *output_event = event;
 	unsigned long aux_head, aux_tail;
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	unsigned int nest;
 
 	if (output_event->parent)
@@ -449,7 +449,7 @@ err:
 }
 EXPORT_SYMBOL_GPL(perf_aux_output_begin);
 
-static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
+static __always_inline bool rb_need_aux_wakeup(struct perf_buffer *rb)
 {
 	if (rb->aux_overwrite)
 		return false;
@@ -475,7 +475,7 @@ static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
 void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 {
 	bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
-	struct ring_buffer *rb = handle->rb;
+	struct perf_buffer *rb = handle->rb;
 	unsigned long aux_head;
 
 	/* in overwrite mode, driver provides aux_head via handle */
@@ -532,7 +532,7 @@ EXPORT_SYMBOL_GPL(perf_aux_output_end);
 */
 int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 {
-	struct ring_buffer *rb = handle->rb;
+	struct perf_buffer *rb = handle->rb;
 
 	if (size > handle->size)
 		return -ENOSPC;
@@ -569,8 +569,8 @@ long perf_output_copy_aux(struct perf_output_handle *aux_handle,
 			  struct perf_output_handle *handle,
 			  unsigned long from, unsigned long to)
 {
+	struct perf_buffer *rb = aux_handle->rb;
 	unsigned long tocopy, remainder, len = 0;
-	struct ring_buffer *rb = aux_handle->rb;
 	void *addr;
 
 	from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
@@ -626,7 +626,7 @@ static struct page *rb_alloc_aux_page(int node, int order)
 	return page;
 }
 
-static void rb_free_aux_page(struct ring_buffer *rb, int idx)
+static void rb_free_aux_page(struct perf_buffer *rb, int idx)
 {
 	struct page *page = virt_to_page(rb->aux_pages[idx]);
 
@@ -635,7 +635,7 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
 	__free_page(page);
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
+static void __rb_free_aux(struct perf_buffer *rb)
 {
 	int pg;
 
@@ -662,7 +662,7 @@ static void __rb_free_aux(struct ring_buffer *rb)
 	}
 }
 
-int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
 	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
@@ -753,7 +753,7 @@ out:
 	return ret;
 }
 
-void rb_free_aux(struct ring_buffer *rb)
+void rb_free_aux(struct perf_buffer *rb)
 {
 	if (refcount_dec_and_test(&rb->aux_refcount))
 		__rb_free_aux(rb);
@@ -766,7 +766,7 @@ void rb_free_aux(struct ring_buffer *rb)
 */
 
 static struct page *
-__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
 {
 	if (pgoff > rb->nr_pages)
 		return NULL;
@@ -798,13 +798,13 @@ static void perf_mmap_free_page(void *addr)
 	__free_page(page);
 }
 
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	unsigned long size;
 	int i;
 
-	size = sizeof(struct ring_buffer);
+	size = sizeof(struct perf_buffer);
 	size += nr_pages * sizeof(void *);
 
 	if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
@@ -843,7 +843,7 @@ fail:
 	return NULL;
 }
 
-void rb_free(struct ring_buffer *rb)
+void rb_free(struct perf_buffer *rb)
 {
 	int i;
 
@@ -854,13 +854,13 @@ void rb_free(struct ring_buffer *rb)
 }
 
 #else
-static int data_page_nr(struct ring_buffer *rb)
+static int data_page_nr(struct perf_buffer *rb)
 {
 	return rb->nr_pages << page_order(rb);
 }
 
 static struct page *
-__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
 {
 	/* The '>' counts in the user page. */
 	if (pgoff > data_page_nr(rb))
@@ -878,11 +878,11 @@ static void perf_mmap_unmark_page(void *addr)
 
 static void rb_free_work(struct work_struct *work)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	void *base;
 	int i, nr;
 
-	rb = container_of(work, struct ring_buffer, work);
+	rb = container_of(work, struct perf_buffer, work);
 	nr = data_page_nr(rb);
 
 	base = rb->user_page;
@@ -894,18 +894,18 @@ static void rb_free_work(struct work_struct *work)
 	kfree(rb);
 }
 
-void rb_free(struct ring_buffer *rb)
+void rb_free(struct perf_buffer *rb)
 {
 	schedule_work(&rb->work);
 }
 
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-	struct ring_buffer *rb;
+	struct perf_buffer *rb;
 	unsigned long size;
 	void *all_buf;
 
-	size = sizeof(struct ring_buffer);
+	size = sizeof(struct perf_buffer);
 	size += sizeof(void *);
 
 	rb = kzalloc(size, GFP_KERNEL);
@@ -939,7 +939,7 @@ fail:
 #endif
 
 struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
 {
 	if (rb->aux_nr_pages) {
 		/* above AUX space */
diff --git a/kernel/fork.c b/kernel/fork.c
index ef82feb4bddc..60a1295f4384 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -692,7 +692,7 @@ void __mmdrop(struct mm_struct *mm)
 	WARN_ON_ONCE(mm == current->active_mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
-	mmu_notifier_mm_destroy(mm);
+	mmu_notifier_subscriptions_destroy(mm);
 	check_mm(mm);
 	put_user_ns(mm->user_ns);
 	free_mm(mm);
@@ -1025,7 +1025,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
-	mmu_notifier_mm_init(mm);
+	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 818b2802d3e7..3089a60ea8f9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -731,6 +731,13 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
 *
 *	Wakeup mode lets this IRQ wake the system from sleep
 *	states like "suspend to RAM".
+ *
+ *	Note: irq enable/disable state is completely orthogonal
+ *	to the enable/disable state of irq wake. An irq can be
+ *	disabled with disable_irq() and still wake the system as
+ *	long as the irq has wake enabled. If this does not hold,
+ *	then the underlying irq chip and the related driver need
+ *	to be investigated.
 */
 int irq_set_irq_wake(unsigned int irq, unsigned int on)
 {
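The comment added to irq_set_irq_wake() spells out that interrupt delivery and wake capability are independent states. A hedged driver-style sketch of the suspend/resume pairing this enables; the driver, its demo_priv structure and the callbacks are hypothetical:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/pm_wakeup.h>

struct demo_priv {
	int irq;
};

/* Suspend path: the irq stays masked for normal delivery, yet remains
 * armed as a system wakeup source - exactly the orthogonality the new
 * comment documents. */
static int demo_suspend(struct device *dev)
{
	struct demo_priv *priv = dev_get_drvdata(dev);

	disable_irq(priv->irq);			/* stop normal handling */
	if (device_may_wakeup(dev))
		enable_irq_wake(priv->irq);	/* still wakes the system */
	return 0;
}

static int demo_resume(struct device *dev)
{
	struct demo_priv *priv = dev_get_drvdata(dev);

	if (device_may_wakeup(dev))
		disable_irq_wake(priv->irq);
	enable_irq(priv->irq);
	return 0;
}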
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index cfc4f088a0e7..9e5783d98033 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -176,20 +176,20 @@ static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
 }
 
-static const struct file_operations irq_affinity_proc_fops = {
-	.open		= irq_affinity_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= irq_affinity_proc_write,
+static const struct proc_ops irq_affinity_proc_ops = {
+	.proc_open	= irq_affinity_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= irq_affinity_proc_write,
 };
 
-static const struct file_operations irq_affinity_list_proc_fops = {
-	.open		= irq_affinity_list_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= irq_affinity_list_proc_write,
+static const struct proc_ops irq_affinity_list_proc_ops = {
+	.proc_open	= irq_affinity_list_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= irq_affinity_list_proc_write,
 };
 
 #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
@@ -246,12 +246,12 @@ static int default_affinity_open(struct inode *inode, struct file *file)
 	return single_open(file, default_affinity_show, PDE_DATA(inode));
 }
 
-static const struct file_operations default_affinity_proc_fops = {
-	.open		= default_affinity_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= default_affinity_write,
+static const struct proc_ops default_affinity_proc_ops = {
+	.proc_open	= default_affinity_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= default_affinity_write,
 };
 
 static int irq_node_proc_show(struct seq_file *m, void *v)
@@ -342,7 +342,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0644, desc->dir,
-			 &irq_affinity_proc_fops, irqp);
+			 &irq_affinity_proc_ops, irqp);
 
 	/* create /proc/irq/<irq>/affinity_hint */
 	proc_create_single_data("affinity_hint", 0444, desc->dir,
@@ -350,7 +350,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 
 	/* create /proc/irq/<irq>/smp_affinity_list */
 	proc_create_data("smp_affinity_list", 0644, desc->dir,
-			 &irq_affinity_list_proc_fops, irqp);
+			 &irq_affinity_list_proc_ops, irqp);
 
 	proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show,
 				irqp);
@@ -401,7 +401,7 @@ static void register_default_affinity_proc(void)
 {
 #ifdef CONFIG_SMP
 	proc_create("irq/default_smp_affinity", 0644, NULL,
-		    &default_affinity_proc_fops);
+		    &default_affinity_proc_ops);
 #endif
 }
 
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 136ce049c4ad..d812b90f4c86 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -698,16 +698,16 @@ const char *kdb_walk_kallsyms(loff_t *pos)
 }
 #endif	/* CONFIG_KGDB_KDB */
 
-static const struct file_operations kallsyms_operations = {
-	.open = kallsyms_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_private,
+static const struct proc_ops kallsyms_proc_ops = {
+	.proc_open = kallsyms_open,
+	.proc_read = seq_read,
+	.proc_lseek = seq_lseek,
+	.proc_release = seq_release_private,
 };
 
 static int __init kallsyms_init(void)
 {
-	proc_create("kallsyms", 0444, NULL, &kallsyms_operations);
+	proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops);
 	return 0;
 }
 device_initcall(kallsyms_init);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index e3acead004e6..8d1c15832e55 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -255,17 +255,17 @@ static int lstats_open(struct inode *inode, struct file *filp)
 	return single_open(filp, lstats_show, NULL);
 }
 
-static const struct file_operations lstats_fops = {
-	.open		= lstats_open,
-	.read		= seq_read,
-	.write		= lstats_write,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+static const struct proc_ops lstats_proc_ops = {
+	.proc_open	= lstats_open,
+	.proc_read	= seq_read,
+	.proc_write	= lstats_write,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
 };
 
 static int __init init_lstats_procfs(void)
 {
-	proc_create("latency_stats", 0644, NULL, &lstats_fops);
+	proc_create("latency_stats", 0644, NULL, &lstats_proc_ops);
 	return 0;
 }
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 9bb6d2497b04..231684cfc5ae 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -643,12 +643,12 @@ static int lock_stat_release(struct inode *inode, struct file *file)
 	return seq_release(inode, file);
 }
 
-static const struct file_operations proc_lock_stat_operations = {
-	.open		= lock_stat_open,
-	.write		= lock_stat_write,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= lock_stat_release,
+static const struct proc_ops lock_stat_proc_ops = {
+	.proc_open	= lock_stat_open,
+	.proc_write	= lock_stat_write,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= lock_stat_release,
 };
 #endif /* CONFIG_LOCK_STAT */
 
@@ -660,8 +660,7 @@ static int __init lockdep_proc_init(void)
 #endif
 	proc_create_single("lockdep_stats", S_IRUSR, NULL, lockdep_stats_show);
 #ifdef CONFIG_LOCK_STAT
-	proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
-		    &proc_lock_stat_operations);
+	proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, &lock_stat_proc_ops);
 #endif
 
 	return 0;
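The conversions in configs.c, irq/proc.c, kallsyms.c, latencytop.c and lockdep_proc.c above all follow one mechanical pattern: /proc entries now register a struct proc_ops with proc_-prefixed members instead of a struct file_operations. A minimal module-style sketch of the new registration, with a hypothetical /proc/demo entry:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from /proc/demo\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

/* New-style registration: proc_ops instead of file_operations. */
static const struct proc_ops demo_proc_ops = {
	.proc_open	= demo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

static int __init demo_init(void)
{
	return proc_create("demo", 0444, NULL, &demo_proc_ops) ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");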
diff --git a/kernel/module.c b/kernel/module.c
index ac058a5ad1d1..33569a01d6e1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -214,7 +214,8 @@ static struct module *mod_find(unsigned long addr)
 {
 	struct module *mod;
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list,
+				lockdep_is_held(&module_mutex)) {
 		if (within_module(addr, mod))
 			return mod;
 	}
@@ -448,7 +449,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
 		return true;
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list,
+				lockdep_is_held(&module_mutex)) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
 			  NOT_GPL_ONLY, false },
@@ -616,7 +618,8 @@ static struct module *find_module_all(const char *name, size_t len,
 
 	module_assert_mutex_or_preempt();
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list,
+				lockdep_is_held(&module_mutex)) {
 		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
 			continue;
 		if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
@@ -1781,6 +1784,8 @@ static int module_add_modinfo_attrs(struct module *mod)
 error_out:
 	if (i > 0)
 		module_remove_modinfo_attrs(mod, --i);
+	else
+		kfree(mod->modinfo_attrs);
 	return error;
 }
 
@@ -2834,7 +2839,7 @@ static int module_sig_check(struct load_info *info, int flags)
 		reason = "Loading of module with unavailable key";
 	decide:
 		if (is_module_sig_enforced()) {
-			pr_notice("%s is rejected\n", reason);
+			pr_notice("%s: %s is rejected\n", info->name, reason);
 			return -EKEYREJECTED;
 		}
 
@@ -3011,9 +3016,7 @@ static int setup_load_info(struct load_info *info, int flags)
 
 	/* Try to find a name early so we can log errors with a module name */
 	info->index.info = find_sec(info, ".modinfo");
-	if (!info->index.info)
-		info->name = "(missing .modinfo section)";
-	else
+	if (info->index.info)
 		info->name = get_modinfo(info, "name");
 
 	/* Find internal symbols and strings. */
@@ -3028,14 +3031,15 @@ static int setup_load_info(struct load_info *info, int flags)
 	}
 
 	if (info->index.sym == 0) {
-		pr_warn("%s: module has no symbols (stripped?)\n", info->name);
+		pr_warn("%s: module has no symbols (stripped?)\n",
+			info->name ?: "(missing .modinfo section or name field)");
 		return -ENOEXEC;
 	}
 
 	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
 	if (!info->index.mod) {
 		pr_warn("%s: No module found in object\n",
-			info->name ?: "(missing .modinfo name field)");
+			info->name ?: "(missing .modinfo section or name field)");
 		return -ENOEXEC;
 	}
 	/* This is temporary: point mod into copy of data. */
@@ -4350,16 +4354,16 @@ static int modules_open(struct inode *inode, struct file *file)
 	return err;
 }
 
-static const struct file_operations proc_modules_operations = {
-	.open		= modules_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
+static const struct proc_ops modules_proc_ops = {
+	.proc_open	= modules_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
 };
 
 static int __init proc_modules_init(void)
 {
-	proc_create("modules", 0, NULL, &proc_modules_operations);
+	proc_create("modules", 0, NULL, &modules_proc_ops);
 	return 0;
 }
 module_init(proc_modules_init);
diff --git a/kernel/pid.c b/kernel/pid.c
index 2278e249141d..0f4ecb57214c 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -578,3 +578,93 @@ void __init pid_idr_init(void)
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
 }
+
+static struct file *__pidfd_fget(struct task_struct *task, int fd)
+{
+	struct file *file;
+	int ret;
+
+	ret = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
+		file = fget_task(task, fd);
+	else
+		file = ERR_PTR(-EPERM);
+
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return file ?: ERR_PTR(-EBADF);
+}
+
+static int pidfd_getfd(struct pid *pid, int fd)
+{
+	struct task_struct *task;
+	struct file *file;
+	int ret;
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		return -ESRCH;
+
+	file = __pidfd_fget(task, fd);
+	put_task_struct(task);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ret = security_file_receive(file);
+	if (ret) {
+		fput(file);
+		return ret;
+	}
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		fput(file);
+	else
+		fd_install(ret, file);
+
+	return ret;
+}
+
+/**
+ * sys_pidfd_getfd() - Get a file descriptor from another process
+ *
+ * @pidfd:	the pidfd file descriptor of the process
+ * @fd:		the file descriptor number to get
+ * @flags:	flags on how to get the fd (reserved)
+ *
+ * This syscall gets a copy of a file descriptor from another process
+ * based on the pidfd, and file descriptor number. It requires that
+ * the calling process has the ability to ptrace the process represented
+ * by the pidfd. The process which is having its file descriptor copied
+ * is otherwise unaffected.
+ *
+ * Return: On success, a cloexec file descriptor is returned.
+ *         On error, a negative errno number will be returned.
+ */
+SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
+		unsigned int, flags)
+{
+	struct pid *pid;
+	struct fd f;
+	int ret;
+
+	/* flags is currently unused - make sure it's unset */
+	if (flags)
+		return -EINVAL;
+
+	f = fdget(pidfd);
+	if (!f.file)
+		return -EBADF;
+
+	pid = pidfd_pid(f.file);
+	if (IS_ERR(pid))
+		ret = PTR_ERR(pid);
+	else
+		ret = pidfd_getfd(pid, fd);
+
+	fdput(f);
+	return ret;
+}
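The kdoc above is the user-visible contract of the new syscall. A minimal userspace sketch that copies an fd out of another process; the __NR_ values are the x86_64 numbers from the v5.6 uapi headers and may differ elsewhere:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434	/* x86_64 value, assumed here */
#endif
#ifndef __NR_pidfd_getfd
#define __NR_pidfd_getfd 438	/* x86_64 value, assumed here */
#endif

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <fd>\n", argv[0]);
		return 1;
	}

	int pidfd = syscall(__NR_pidfd_open, (pid_t)atoi(argv[1]), 0);
	if (pidfd < 0) { perror("pidfd_open"); return 1; }

	/* Copy the target's fd into this process; requires the caller to
	 * be able to ptrace the target, per the kdoc above. */
	int fd = syscall(__NR_pidfd_getfd, pidfd, atoi(argv[2]), 0);
	if (fd < 0) { perror("pidfd_getfd"); return 1; }

	printf("local copy of remote fd is %d (cloexec)\n", fd);
	return 0;
}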
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1ef6f75d92f1..fada22dc4ab6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2770,8 +2770,6 @@ void register_console(struct console *newcon)
 		 * for us.
 		 */
 		logbuf_lock_irqsave(flags);
-		console_seq = syslog_seq;
-		console_idx = syslog_idx;
 		/*
 		 * We're about to replay the log buffer.  Only do this to the
 		 * just-registered console to avoid excessive message spam to
@@ -2783,6 +2781,8 @@ void register_console(struct console *newcon)
 		 */
 		exclusive_console = newcon;
 		exclusive_console_stop_seq = console_seq;
+		console_seq = syslog_seq;
+		console_idx = syslog_idx;
 		logbuf_unlock_irqrestore(flags);
 	}
 	console_unlock();
diff --git a/kernel/profile.c b/kernel/profile.c
index 4b144b02ca5d..6f69a4195d56 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,18 +442,18 @@ static ssize_t prof_cpu_mask_proc_write(struct file *file,
 	return err;
 }
 
-static const struct file_operations prof_cpu_mask_proc_fops = {
-	.open		= prof_cpu_mask_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= prof_cpu_mask_proc_write,
+static const struct proc_ops prof_cpu_mask_proc_ops = {
+	.proc_open	= prof_cpu_mask_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= prof_cpu_mask_proc_write,
 };
 
 void create_prof_cpu_mask(void)
 {
 	/* create /proc/irq/prof_cpu_mask */
-	proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_fops);
+	proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops);
 }
 
 /*
@@ -517,10 +517,10 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
 	return count;
 }
 
-static const struct file_operations proc_profile_operations = {
-	.read		= read_profile,
-	.write		= write_profile,
-	.llseek		= default_llseek,
+static const struct proc_ops profile_proc_ops = {
+	.proc_read	= read_profile,
+	.proc_write	= write_profile,
+	.proc_lseek	= default_llseek,
 };
 
 int __ref create_proc_profile(void)
@@ -548,7 +548,7 @@ int __ref create_proc_profile(void)
 	err = 0;
 #endif
 	entry = proc_create("profile", S_IWUSR | S_IRUGO,
-			    NULL, &proc_profile_operations);
+			    NULL, &profile_proc_ops);
 	if (!entry)
 		goto err_state_onl;
 	proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6935a9e2b094..dcbd75791f39 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -508,7 +508,6 @@ static void synchronize_rcu_expedited_wait(void)
 				tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
 			}
 		}
-		WARN_ON_ONCE(1);
 	}
 
 	for (;;) {
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index db7b50bba3f1..ac4bd0ca11cc 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1251,40 +1251,40 @@ static int psi_fop_release(struct inode *inode, struct file *file)
 	return single_release(inode, file);
 }
 
-static const struct file_operations psi_io_fops = {
-	.open		= psi_io_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.write		= psi_io_write,
-	.poll		= psi_fop_poll,
-	.release	= psi_fop_release,
+static const struct proc_ops psi_io_proc_ops = {
+	.proc_open	= psi_io_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= psi_io_write,
+	.proc_poll	= psi_fop_poll,
+	.proc_release	= psi_fop_release,
 };
 
-static const struct file_operations psi_memory_fops = {
-	.open		= psi_memory_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.write		= psi_memory_write,
-	.poll		= psi_fop_poll,
-	.release	= psi_fop_release,
+static const struct proc_ops psi_memory_proc_ops = {
+	.proc_open	= psi_memory_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= psi_memory_write,
+	.proc_poll	= psi_fop_poll,
+	.proc_release	= psi_fop_release,
 };
 
-static const struct file_operations psi_cpu_fops = {
-	.open		= psi_cpu_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.write		= psi_cpu_write,
-	.poll		= psi_fop_poll,
-	.release	= psi_fop_release,
+static const struct proc_ops psi_cpu_proc_ops = {
+	.proc_open	= psi_cpu_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= psi_cpu_write,
+	.proc_poll	= psi_fop_poll,
+	.proc_release	= psi_fop_release,
 };
 
 static int __init psi_proc_init(void)
 {
 	if (psi_enable) {
 		proc_mkdir("pressure", NULL);
-		proc_create("pressure/io", 0, NULL, &psi_io_fops);
-		proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
-		proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
+		proc_create("pressure/io", 0, NULL, &psi_io_proc_ops);
+		proc_create("pressure/memory", 0, NULL, &psi_memory_proc_ops);
+		proc_create("pressure/cpu", 0, NULL, &psi_cpu_proc_ops);
 	}
 	return 0;
 }
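The psi.c conversion keeps a poll hook (.proc_poll), so the PSI trigger interface still works: write a trigger to one of the /proc/pressure files and poll() for POLLPRI. A minimal userspace sketch following the format documented in Documentation/accounting/psi.rst; the threshold and window values are arbitrary examples:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Trigger: notify when "some" memory stall exceeds 150ms within
	 * any 1s window ("some <threshold-us> <window-us>"). */
	const char trig[] = "some 150000 1000000";
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, trig, strlen(trig) + 1) < 0) { perror("write"); return 1; }

	struct pollfd pfd = { .fd = fd, .events = POLLPRI };
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
		puts("memory pressure threshold crossed");
	close(fd);
	return 0;
}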
diff --git a/kernel/signal.c b/kernel/signal.c
index bcd46f547db3..9ad8dea93dbb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1383,7 +1383,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 		 * must see ->sighand == NULL.
 		 */
 		spin_lock_irqsave(&sighand->siglock, *flags);
-		if (likely(sighand == tsk->sighand))
+		if (likely(sighand == rcu_access_pointer(tsk->sighand)))
 			break;
 		spin_unlock_irqrestore(&sighand->siglock, *flags);
 	}
diff --git a/kernel/sys.c b/kernel/sys.c
index a9331f101883..f9bc5c303e3f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2261,6 +2261,8 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
 	return -EINVAL;
 }
 
+#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LESS_THROTTLE)
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2488,6 +2490,29 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = GET_TAGGED_ADDR_CTRL();
 		break;
+	case PR_SET_IO_FLUSHER:
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		if (arg2 == 1)
+			current->flags |= PR_IO_FLUSHER;
+		else if (!arg2)
+			current->flags &= ~PR_IO_FLUSHER;
+		else
+			return -EINVAL;
+		break;
+	case PR_GET_IO_FLUSHER:
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+
+		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
+		break;
 	default:
 		error = -EINVAL;
 		break;
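The PR_SET_IO_FLUSHER/PR_GET_IO_FLUSHER cases above gate on CAP_SYS_RESOURCE and insist that every unused argument is zero. A minimal userspace sketch; the fallback constants 57/58 are the values from the v5.6 uapi prctl.h, defined here only in case the libc headers predate them:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_IO_FLUSHER
#define PR_SET_IO_FLUSHER 57	/* v5.6 uapi value, assumed */
#define PR_GET_IO_FLUSHER 58	/* v5.6 uapi value, assumed */
#endif

int main(void)
{
	/* Mark this process as an IO flusher (needs CAP_SYS_RESOURCE);
	 * unused arguments must be zero or the kernel returns -EINVAL. */
	if (prctl(PR_SET_IO_FLUSHER, 1, 0, 0, 0)) {
		perror("PR_SET_IO_FLUSHER");
		return 1;
	}
	printf("IO_FLUSHER state: %d\n", prctl(PR_GET_IO_FLUSHER, 0, 0, 0, 0));
	return 0;
}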
diff --git a/kernel/sysctl-test.c b/kernel/sysctl-test.c
index 2a63241a8453..ccb78509f1a8 100644
--- a/kernel/sysctl-test.c
+++ b/kernel/sysctl-test.c
@@ -389,4 +389,6 @@ static struct kunit_suite sysctl_test_suite = {
 	.test_cases = sysctl_test_cases,
 };
 
-kunit_test_suite(sysctl_test_suite);
+kunit_test_suites(&sysctl_test_suite);
+
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fff5f64981c6..428beb69426a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused)
 	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
 	if (next_cpu >= nr_cpu_ids)
 		next_cpu = cpumask_first(cpu_online_mask);
-	watchdog_timer.expires += WATCHDOG_INTERVAL;
-	add_timer_on(&watchdog_timer, next_cpu);
+
+	/*
+	 * Arm timer if not already pending: could race with concurrent
+	 * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+	 */
+	if (!timer_pending(&watchdog_timer)) {
+		watchdog_timer.expires += WATCHDOG_INTERVAL;
+		add_timer_on(&watchdog_timer, next_cpu);
+	}
 out:
 	spin_unlock(&watchdog_lock);
 }
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 9e59c9ea92aa..ca4e6d57d68b 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -97,20 +97,20 @@ static int do_getitimer(int which, struct itimerspec64 *value)
 	return 0;
 }
 
-static int put_itimerval(struct itimerval __user *o,
+static int put_itimerval(struct __kernel_old_itimerval __user *o,
 			 const struct itimerspec64 *i)
 {
-	struct itimerval v;
+	struct __kernel_old_itimerval v;
 
 	v.it_interval.tv_sec = i->it_interval.tv_sec;
 	v.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC;
 	v.it_value.tv_sec = i->it_value.tv_sec;
 	v.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC;
-	return copy_to_user(o, &v, sizeof(struct itimerval)) ? -EFAULT : 0;
+	return copy_to_user(o, &v, sizeof(struct __kernel_old_itimerval)) ? -EFAULT : 0;
 }
 
-SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
+SYSCALL_DEFINE2(getitimer, int, which, struct __kernel_old_itimerval __user *, value)
 {
 	struct itimerspec64 get_buffer;
 	int error = do_getitimer(which, &get_buffer);
@@ -314,11 +314,11 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
 
 #endif
 
-static int get_itimerval(struct itimerspec64 *o, const struct itimerval __user *i)
+static int get_itimerval(struct itimerspec64 *o, const struct __kernel_old_itimerval __user *i)
 {
-	struct itimerval v;
+	struct __kernel_old_itimerval v;
 
-	if (copy_from_user(&v, i, sizeof(struct itimerval)))
+	if (copy_from_user(&v, i, sizeof(struct __kernel_old_itimerval)))
 		return -EFAULT;
 
 	/* Validate the timevals in value. */
@@ -333,8 +333,8 @@ static int get_itimerval(struct itimerspec64 *o, const struct itimerval __user *i)
 	return 0;
 }
 
-SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
-		struct itimerval __user *, ovalue)
+SYSCALL_DEFINE3(setitimer, int, which, struct __kernel_old_itimerval __user *, value,
+		struct __kernel_old_itimerval __user *, ovalue)
 {
 	struct itimerspec64 set_buffer, get_buffer;
 	int error;
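The itimer.c changes only rename the kernel-side type to __kernel_old_itimerval; the layout and ABI are unchanged, so the classic setitimer()/getitimer() usage from userspace still looks like this (the interval values are arbitrary):

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

static volatile sig_atomic_t ticks;

static void on_alarm(int sig) { (void)sig; ticks++; }

int main(void)
{
	/* Fire SIGALRM every 250ms, starting 250ms from now. */
	struct itimerval val = {
		.it_interval = { .tv_sec = 0, .tv_usec = 250000 },
		.it_value    = { .tv_sec = 0, .tv_usec = 250000 },
	};

	signal(SIGALRM, on_alarm);
	if (setitimer(ITIMER_REAL, &val, NULL)) { perror("setitimer"); return 1; }

	while (ticks < 4)
		pause();

	struct itimerval cur;
	getitimer(ITIMER_REAL, &cur);
	printf("4 ticks; %ld usec until the next one\n",
	       (long)cur.it_value.tv_usec);
	return 0;
}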
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 704ccd9451b0..cdd7386115ff 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -626,10 +626,12 @@ EXPORT_SYMBOL(__usecs_to_jiffies);
 * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
 * value to a scaled second value.
 */
-static unsigned long
-__timespec64_to_jiffies(u64 sec, long nsec)
+
+unsigned long
+timespec64_to_jiffies(const struct timespec64 *value)
 {
-	nsec = nsec + TICK_NSEC - 1;
+	u64 sec = value->tv_sec;
+	long nsec = value->tv_nsec + TICK_NSEC - 1;
 
 	if (sec >= MAX_SEC_IN_JIFFIES){
 		sec = MAX_SEC_IN_JIFFIES;
@@ -640,18 +642,6 @@ __timespec64_to_jiffies(u64 sec, long nsec)
 	   (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
 
 }
-
-static unsigned long
-__timespec_to_jiffies(unsigned long sec, long nsec)
-{
-	return __timespec64_to_jiffies((u64)sec, nsec);
-}
-
-unsigned long
-timespec64_to_jiffies(const struct timespec64 *value)
-{
-	return __timespec64_to_jiffies(value->tv_sec, value->tv_nsec);
-}
 EXPORT_SYMBOL(timespec64_to_jiffies);
 
 void
@@ -669,44 +659,6 @@ jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value)
 EXPORT_SYMBOL(jiffies_to_timespec64);
 
 /*
- * We could use a similar algorithm to timespec_to_jiffies (with a
- * different multiplier for usec instead of nsec). But this has a
- * problem with rounding: we can't exactly add TICK_NSEC - 1 to the
- * usec value, since it's not necessarily integral.
- *
- * We could instead round in the intermediate scaled representation
- * (i.e. in units of 1/2^(large scale) jiffies) but that's also
- * perilous: the scaling introduces a small positive error, which
- * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
- * units to the intermediate before shifting) leads to accidental
- * overflow and overestimates.
- *
- * At the cost of one additional multiplication by a constant, just
- * use the timespec implementation.
- */
-unsigned long
-timeval_to_jiffies(const struct timeval *value)
-{
-	return __timespec_to_jiffies(value->tv_sec,
-				     value->tv_usec * NSEC_PER_USEC);
-}
-EXPORT_SYMBOL(timeval_to_jiffies);
-
-void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
-{
-	/*
-	 * Convert jiffies to nanoseconds and separate with
-	 * one divide.
-	 */
-	u32 rem;
-
-	value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
-				    NSEC_PER_SEC, &rem);
-	value->tv_usec = rem / NSEC_PER_USEC;
-}
-EXPORT_SYMBOL(jiffies_to_timeval);
-
-/*
  * Convert jiffies/jiffies_64 to clock_t and back.
  */
 clock_t jiffies_to_clock_t(unsigned long x)
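timespec64_to_jiffies() above keeps the round-up by TICK_NSEC - 1, so any partial tick counts as a full jiffy. The kernel computes this in scaled fixed-point to avoid division; a plain-math userspace sketch of just the rounding behaviour, with HZ=250 as an assumed build-time value:

#include <stdint.h>
#include <stdio.h>

#define HZ		250UL		/* assumed; the real value is a build-time config */
#define NSEC_PER_SEC	1000000000UL
#define TICK_NSEC	(NSEC_PER_SEC / HZ)

/* Plain-math equivalent of the round-up in timespec64_to_jiffies():
 * adding TICK_NSEC - 1 before dividing rounds any partial tick up. */
static unsigned long to_jiffies(uint64_t sec, uint64_t nsec)
{
	return sec * HZ + (nsec + TICK_NSEC - 1) / TICK_NSEC;
}

int main(void)
{
	printf("%lu\n", to_jiffies(1, 1));		/* 251: 1s plus one partial tick */
	printf("%lu\n", to_jiffies(0, TICK_NSEC));	/* 1: exactly one tick */
	return 0;
}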
Every time a latency is greater than tracing_thresh, it will be recorded into the ring buffer. +config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && PCI + select GENERIC_TRACER + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/trace/mmiotrace.rst. + If you are not helping to develop drivers, say N. + config ENABLE_DEFAULT_TRACERS bool "Trace process context switches and events" depends on !GENERIC_TRACER @@ -410,30 +503,6 @@ config BRANCH_TRACER Say N if unsure. -config STACK_TRACER - bool "Trace max stack" - depends on HAVE_FUNCTION_TRACER - select FUNCTION_TRACER - select STACKTRACE - select KALLSYMS - help - This special tracer records the maximum stack footprint of the - kernel and displays it in /sys/kernel/debug/tracing/stack_trace. - - This tracer works by hooking into every function call that the - kernel executes, and keeping a maximum stack depth value and - stack-trace saved. If this is configured with DYNAMIC_FTRACE - then it will not have any overhead while the stack tracer - is disabled. - - To enable the stack tracer on bootup, pass in 'stacktrace' - on the kernel command line. - - The stack tracer can also be enabled or disabled via the - sysctl kernel.stack_tracer_enabled - - Say N if unsure. - config BLK_DEV_IO_TRACE bool "Support for tracing block IO actions" depends on SYSFS @@ -531,53 +600,6 @@ config DYNAMIC_EVENTS config PROBE_EVENTS def_bool n -config DYNAMIC_FTRACE - bool "enable/disable function tracing dynamically" - depends on FUNCTION_TRACER - depends on HAVE_DYNAMIC_FTRACE - default y - help - This option will modify all the calls to function tracing - dynamically (will patch them out of the binary image and - replace them with a No-Op instruction) on boot up. During - compile time, a table is made of all the locations that ftrace - can function trace, and this table is linked into the kernel - image. When this is enabled, functions can be individually - enabled, and the functions not enabled will not affect - performance of the system. - - See the files in /sys/kernel/debug/tracing: - available_filter_functions - set_ftrace_filter - set_ftrace_notrace - - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but - otherwise has native performance as long as no tracing is active. - -config DYNAMIC_FTRACE_WITH_REGS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_REGS - -config DYNAMIC_FTRACE_WITH_DIRECT_CALLS - def_bool y - depends on DYNAMIC_FTRACE - depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - -config FUNCTION_PROFILER - bool "Kernel function profiler" - depends on FUNCTION_TRACER - default n - help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. A "functions" file is created in - the trace_stat directory; this file shows the list of functions that - have been hit and their counters. - - If in doubt, say N. 
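The DYNAMIC_FTRACE and FUNCTION_TRACER entries being moved above describe the hook machinery that kernel code consumes through the ftrace_ops API. As a rough, hedged sketch of that consumer side, here is a minimal module registering a callback, using the same callback signature that ftrace_pid_func() has elsewhere in this diff (struct pt_regs era); the traced function name is purely illustrative:

#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/string.h>

/* Called on entry to every function matched by the filter below. */
static void my_trace_func(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *op, struct pt_regs *regs)
{
	/* Must be fast and recursion-aware; runs on every matched call. */
}

static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

static int __init my_tracer_init(void)
{
	int ret;

	/* Patch in only the call sites we care about (illustrative name). */
	ret = ftrace_set_filter(&my_ops, "do_sys_open",
				strlen("do_sys_open"), 0);
	if (ret)
		return ret;

	return register_ftrace_function(&my_ops);
}

static void __exit my_tracer_exit(void)
{
	unregister_ftrace_function(&my_ops);
}

module_init(my_tracer_init);
module_exit(my_tracer_exit);
MODULE_LICENSE("GPL");

With DYNAMIC_FTRACE, only the filtered call sites are patched from no-ops back into calls, which is what the help text above means by non-enabled functions not affecting system performance.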
- config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS @@ -592,54 +614,6 @@ config FTRACE_MCOUNT_RECORD depends on DYNAMIC_FTRACE depends on HAVE_FTRACE_MCOUNT_RECORD -config FTRACE_SELFTEST - bool - -config FTRACE_STARTUP_TEST - bool "Perform a startup test on ftrace" - depends on GENERIC_TRACER - select FTRACE_SELFTEST - help - This option performs a series of startup tests on ftrace. On bootup - a series of tests are made to verify that the tracer is - functioning properly. It will do tests on all the configured - tracers of ftrace. - -config EVENT_TRACE_STARTUP_TEST - bool "Run selftest on trace events" - depends on FTRACE_STARTUP_TEST - default y - help - This option performs a test on all trace events in the system. - It basically just enables each event and runs some code that - will trigger events (not necessarily the event it enables) - This may take some time run as there are a lot of events. - -config EVENT_TRACE_TEST_SYSCALLS - bool "Run selftest on syscall events" - depends on EVENT_TRACE_STARTUP_TEST - help - This option will also enable testing every syscall event. - It only enables the event and disables it and runs various loads - with the event enabled. This adds a bit more time for kernel boot - up since it runs this on every system call defined. - - TBD - enable a way to actually call the syscalls as we test their - events - -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on HAVE_MMIOTRACE_SUPPORT && PCI - select GENERIC_TRACER - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/trace/mmiotrace.rst. - If you are not helping to develop drivers, say N. - config TRACING_MAP bool depends on ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -680,16 +654,6 @@ config TRACE_EVENT_INJECT If unsure, say N. -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. - config TRACEPOINT_BENCHMARK bool "Add tracepoint that benchmarks tracepoints" help @@ -736,6 +700,81 @@ config RING_BUFFER_BENCHMARK If unsure, say N. +config TRACE_EVAL_MAP_FILE + bool "Show eval mappings for trace events" + depends on TRACING + help + The "print fmt" of the trace events will show the enum/sizeof names + instead of their values. This can cause problems for user space tools + that use this string to parse the raw data as user space does not know + how to convert the string to its value. + + To fix this, there's a special macro in the kernel that can be used + to convert an enum/sizeof into its value. If this macro is used, then + the print fmt strings will be converted to their values. + + If something does not get converted properly, this option can be + used to show what enums/sizeof the kernel tried to convert. + + This option is for debugging the conversions. A file is created + in the tracing directory called "eval_map" that will show the + names matched with their values and what trace event system they + belong to. + + Normally, the mapping of the strings to values will be freed after + boot up or module load.
With this option, they will not be freed, as + they are needed for the "eval_map" file. Enabling this option will + increase the memory footprint of the running kernel. + + If unsure, say N. + +config GCOV_PROFILE_FTRACE + bool "Enable GCOV profiling on ftrace subsystem" + depends on GCOV_KERNEL + help + Enable GCOV profiling on ftrace subsystem for checking + which functions/lines are tested. + + If unsure, say N. + + Note that on a kernel compiled with this config, ftrace will + run significantly slower. + +config FTRACE_SELFTEST + bool + +config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" + depends on GENERIC_TRACER + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup + a series of tests are made to verify that the tracer is + functioning properly. It will do tests on all the configured + tracers of ftrace. + +config EVENT_TRACE_STARTUP_TEST + bool "Run selftest on trace events" + depends on FTRACE_STARTUP_TEST + default y + help + This option performs a test on all trace events in the system. + It basically just enables each event and runs some code that + will trigger events (not necessarily the event it enables). + This may take some time to run as there are a lot of events. + +config EVENT_TRACE_TEST_SYSCALLS + bool "Run selftest on syscall events" + depends on EVENT_TRACE_STARTUP_TEST + help + This option will also enable testing every syscall event. + It only enables the event and disables it and runs various loads + with the event enabled. This adds a bit more time for kernel boot + up since it runs this on every system call defined. + + TBD - enable a way to actually call the syscalls as we test their + events + config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER @@ -759,8 +798,18 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + config PREEMPTIRQ_DELAY_TEST - tristate "Preempt / IRQ disable delay thread to test latency tracers" + tristate "Test module to create a preempt / IRQ disable delay thread to test latency tracers" depends on m help Select this option to build a test module that can help test latency @@ -774,45 +823,30 @@ config PREEMPTIRQ_DELAY_TEST If unsure, say N -config TRACE_EVAL_MAP_FILE - bool "Show eval mappings for trace events" - depends on TRACING - help - The "print fmt" of the trace events will show the enum/sizeof names - instead of their values. This can cause problems for user space tools - that use this string to parse the raw data as user space does not know - how to convert the string to its value. - - To fix this, there's a special macro in the kernel that can be used - to convert an enum/sizeof into its value. If this macro is used, then - the print fmt strings will be converted to their values. - - If something does not get converted properly, this option can be - used to show what enums/sizeof the kernel tried to convert. - - This option is for debugging the conversions. A file is created - in the tracing directory called "eval_map" that will show the - names matched with their values and what trace event system they - belong too.
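The "special macro" that the TRACE_EVAL_MAP_FILE help text above alludes to is TRACE_DEFINE_ENUM() (with TRACE_DEFINE_SIZEOF() as the sizeof counterpart). A hedged sketch of how a trace-event header registers an enum so that the names in its "print fmt" can be rewritten to numeric values for user space; the subsystem, enum, and event below are illustrative:

/* my_state.h - the enum itself lives in an ordinary header */
enum my_state { MY_STATE_IDLE, MY_STATE_BUSY };

/* trace/events/my_subsys.h - illustrative trace-event header */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM my_subsys

#if !defined(_TRACE_MY_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MY_SUBSYS_H

#include <linux/tracepoint.h>
#include "my_state.h"

/* Register the enum values with the eval-map machinery so that the
 * __print_symbolic() names below can be resolved to their values. */
TRACE_DEFINE_ENUM(MY_STATE_IDLE);
TRACE_DEFINE_ENUM(MY_STATE_BUSY);

TRACE_EVENT(my_state_change,
	TP_PROTO(int state),
	TP_ARGS(state),
	TP_STRUCT__entry(
		__field(int, state)
	),
	TP_fast_assign(
		__entry->state = state;
	),
	TP_printk("state=%s",
		  __print_symbolic(__entry->state,
				   { MY_STATE_IDLE, "idle" },
				   { MY_STATE_BUSY, "busy" }))
);

#endif /* _TRACE_MY_SUBSYS_H */

#include <trace/define_trace.h>

With TRACE_EVAL_MAP_FILE set, the name-to-value mappings those macros produce are kept after boot or module load and exposed in the "eval_map" file instead of being freed.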
+config SYNTH_EVENT_GEN_TEST + tristate "Test module for in-kernel synthetic event generation" + depends on HIST_TRIGGERS + help + This option creates a test module to check the base + functionality of in-kernel synthetic event definition and + generation. - Normally, the mapping of the strings to values will be freed after - boot up or module load. With this option, they will not be freed, as - they are needed for the "eval_map" file. Enabling this option will - increase the memory footprint of the running kernel. + To test, insert the module, and then check the trace buffer + for the generated sample events. - If unsure, say N. + If unsure, say N. -config GCOV_PROFILE_FTRACE - bool "Enable GCOV profiling on ftrace subsystem" - depends on GCOV_KERNEL +config KPROBE_EVENT_GEN_TEST + tristate "Test module for in-kernel kprobe event generation" + depends on KPROBE_EVENTS help - Enable GCOV profiling on ftrace subsystem for checking - which functions/lines are tested. + This option creates a test module to check the base + functionality of in-kernel kprobe event definition. - If unsure, say N. + To test, insert the module, and then check the trace buffer + for the generated kprobe events. - Note that on a kernel compiled with this config, ftrace will - run significantly slower. + If unsure, say N. endif # FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e63db62225f..f9dcd19165fa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -44,6 +44,8 @@ obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o +obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o +obj-$(CONFIG_KPROBE_EVENT_GEN_TEST) += kprobe_event_gen_test.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o @@ -83,6 +85,7 @@ endif obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o +obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 475e29498bca..0735ae8545d8 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -68,14 +68,14 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, { struct blk_io_trace *t; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; int pc = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? 
sizeof(cgid) : 0; if (blk_tracer) { - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, @@ -215,7 +215,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; - struct ring_buffer *buffer = NULL; + struct trace_buffer *buffer = NULL; struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; @@ -248,7 +248,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (blk_tracer) { tracing_record_cmdline(current); - buffer = blk_tr->trace_buffer.buffer; + buffer = blk_tr->array_buffer.buffer; pc = preempt_count(); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9bf1f2cd515e..3f7ee102868a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ }) /* hash bits for specific function selection */ -#define FTRACE_HASH_BITS 7 -#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 @@ -146,7 +144,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; - if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) + if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid)) return; op->saved_func(ip, parent_ip, op, regs); @@ -1103,9 +1101,6 @@ struct ftrace_page { #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -5464,7 +5459,7 @@ static void __init set_ftrace_early_graph(char *buf, int enable) struct ftrace_hash *hash; hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); - if (WARN_ON(!hash)) + if (MEM_FAIL(!hash, "Failed to allocate hash\n")) return; while (buf) { @@ -5596,8 +5591,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5872,8 +5867,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. 
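+ * (schedule_on_each_cpu() queues a work item on every CPU and waits
+ * for all of them to finish; since each work item runs in process
+ * context, every CPU must pass through a context switch, after which
+ * no CPU can still be inside a preempt-disabled tracer callback that
+ * references the old hash.)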
+ */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } @@ -6596,7 +6598,7 @@ static void add_to_clear_hash_list(struct list_head *clear_list, func = kmalloc(sizeof(*func), GFP_KERNEL); if (!func) { - WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + MEM_FAIL(1, "alloc failure, ftrace filter could be stale\n"); return; } @@ -6922,7 +6924,7 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->function_pids); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -6976,7 +6978,7 @@ static void clear_ftrace_pids(struct trace_array *tr) unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false; rcu_assign_pointer(tr->function_pids, NULL); @@ -7031,9 +7033,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } @@ -7100,7 +7103,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, trace_ignore_this_task(pid_list, current)); } diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c new file mode 100644 index 000000000000..18b0f1cbb947 --- /dev/null +++ b/kernel/trace/kprobe_event_gen_test.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel kprobe event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi <zanussi@kernel.org> + */ + +#include <linux/module.h> +#include <linux/trace_events.h> + +/* + * This module is a simple test of basic functionality for in-kernel + * kprobe/kretprobe event creation. The first test uses + * kprobe_event_gen_cmd_start(), kprobe_event_add_fields() and + * kprobe_event_gen_cmd_end() to create a kprobe event, which is then + * enabled in order to generate trace output. The second creates a + * kretprobe event using kretprobe_event_gen_cmd_start() and + * kretprobe_event_gen_cmd_end(), and is also then enabled. + * + * To test, select CONFIG_KPROBE_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/kprobe_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see many instances of the "gen_kprobe_test" and + * "gen_kretprobe_test" events in the trace buffer. + * + * To remove the events, remove the module: + * + * # rmmod kprobe_event_gen_test + * + */ + +static struct trace_event_file *gen_kprobe_test; +static struct trace_event_file *gen_kretprobe_test; + +/* + * Test to make sure we can create a kprobe event, then add more + * fields. 
+ */ +static int __init test_gen_kprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the gen_kprobe_test event with the first 2 kprobe + * fields. + */ + ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", + "do_sys_open", + "dfd=%ax", "filename=%dx"); + if (ret) + goto free; + + /* Use kprobe_event_add_fields to add the rest of the fields */ + + ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kprobe_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_kprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kprobe_test"); + if (IS_ERR(gen_kprobe_test)) { + ret = PTR_ERR(gen_kprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", "gen_kprobe_test", true); + if (ret) { + trace_put_event_file(gen_kprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kprobe_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create a kretprobe event. + */ +static int __init test_gen_kretprobe_cmd(void) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Define the kretprobe event. + */ + ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test", + "do_sys_open", + "$retval"); + if (ret) + goto free; + + /* + * This actually creates the event. + */ + ret = kretprobe_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_kretprobe_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). 
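+ * (The reference taken here must eventually be dropped again with
+ * trace_put_event_file(), which the error paths and the module exit
+ * code below take care of.)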
+ */ + gen_kretprobe_test = trace_get_event_file(NULL, "kprobes", + "gen_kretprobe_test"); + if (IS_ERR(gen_kretprobe_test)) { + ret = PTR_ERR(gen_kretprobe_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", "gen_kretprobe_test", true); + if (ret) { + trace_put_event_file(gen_kretprobe_test); + goto delete; + } + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kretprobe_test"); + free: + kfree(buf); + + goto out; +} + +static int __init kprobe_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_kprobe_cmd(); + if (ret) + return ret; + + ret = test_gen_kretprobe_cmd(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + } + + return ret; +} + +static void __exit kprobe_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kprobe_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); +} + +module_init(kprobe_event_gen_test_init) +module_exit(kprobe_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("kprobe event generation test"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f655371eaf6..61f0e92ace99 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -300,8 +300,6 @@ u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) -#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) - struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -443,7 +441,7 @@ enum { struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; - struct ring_buffer *buffer; + struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; @@ -482,7 +480,7 @@ struct ring_buffer_per_cpu { struct rb_irq_work irq_work; }; -struct ring_buffer { +struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; @@ -518,7 +516,7 @@ struct ring_buffer_iter { * * Returns the number of pages used by a per_cpu buffer of the ring buffer. */ -size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) { return buffer->buffers[cpu]->nr_pages; } @@ -530,7 +528,7 @@ size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu) * * Returns the number of pages that have content in the ring buffer. 
*/ -size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu) +size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; size_t cnt; @@ -573,7 +571,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); DEFINE_WAIT(wait); @@ -684,7 +682,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full) * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ -__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, +__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table) { struct ring_buffer_per_cpu *cpu_buffer; @@ -742,13 +740,13 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 -static inline u64 rb_time_stamp(struct ring_buffer *buffer) +static inline u64 rb_time_stamp(struct trace_buffer *buffer) { /* shift to debug/test normalization and TIME_EXTENTS */ return buffer->clock() << DEBUG_SHIFT; } -u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) { u64 time; @@ -760,7 +758,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); -void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, +void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { /* Just stupid testing the normalize function and deltas */ @@ -1283,7 +1281,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, } static struct ring_buffer_per_cpu * -rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu) +rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; @@ -1368,16 +1366,17 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. + * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ -struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, +struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages; int bsize; int cpu; @@ -1447,7 +1446,7 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc); * @buffer: the buffer to free. 
*/ void -ring_buffer_free(struct ring_buffer *buffer) +ring_buffer_free(struct trace_buffer *buffer) { int cpu; @@ -1463,18 +1462,18 @@ ring_buffer_free(struct ring_buffer *buffer) } EXPORT_SYMBOL_GPL(ring_buffer_free); -void ring_buffer_set_clock(struct ring_buffer *buffer, +void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { buffer->clock = clock; } -void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) +void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { buffer->time_stamp_abs = abs; } -bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) +bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { return buffer->time_stamp_abs; } @@ -1712,7 +1711,7 @@ static void update_pages_handler(struct work_struct *work) * * Returns 0 on success and < 0 on failure. */ -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, +int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; @@ -1891,7 +1890,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } EXPORT_SYMBOL_GPL(ring_buffer_resize); -void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) +void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { mutex_lock(&buffer->mutex); if (val) @@ -2206,7 +2205,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, { struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; - struct ring_buffer *buffer = cpu_buffer->buffer; + struct trace_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -2330,11 +2329,11 @@ static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, /** * rb_update_event - update event type and data + * @cpu_buffer: The per cpu buffer of the @event * @event: the event to update - * @type: the type of event - * @length: the size of the event field in the ring buffer + * @info: The info to update the @event with (contains length and delta) * - * Update the type and data fields of the event. The length + * Update the type and data fields of the @event. The length * is the actual size that is written to the ring buffer, * and with this, we can determine what to place into the * data field. @@ -2609,7 +2608,7 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline void -rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { size_t nr_pages; size_t dirty; @@ -2733,7 +2732,7 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) * Call this function before calling another ring_buffer_lock_reserve() and * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). */ -void ring_buffer_nest_start(struct ring_buffer *buffer) +void ring_buffer_nest_start(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2753,7 +2752,7 @@ void ring_buffer_nest_start(struct ring_buffer *buffer) * Must be called after ring_buffer_nest_start() and after the * ring_buffer_unlock_commit(). */ -void ring_buffer_nest_end(struct ring_buffer *buffer) +void ring_buffer_nest_end(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -2775,7 +2774,7 @@ void ring_buffer_nest_end(struct ring_buffer *buffer) * * Must be paired with ring_buffer_lock_reserve. 
*/ -int ring_buffer_unlock_commit(struct ring_buffer *buffer, +int ring_buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -2868,7 +2867,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } static __always_inline struct ring_buffer_event * -rb_reserve_next_event(struct ring_buffer *buffer, +rb_reserve_next_event(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) { @@ -2961,7 +2960,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) +ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -3062,7 +3061,7 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * If this function is called, do not call ring_buffer_unlock_commit on * the event. */ -void ring_buffer_discard_commit(struct ring_buffer *buffer, +void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3113,7 +3112,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * Note, like ring_buffer_lock_reserve, the length is the length of the data * and not the length of the event which would hold the header. */ -int ring_buffer_write(struct ring_buffer *buffer, +int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data) { @@ -3193,7 +3192,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable(struct ring_buffer *buffer) +void ring_buffer_record_disable(struct trace_buffer *buffer) { atomic_inc(&buffer->record_disabled); } @@ -3206,7 +3205,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ -void ring_buffer_record_enable(struct ring_buffer *buffer) +void ring_buffer_record_enable(struct trace_buffer *buffer) { atomic_dec(&buffer->record_disabled); } @@ -3223,7 +3222,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ -void ring_buffer_record_off(struct ring_buffer *buffer) +void ring_buffer_record_off(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3246,7 +3245,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ -void ring_buffer_record_on(struct ring_buffer *buffer) +void ring_buffer_record_on(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; @@ -3264,7 +3263,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_on); * * Returns true if the ring buffer is in a state that it accepts writes. */ -bool ring_buffer_record_is_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_on(struct trace_buffer *buffer) { return !atomic_read(&buffer->record_disabled); } @@ -3280,7 +3279,7 @@ bool ring_buffer_record_is_on(struct ring_buffer *buffer) * ring_buffer_record_disable(), as that is a temporary disabling of * the ring buffer. 
*/ -bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) +bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) { return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); } @@ -3295,7 +3294,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * * The caller should call synchronize_rcu() after this. */ -void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3315,7 +3314,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ -void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3345,7 +3344,7 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; @@ -3378,7 +3377,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3398,7 +3397,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. */ -unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3417,7 +3416,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ -unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3440,7 +3439,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3462,7 +3461,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long -ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; @@ -3483,7 +3482,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); * @cpu: The per CPU buffer to get the number of events read */ unsigned long -ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; @@ -3502,7 +3501,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); * Returns the total number of entries in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_entries(struct 
ring_buffer *buffer) +unsigned long ring_buffer_entries(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long entries = 0; @@ -3525,7 +3524,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries); * Returns the total number of overruns in the ring buffer * (all CPU entries) */ -unsigned long ring_buffer_overruns(struct ring_buffer *buffer) +unsigned long ring_buffer_overruns(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long overruns = 0; @@ -3949,7 +3948,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_peek); static struct ring_buffer_event * rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int nr_loops = 0; @@ -4077,7 +4076,7 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) * not consume the data. */ struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4141,7 +4140,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, +ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; @@ -4201,7 +4200,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags) +ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; @@ -4331,8 +4330,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_read); /** * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. + * @cpu: The CPU to get ring buffer size from. */ -unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) +unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { /* * Earlier, this method returned @@ -4398,7 +4398,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer to reset a per cpu buffer of * @cpu: The CPU buffer to be reset */ -void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) +void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; unsigned long flags; @@ -4435,7 +4435,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); * ring_buffer_reset - reset a ring buffer * @buffer: The ring buffer to reset all cpu buffers */ -void ring_buffer_reset(struct ring_buffer *buffer) +void ring_buffer_reset(struct trace_buffer *buffer) { int cpu; @@ -4448,7 +4448,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); * rind_buffer_empty - is the ring buffer empty? 
* @buffer: The ring buffer to test */ -bool ring_buffer_empty(struct ring_buffer *buffer) +bool ring_buffer_empty(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4478,7 +4478,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); * @buffer: The ring buffer * @cpu: The CPU buffer to test */ -bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) +bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; @@ -4504,14 +4504,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with * @buffer_b: The other buffer to swap with + * @cpu: the CPU of the buffers to swap * * This function is useful for tracers that want to take a "snapshot" * of a CPU buffer and has another back up buffer lying around. * it is expected that the tracer handles the cpu buffer not being * used at the moment. */ -int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, - struct ring_buffer *buffer_b, int cpu) +int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, + struct trace_buffer *buffer_b, int cpu) { struct ring_buffer_per_cpu *cpu_buffer_a; struct ring_buffer_per_cpu *cpu_buffer_b; @@ -4590,7 +4591,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * Returns: * The page allocated, or ERR_PTR */ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) +void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = NULL; @@ -4637,7 +4638,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); * * Free a page allocated from ring_buffer_alloc_read_page. */ -void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; @@ -4697,7 +4698,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * >=0 if data has been transferred, returns the offset of consumed data. * <0 if no data has been transferred. 
*/ -int ring_buffer_read_page(struct ring_buffer *buffer, +int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; @@ -4868,12 +4869,12 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_page); */ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; - buffer = container_of(node, struct ring_buffer, node); + buffer = container_of(node, struct trace_buffer, node); if (cpumask_test_cpu(cpu, buffer->cpumask)) return 0; @@ -4923,7 +4924,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) static struct task_struct *rb_threads[NR_CPUS] __initdata; struct rb_test_data { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long events; unsigned long bytes_written; unsigned long bytes_alloc; @@ -5065,7 +5066,7 @@ static __init int rb_hammer_test(void *arg) static __init int test_ringbuffer(void) { struct task_struct *rb_hammer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; int cpu; int ret = 0; diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 32149e46551c..8df0aa810950 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -29,7 +29,7 @@ static int reader_finish; static DECLARE_COMPLETION(read_start); static DECLARE_COMPLETION(read_done); -static struct ring_buffer *buffer; +static struct trace_buffer *buffer; static struct task_struct *producer; static struct task_struct *consumer; static unsigned long read; diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c new file mode 100644 index 000000000000..4aefe003cb7c --- /dev/null +++ b/kernel/trace/synth_event_gen_test.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module for in-kernel synthetic event creation and generation. + * + * Copyright (C) 2019 Tom Zanussi <zanussi@kernel.org> + */ + +#include <linux/module.h> +#include <linux/trace_events.h> + +/* + * This module is a simple test of basic functionality for in-kernel + * synthetic event creation and generation: the first and second tests + * use synth_event_gen_cmd_start() and synth_event_add_field(), while + * the third uses synth_event_create() to do it all at once with a + * static field array. + * + * Following that are a few examples using the created events to test + * various ways of tracing a synthetic event. + * + * To test, select CONFIG_SYNTH_EVENT_GEN_TEST and build the module. + * Then: + * + * # insmod kernel/trace/synth_event_gen_test.ko + * # cat /sys/kernel/debug/tracing/trace + * + * You should see several events in the trace buffer - + * "create_synth_test", "empty_synth_test", and several instances of + * "gen_synth_test". + * + * To remove the events, remove the module: + * + * # rmmod synth_event_gen_test + * + */ + +static struct trace_event_file *create_synth_test; +static struct trace_event_file *empty_synth_test; +static struct trace_event_file *gen_synth_test; + +/* + * Test to make sure we can create a synthetic event, then add more + * fields.
+ */ +static int __init test_gen_synth_cmd(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty gen_synth_test synthetic event with the + * first 4 fields. + */ + ret = synth_event_gen_cmd_start(&cmd, "gen_synth_test", THIS_MODULE, + "pid_t", "next_pid_field", + "char[16]", "next_comm_field", + "u64", "ts_ns", + "u64", "ts_ms"); + if (ret) + goto free; + + /* Use synth_event_add_field to add the rest of the fields */ + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the gen_synth_test event file. We need to prevent + * the instance and event from disappearing from underneath + * us, which trace_get_event_file() does (though in this case + * we're using the top-level instance which never goes away). + */ + gen_synth_test = trace_get_event_file(NULL, "synthetic", + "gen_synth_test"); + if (IS_ERR(gen_synth_test)) { + ret = PTR_ERR(gen_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", "gen_synth_test", true); + if (ret) { + trace_put_event_file(gen_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"hula hoops"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 598; /* my_int_field */ + + /* Now generate a gen_synth_test event */ + ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("gen_synth_test"); + free: + kfree(buf); + + goto out; +} + +/* + * Test to make sure we can create an initially empty synthetic event, + * then add all the fields. + */ +static int __init test_empty_synth_event(void) +{ + struct dynevent_cmd cmd; + u64 vals[7]; + char *buf; + int ret; + + /* Create a buffer to hold the generated command */ + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Before generating the command, initialize the cmd object */ + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + /* + * Create the empty_synth_test synthetic event with no fields. 
+ */ + ret = synth_event_gen_cmd_start(&cmd, "empty_synth_test", THIS_MODULE); + if (ret) + goto free; + + /* Use synth_event_add_field to add all of the fields */ + + ret = synth_event_add_field(&cmd, "pid_t", "next_pid_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[16]", "next_comm_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ns"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "u64", "ts_ms"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "unsigned int", "cpu"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "char[64]", "my_string_field"); + if (ret) + goto free; + + ret = synth_event_add_field(&cmd, "int", "my_int_field"); + if (ret) + goto free; + + /* All fields have been added, close and register the synth event */ + + ret = synth_event_gen_cmd_end(&cmd); + if (ret) + goto free; + + /* + * Now get the empty_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). + */ + empty_synth_test = trace_get_event_file(NULL, "synthetic", + "empty_synth_test"); + if (IS_ERR(empty_synth_test)) { + ret = PTR_ERR(empty_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", "empty_synth_test", true); + if (ret) { + trace_put_event_file(empty_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed_2.0"; /* my_string_field */ + vals[6] = 399; /* my_int_field */ + + /* Now trace an empty_synth_test event */ + ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("empty_synth_test"); + free: + kfree(buf); + + goto out; +} + +static struct synth_field_desc create_synth_test_fields[] = { + { .type = "pid_t", .name = "next_pid_field" }, + { .type = "char[16]", .name = "next_comm_field" }, + { .type = "u64", .name = "ts_ns" }, + { .type = "u64", .name = "ts_ms" }, + { .type = "unsigned int", .name = "cpu" }, + { .type = "char[64]", .name = "my_string_field" }, + { .type = "int", .name = "my_int_field" }, +}; + +/* + * Test synthetic event creation all at once from array of field + * descriptors. + */ +static int __init test_create_synth_event(void) +{ + u64 vals[7]; + int ret; + + /* Create the create_synth_test event with the fields above */ + ret = synth_event_create("create_synth_test", + create_synth_test_fields, + ARRAY_SIZE(create_synth_test_fields), + THIS_MODULE); + if (ret) + goto out; + + /* + * Now get the create_synth_test event file. We need to + * prevent the instance and event from disappearing from + * underneath us, which trace_get_event_file() does (though in + * this case we're using the top-level instance which never + * goes away). 
+ */ + create_synth_test = trace_get_event_file(NULL, "synthetic", + "create_synth_test"); + if (IS_ERR(create_synth_test)) { + ret = PTR_ERR(create_synth_test); + goto delete; + } + + /* Enable the event or you won't see anything */ + ret = trace_array_set_clr_event(create_synth_test->tr, + "synthetic", "create_synth_test", true); + if (ret) { + trace_put_event_file(create_synth_test); + goto delete; + } + + /* Create some bogus values just for testing */ + + vals[0] = 777; /* next_pid_field */ + vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[2] = 1000000; /* ts_ns */ + vals[3] = 1000; /* ts_ms */ + vals[4] = smp_processor_id(); /* cpu */ + vals[5] = (u64)"thneed"; /* my_string_field */ + vals[6] = 398; /* my_int_field */ + + /* Now generate a create_synth_test event */ + ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals)); + out: + return ret; + delete: + /* We got an error after creating the event, delete it */ + ret = synth_event_delete("create_synth_test"); + + goto out; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields one after another. + */ +static int __init test_add_next_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, one after another */ + + /* next_pid_field */ + ret = synth_event_add_next_val(777, &trace_state); + if (ret) + goto out; + + /* next_comm_field */ + ret = synth_event_add_next_val((u64)"slinky", &trace_state); + if (ret) + goto out; + + /* ts_ns */ + ret = synth_event_add_next_val(1000000, &trace_state); + if (ret) + goto out; + + /* ts_ms */ + ret = synth_event_add_next_val(1000, &trace_state); + if (ret) + goto out; + + /* cpu */ + ret = synth_event_add_next_val(smp_processor_id(), &trace_state); + if (ret) + goto out; + + /* my_string_field */ + ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state); + if (ret) + goto out; + + /* my_int_field */ + ret = synth_event_add_next_val(395, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event by reserving trace buffer space, + * then filling in fields using field names, which can be done in any + * order. 
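+ * (As in the previous test, synth_event_trace_end() is reached even
+ * when adding a value fails, so the trace started by
+ * synth_event_trace_start() is always closed out.)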
+ */ +static int __init test_add_synth_val(void) +{ + struct synth_event_trace_state trace_state; + int ret; + + /* Start by reserving space in the trace buffer */ + ret = synth_event_trace_start(gen_synth_test, &trace_state); + if (ret) + return ret; + + /* Write some bogus values into the trace buffer, using field names */ + + ret = synth_event_add_val("ts_ns", 1000000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("ts_ms", 1000, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_pid_field", 777, &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("next_comm_field", (u64)"silly putty", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_string_field", (u64)"thneed_9", + &trace_state); + if (ret) + goto out; + + ret = synth_event_add_val("my_int_field", 3999, &trace_state); + out: + /* Finally, commit the event */ + ret = synth_event_trace_end(&trace_state); + + return ret; +} + +/* + * Test tracing a synthetic event all at once from array of values. + */ +static int __init test_trace_synth_event(void) +{ + int ret; + + /* Trace some bogus values just for testing */ + ret = synth_event_trace(create_synth_test, 7, /* number of values */ + 444, /* next_pid_field */ + (u64)"clackers", /* next_comm_field */ + 1000000, /* ts_ns */ + 1000, /* ts_ms */ + smp_processor_id(), /* cpu */ + (u64)"Thneed", /* my_string_field */ + 999); /* my_int_field */ + return ret; +} + +static int __init synth_event_gen_test_init(void) +{ + int ret; + + ret = test_gen_synth_cmd(); + if (ret) + return ret; + + ret = test_empty_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + goto out; + } + + ret = test_create_synth_event(); + if (ret) { + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + trace_put_event_file(gen_synth_test); + WARN_ON(synth_event_delete("gen_synth_test")); + + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + trace_put_event_file(empty_synth_test); + WARN_ON(synth_event_delete("empty_synth_test")); + goto out; + } + + ret = test_add_next_synth_val(); + WARN_ON(ret); + + ret = test_add_synth_val(); + WARN_ON(ret); + + ret = test_trace_synth_event(); + WARN_ON(ret); + out: + return ret; +} + +static void __exit synth_event_gen_test_exit(void) +{ + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("gen_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(empty_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("empty_synth_test")); + + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(create_synth_test->tr, + "synthetic", + "create_synth_test", false)); + + /* Now give the file and instance back */ + 
trace_put_event_file(create_synth_test); + + /* Now unregister and free the synthetic event */ + WARN_ON(synth_event_delete("create_synth_test")); +} + +module_init(synth_event_gen_test_init) +module_exit(synth_event_gen_test_exit) + +MODULE_AUTHOR("Tom Zanussi"); +MODULE_DESCRIPTION("synthetic event generation test"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b6ee4aadc26..c797a15a1fc7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -162,8 +162,8 @@ union trace_eval_map_item { static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ -static int tracing_set_tracer(struct trace_array *tr, const char *buf); -static void ftrace_trace_userstack(struct ring_buffer *buffer, +int tracing_set_tracer(struct trace_array *tr, const char *buf); +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 @@ -338,7 +338,7 @@ int tracing_check_open_get_tr(struct trace_array *tr) } int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event) { if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && @@ -603,7 +603,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, return read; } -static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) +static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; @@ -619,7 +619,7 @@ static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) u64 ftrace_now(int cpu) { - return buffer_ftrace_now(&global_trace.trace_buffer, cpu); + return buffer_ftrace_now(&global_trace.array_buffer, cpu); } /** @@ -747,22 +747,22 @@ static inline void trace_access_lock_init(void) #endif #ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs); #else -static inline void __ftrace_trace_stack(struct ring_buffer *buffer, +static inline void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -780,7 +780,7 @@ trace_event_setup(struct ring_buffer_event *event, } static __always_inline struct ring_buffer_event * -__trace_buffer_lock_reserve(struct ring_buffer *buffer, +__trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) @@ -796,8 +796,8 @@ __trace_buffer_lock_reserve(struct ring_buffer *buffer, void tracer_tracing_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_on(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -825,7 +825,7 @@ EXPORT_SYMBOL_GPL(tracing_on); static __always_inline void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event 
*event) { __this_cpu_write(trace_taskinfo_save, true); @@ -848,7 +848,7 @@ __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *eve int __trace_puts(unsigned long ip, const char *str, int size) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; int alloc; @@ -865,11 +865,14 @@ int __trace_puts(unsigned long ip, const char *str, int size) alloc = sizeof(*entry) + size + 2; /* possible \n added */ local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, irq_flags, pc); - if (!event) - return 0; + if (!event) { + size = 0; + goto out; + } entry = ring_buffer_event_data(event); entry->ip = ip; @@ -885,7 +888,8 @@ int __trace_puts(unsigned long ip, const char *str, int size) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - + out: + ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_puts); @@ -898,10 +902,11 @@ EXPORT_SYMBOL_GPL(__trace_puts); int __trace_bputs(unsigned long ip, const char *str) { struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int ret = 0; int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) @@ -913,11 +918,13 @@ int __trace_bputs(unsigned long ip, const char *str) return 0; local_save_flags(irq_flags); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; + + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, irq_flags, pc); if (!event) - return 0; + goto out; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -926,7 +933,10 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); - return 1; + ret = 1; + out: + ring_buffer_nest_end(buffer); + return ret; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -1036,9 +1046,9 @@ void *tracing_cond_snapshot_data(struct trace_array *tr) } EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id); -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id); +static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int tracing_alloc_snapshot_instance(struct trace_array *tr) { @@ -1048,7 +1058,7 @@ int tracing_alloc_snapshot_instance(struct trace_array *tr) /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, RING_BUFFER_ALL_CPUS); + &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; @@ -1251,8 +1261,8 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); void tracer_tracing_off(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - ring_buffer_record_off(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + ring_buffer_record_off(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to @@ -1294,8 +1304,8 @@ void disable_trace_on_warning(void) 
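A note on the ring_buffer_nest_start()/ring_buffer_nest_end() pairs now bracketing the reserve/commit in __trace_puts() and __trace_bputs(): the ring buffer's recursion protection treats a second write from the same context as a bug and drops it, which made trace_printk()-style output disappear when issued from inside another event being recorded. The nest calls declare the inner write as intentional. The pattern in sketch form, with buffer, alloc, irq_flags and pc as in the surrounding code:

	ring_buffer_nest_start(buffer);		/* announce an intentional nested write */
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, irq_flags, pc);
	if (event)
		__buffer_unlock_commit(buffer, event);
	ring_buffer_nest_end(buffer);		/* matching end, needed on all paths */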
*/ bool tracer_tracing_is_on(struct trace_array *tr) { - if (tr->trace_buffer.buffer) - return ring_buffer_record_is_on(tr->trace_buffer.buffer); + if (tr->array_buffer.buffer) + return ring_buffer_record_is_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } @@ -1590,8 +1600,8 @@ void latency_fsnotify(struct trace_array *tr) static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_buffer *trace_buf = &tr->trace_buffer; - struct trace_buffer *max_buf = &tr->max_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; + struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); @@ -1649,8 +1659,8 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, arch_spin_lock(&tr->max_lock); - /* Inherit the recordable setting from trace_buffer */ - if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + /* Inherit the recordable setting from array_buffer */ + if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) ring_buffer_record_on(tr->max_buffer.buffer); else ring_buffer_record_off(tr->max_buffer.buffer); @@ -1659,7 +1669,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) goto out_unlock; #endif - swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); + swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -1692,7 +1702,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); + ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -1718,7 +1728,7 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, + return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); } @@ -1769,7 +1779,7 @@ static int run_tracer_selftest(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. 
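For readers tracking the churn through the rest of this diff: the buffer renames are mechanical, not structural. The per-instance bookkeeping struct formerly named trace_buffer becomes array_buffer, freeing the trace_buffer name for the ring buffer itself (formerly struct ring_buffer). Every hunk that follows applies the same mapping:

	struct trace_buffer  (old, per trace_array)   becomes  struct array_buffer
	struct ring_buffer   (old, the ring buffer)   becomes  struct trace_buffer
	tr->trace_buffer and iter->trace_buffer       become   tr->array_buffer and iter->array_buffer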
*/ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); tr->current_trace = type; @@ -1795,7 +1805,7 @@ static int run_tracer_selftest(struct tracer *type) return -1; } /* Only reset on passing, to avoid touching corrupted buffers */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { @@ -1962,9 +1972,9 @@ int __init register_tracer(struct tracer *type) return ret; } -static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct array_buffer *buf, int cpu) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; if (!buffer) return; @@ -1978,9 +1988,9 @@ static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) ring_buffer_record_enable(buffer); } -void tracing_reset_online_cpus(struct trace_buffer *buf) +void tracing_reset_online_cpus(struct array_buffer *buf) { - struct ring_buffer *buffer = buf->buffer; + struct trace_buffer *buffer = buf->buffer; int cpu; if (!buffer) @@ -2008,7 +2018,7 @@ void tracing_reset_all_online_cpus(void) if (!tr->clear_trace) continue; tr->clear_trace = false; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif @@ -2098,7 +2108,7 @@ int is_tracing_stopped(void) */ void tracing_start(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2117,7 +2127,7 @@ void tracing_start(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2135,7 +2145,7 @@ void tracing_start(void) static void tracing_start_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; if (tracing_disabled) @@ -2156,7 +2166,7 @@ static void tracing_start_tr(struct trace_array *tr) goto out; } - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2172,7 +2182,7 @@ static void tracing_start_tr(struct trace_array *tr) */ void tracing_stop(void) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; raw_spin_lock_irqsave(&global_trace.start_lock, flags); @@ -2182,7 +2192,7 @@ void tracing_stop(void) /* Prevent the buffers from switching */ arch_spin_lock(&global_trace.max_lock); - buffer = global_trace.trace_buffer.buffer; + buffer = global_trace.array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2200,7 +2210,7 @@ void tracing_stop(void) static void tracing_stop_tr(struct trace_array *tr) { - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long flags; /* If global, we need to also stop the max tracer */ @@ -2211,7 +2221,7 @@ static void tracing_stop_tr(struct trace_array *tr) if (tr->stop_count++) goto out; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -2442,7 +2452,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, 
unsigned long flags, int pc) @@ -2561,10 +2571,10 @@ void trace_buffered_event_disable(void) preempt_enable(); } -static struct ring_buffer *temp_buffer; +static struct trace_buffer *temp_buffer; struct ring_buffer_event * -trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, +trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc) @@ -2572,7 +2582,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ring_buffer_event *entry; int val; - *current_rb = trace_file->tr->trace_buffer.buffer; + *current_rb = trace_file->tr->array_buffer.buffer; if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && @@ -2610,6 +2620,7 @@ static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) { struct trace_event_call *event_call; + struct trace_event_file *file; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; @@ -2623,6 +2634,12 @@ static void output_printk(struct trace_event_buffer *fbuffer) !event_call->event.funcs->trace) return; + file = fbuffer->trace_file; + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || + (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && + !filter_match_preds(file->filter, fbuffer->entry))) + return; + event = &fbuffer->trace_file->event_call->event; spin_lock_irqsave(&tracepoint_iter_lock, flags); @@ -2673,9 +2690,9 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); - event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, + event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc); + fbuffer->flags, fbuffer->pc, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2689,7 +2706,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); # define STACK_SKIP 3 void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs) @@ -2710,7 +2727,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 
*/ void -trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event) { __buffer_unlock_commit(buffer, event); @@ -2845,7 +2862,7 @@ trace_function(struct trace_array *tr, int pc) { struct trace_event_call *call = &event_function; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -2883,7 +2900,7 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct ring_buffer *buffer, +static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2958,7 +2975,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, } static inline void ftrace_trace_stack(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { @@ -2971,7 +2988,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { __ftrace_trace_stack(buffer, flags, skip, pc, NULL); @@ -3009,7 +3026,7 @@ void trace_dump_stack(int skip) /* Skip 1 to skip this function. */ skip++; #endif - __ftrace_trace_stack(global_trace.trace_buffer.buffer, + __ftrace_trace_stack(global_trace.array_buffer.buffer, flags, skip, preempt_count(), NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3018,7 +3035,7 @@ EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); static void -ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) +ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3063,7 +3080,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ -static void ftrace_trace_userstack(struct ring_buffer *buffer, +static void ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) { } @@ -3109,7 +3126,7 @@ static int alloc_percpu_trace_buffer(void) struct trace_buffer_struct *buffers; buffers = alloc_percpu(struct trace_buffer_struct); - if (WARN(!buffers, "Could not allocate percpu trace_printk buffer")) + if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) return -ENOMEM; trace_percpu_buffer = buffers; @@ -3154,7 +3171,7 @@ void trace_printk_init_buffers(void) * directly here. If the global_trace.buffer is already * allocated here, then this was called by module code. 
*/ - if (global_trace.trace_buffer.buffer) + if (global_trace.array_buffer.buffer) tracing_start_cmdline_record(); } EXPORT_SYMBOL_GPL(trace_printk_init_buffers); @@ -3188,7 +3205,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; unsigned long flags; @@ -3213,11 +3230,12 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) - goto out; + goto out_put; local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) @@ -3233,6 +3251,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } out: + ring_buffer_nest_end(buffer); +out_put: put_trace_buf(); out_nobuffer: @@ -3245,7 +3265,7 @@ EXPORT_SYMBOL_GPL(trace_vbprintk); __printf(3, 0) static int -__trace_array_vprintk(struct ring_buffer *buffer, +__trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { struct trace_event_call *call = &event_print; @@ -3275,6 +3295,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, local_save_flags(flags); size = sizeof(*entry) + len + 1; + ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, flags, pc); if (!event) @@ -3289,6 +3310,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, } out: + ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: @@ -3302,7 +3324,7 @@ __printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } __printf(3, 0) @@ -3326,7 +3348,7 @@ int trace_array_printk(struct trace_array *tr, EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) 
{ int ret; @@ -3367,7 +3389,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, + event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, lost_events); if (event) { @@ -3382,7 +3404,7 @@ static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { - struct ring_buffer *buffer = iter->trace_buffer->buffer; + struct trace_buffer *buffer = iter->array_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; @@ -3459,7 +3481,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter) static void trace_consume(struct trace_iterator *iter) { - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, &iter->lost_events); } @@ -3497,7 +3519,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) unsigned long entries = 0; u64 ts; - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; buf_iter = trace_buffer_iter(iter, cpu); if (!buf_iter) @@ -3511,13 +3533,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) * by the timestamp being before the start of the buffer. */ while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { - if (ts >= iter->trace_buffer->time_start) + if (ts >= iter->array_buffer->time_start) break; entries++; ring_buffer_read(buf_iter, NULL); } - per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; + per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; } /* @@ -3602,7 +3624,7 @@ static void s_stop(struct seq_file *m, void *p) } static void -get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, +get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, unsigned long *entries, int cpu) { unsigned long count; @@ -3624,7 +3646,7 @@ get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, } static void -get_total_entries(struct trace_buffer *buf, +get_total_entries(struct array_buffer *buf, unsigned long *total, unsigned long *entries) { unsigned long t, e; @@ -3647,7 +3669,7 @@ unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) if (!tr) tr = &global_trace; - get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); return entries; } @@ -3659,7 +3681,7 @@ unsigned long trace_total_entries(struct trace_array *tr) if (!tr) tr = &global_trace; - get_total_entries(&tr->trace_buffer, &total, &entries); + get_total_entries(&tr->array_buffer, &total, &entries); return entries; } @@ -3676,7 +3698,7 @@ static void print_lat_help_header(struct seq_file *m) "# \\ / ||||| \\ | / \n"); } -static void print_event_info(struct trace_buffer *buf, struct seq_file *m) +static void print_event_info(struct array_buffer *buf, struct seq_file *m) { unsigned long total; unsigned long entries; @@ -3687,7 +3709,7 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) seq_puts(m, "#\n"); } -static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3698,7 +3720,7 
@@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } -static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, +static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; @@ -3720,7 +3742,7 @@ void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); - struct trace_buffer *buf = iter->trace_buffer; + struct array_buffer *buf = iter->array_buffer; struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; unsigned long entries; @@ -3795,7 +3817,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter) cpumask_test_cpu(iter->cpu, iter->started)) return; - if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) + if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) return; if (cpumask_available(iter->started)) @@ -3929,7 +3951,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } return 1; @@ -3941,7 +3963,7 @@ int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { - if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) + if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } } @@ -4031,10 +4053,10 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, + print_func_help_header_irq(iter->array_buffer, m, trace_flags); else - print_func_help_header(iter->trace_buffer, m, + print_func_help_header(iter->array_buffer, m, trace_flags); } } @@ -4192,21 +4214,21 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) #ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; else #endif - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->snapshot = snapshot; iter->pos = -1; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); /* Notify the tracer early; before we stop tracing. */ - if (iter->trace && iter->trace->open) + if (iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ @@ -4220,7 +4242,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); } ring_buffer_read_prepare_sync(); @@ -4231,7 +4253,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, + ring_buffer_read_prepare(iter->array_buffer->buffer, cpu, GFP_KERNEL); ring_buffer_read_prepare_sync(); ring_buffer_read_start(iter->buffer_iter[cpu]); @@ -4357,7 +4379,7 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) @@ -4554,20 +4576,13 @@ out_err: return count; } -static ssize_t -tracing_cpumask_write(struct file *filp, const char __user *ubuf, - size_t count, loff_t *ppos) +int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new) { - struct trace_array *tr = file_inode(filp)->i_private; - cpumask_var_t tracing_cpumask_new; - int err, cpu; - - if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) - return -ENOMEM; + int cpu; - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); - if (err) - goto err_unlock; + if (!tr) + return -EINVAL; local_irq_disable(); arch_spin_lock(&tr->max_lock); @@ -4578,24 +4593,47 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, */ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); + atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { - atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); - ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); + atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); } } arch_spin_unlock(&tr->max_lock); local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); + + return 0; +} + +static ssize_t +tracing_cpumask_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_array *tr = file_inode(filp)->i_private; + cpumask_var_t tracing_cpumask_new; + int err; + + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + return -ENOMEM; + + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + if (err) + goto err_free; + + err = tracing_set_cpumask(tr, tracing_cpumask_new); + if (err) + goto err_free; + free_cpumask_var(tracing_cpumask_new); return count; -err_unlock: +err_free: free_cpumask_var(tracing_cpumask_new); return err; @@ -4726,7 +4764,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) ftrace_pid_follow_fork(tr, enabled); if (mask == TRACE_ITER_OVERWRITE) { - ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); + 
ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); #endif @@ -4740,7 +4778,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) return 0; } -static int trace_set_options(struct trace_array *tr, char *option) +int trace_set_options(struct trace_array *tr, char *option) { char *cmp; int neg = 0; @@ -5361,14 +5399,12 @@ static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) * Paranoid! If ptr points to end, we don't want to increment past it. * This really should never happen. */ + (*pos)++; ptr = update_eval_map(ptr); if (WARN_ON_ONCE(!ptr)) return NULL; ptr++; - - (*pos)++; - ptr = update_eval_map(ptr); return ptr; @@ -5534,11 +5570,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int tracer_init(struct tracer *t, struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); return t->init(tr); } -static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) +static void set_buffer_entries(struct array_buffer *buf, unsigned long val) { int cpu; @@ -5548,8 +5584,8 @@ static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ -static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, - struct trace_buffer *size_buf, int cpu_id) +static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, + struct array_buffer *size_buf, int cpu_id) { int cpu, ret = 0; @@ -5587,10 +5623,10 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ring_buffer_expanded = true; /* May be called before buffers are initialized */ - if (!tr->trace_buffer.buffer) + if (!tr->array_buffer.buffer) return 0; - ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) return ret; @@ -5601,8 +5637,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); if (ret < 0) { - int r = resize_buffer_duplicate_size(&tr->trace_buffer, - &tr->trace_buffer, cpu); + int r = resize_buffer_duplicate_size(&tr->array_buffer, + &tr->array_buffer, cpu); if (r < 0) { /* * AARGH! 
We are left with different @@ -5633,15 +5669,15 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->trace_buffer, size); + set_buffer_entries(&tr->array_buffer, size); else - per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size; + per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size; return ret; } -static ssize_t tracing_resize_ring_buffer(struct trace_array *tr, - unsigned long size, int cpu_id) +ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id) { int ret = size; @@ -5720,7 +5756,7 @@ static void add_tracer_options(struct trace_array *tr, struct tracer *t) create_trace_option_files(tr, t); } -static int tracing_set_tracer(struct trace_array *tr, const char *buf) +int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5979,7 +6015,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; iter->tr = tr; - iter->trace_buffer = &tr->trace_buffer; + iter->array_buffer = &tr->array_buffer; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -6039,7 +6075,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl */ return EPOLLIN | EPOLLRDNORM; else - return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file, + return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, filp, poll_table); } @@ -6356,8 +6392,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf, for_each_tracing_cpu(cpu) { /* fill in the size from first enabled cpu */ if (size == 0) - size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries; - if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) { + size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; + if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { buf_size_same = 0; break; } @@ -6373,7 +6409,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -6420,7 +6456,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { - size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10; + size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; if (!ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } @@ -6470,7 +6506,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; enum event_trigger_type tt = ETT_NONE; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; ssize_t written; @@ -6499,7 +6535,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (unlikely(!event)) @@ -6550,7 +6586,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; - struct ring_buffer 
*buffer; + struct trace_buffer *buffer; struct raw_data_entry *entry; unsigned long irq_flags; ssize_t written; @@ -6579,7 +6615,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, irq_flags, preempt_count()); if (!event) @@ -6634,13 +6670,13 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tr->clock_id = i; - ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func); + ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); /* * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6703,7 +6739,7 @@ static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) mutex_lock(&trace_types_lock); - if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) + if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); @@ -6748,7 +6784,7 @@ int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) goto out; } - ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); + ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) @@ -6797,7 +6833,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->trace_buffer = &tr->max_buffer; + iter->array_buffer = &tr->max_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; @@ -6860,7 +6896,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, #endif if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, - &tr->trace_buffer, iter->cpu_file); + &tr->array_buffer, iter->cpu_file); else ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) @@ -6935,7 +6971,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) } info->iter.snapshot = true; - info->iter.trace_buffer = &info->iter.tr->max_buffer; + info->iter.array_buffer = &info->iter.tr->max_buffer; return ret; } @@ -7310,7 +7346,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->iter.tr = tr; info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; - info->iter.trace_buffer = &tr->trace_buffer; + info->iter.array_buffer = &tr->array_buffer; info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; @@ -7355,7 +7391,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, #endif if (!info->spare) { - info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, + info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); if (IS_ERR(info->spare)) { ret = PTR_ERR(info->spare); @@ -7373,7 +7409,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, again: trace_access_lock(iter->cpu_file); - ret = ring_buffer_read_page(iter->trace_buffer->buffer, + ret = ring_buffer_read_page(iter->array_buffer->buffer, &info->spare, count, iter->cpu_file, 0); @@ -7423,7 +7459,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); if 
(info->spare) - ring_buffer_free_read_page(iter->trace_buffer->buffer, + ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); kfree(info); @@ -7433,7 +7469,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) } struct buffer_ref { - struct ring_buffer *buffer; + struct trace_buffer *buffer; void *page; int cpu; refcount_t refcount; @@ -7528,7 +7564,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, again: trace_access_lock(iter->cpu_file); - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { struct page *page; @@ -7541,7 +7577,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } refcount_set(&ref->refcount, 1); - ref->buffer = iter->trace_buffer->buffer; + ref->buffer = iter->array_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { ret = PTR_ERR(ref->page); @@ -7569,7 +7605,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.nr_pages++; *ppos += PAGE_SIZE; - entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); + entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } trace_access_unlock(iter->cpu_file); @@ -7613,7 +7649,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; - struct trace_buffer *trace_buf = &tr->trace_buffer; + struct array_buffer *trace_buf = &tr->array_buffer; int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; @@ -7894,7 +7930,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); - WARN_ONCE(!tr->percpu_dir, + MEM_FAIL(!tr->percpu_dir, "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; @@ -8215,7 +8251,7 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer) for (cnt = 0; opts[cnt].name; cnt++) { create_trace_option_file(tr, &topts[cnt], flags, &opts[cnt]); - WARN_ONCE(topts[cnt].entry == NULL, + MEM_FAIL(topts[cnt].entry == NULL, "Failed to create trace option: %s", opts[cnt].name); } @@ -8272,7 +8308,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; unsigned long val; int ret; @@ -8362,7 +8398,7 @@ static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); static int -allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) +allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; @@ -8382,8 +8418,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size } /* Allocate the first page for all buffers */ - set_buffer_entries(&tr->trace_buffer, - ring_buffer_size(tr->trace_buffer.buffer, 0)); + set_buffer_entries(&tr->array_buffer, + ring_buffer_size(tr->array_buffer.buffer, 0)); return 0; } @@ -8392,18 +8428,18 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; - ret = allocate_trace_buffer(tr, &tr->trace_buffer, size); + ret = allocate_trace_buffer(tr, 
&tr->array_buffer, size); if (ret) return ret; #ifdef CONFIG_TRACER_MAX_TRACE ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); - if (WARN_ON(ret)) { - ring_buffer_free(tr->trace_buffer.buffer); - tr->trace_buffer.buffer = NULL; - free_percpu(tr->trace_buffer.data); - tr->trace_buffer.data = NULL; + if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { + ring_buffer_free(tr->array_buffer.buffer); + tr->array_buffer.buffer = NULL; + free_percpu(tr->array_buffer.data); + tr->array_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; @@ -8417,7 +8453,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } -static void free_trace_buffer(struct trace_buffer *buf) +static void free_trace_buffer(struct array_buffer *buf) { if (buf->buffer) { ring_buffer_free(buf->buffer); @@ -8432,7 +8468,7 @@ static void free_trace_buffers(struct trace_array *tr) if (!tr) return; - free_trace_buffer(&tr->trace_buffer); + free_trace_buffer(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); @@ -8463,6 +8499,34 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } +/* Must have trace_types_lock held */ +struct trace_array *trace_array_find(const char *instance) +{ + struct trace_array *tr, *found = NULL; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, instance) == 0) { + found = tr; + break; + } + } + + return found; +} + +struct trace_array *trace_array_find_get(const char *instance) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + tr = trace_array_find(instance); + if (tr) + tr->ref++; + mutex_unlock(&trace_types_lock); + + return tr; +} + static struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; @@ -8504,7 +8568,7 @@ static struct trace_array *trace_array_create(const char *name) ret = event_trace_add_tracer(tr->dir, tr); if (ret) { - tracefs_remove_recursive(tr->dir); + tracefs_remove(tr->dir); goto out_free_tr; } @@ -8539,10 +8603,8 @@ static int instance_mkdir(const char *name) mutex_lock(&trace_types_lock); ret = -EEXIST; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) - goto out_unlock; - } + if (trace_array_find(name)) + goto out_unlock; tr = trace_array_create(name); @@ -8564,6 +8626,10 @@ out_unlock: * NOTE: This function increments the reference counter associated with the * trace array returned. This makes sure it cannot be freed while in use. * Use trace_array_put() once the trace array is no longer needed. + * If the trace_array is to be freed, trace_array_destroy() needs to + * be called after the trace_array_put(), or simply let user space delete + * it from the tracefs instances directory. But until the + * trace_array_put() is called, user space can not delete it. 
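trace_array_find() is a bare lookup for callers already holding trace_types_lock; trace_array_find_get() additionally takes a reference on the instance under the lock, to be dropped later with trace_array_put(). A minimal sketch of the refcounted form from module context (the instance name is hypothetical; the printk helper is the one exported earlier in this file):

	static void example_poke_instance(void)
	{
		struct trace_array *tr;

		tr = trace_array_find_get("my_instance");
		if (!tr)
			return;

		trace_array_printk(tr, _THIS_IP_, "poked from example code\n");

		trace_array_put(tr);
	}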
* */ struct trace_array *trace_array_get_by_name(const char *name) @@ -8613,7 +8679,7 @@ static int __remove_instance(struct trace_array *tr) event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); - tracefs_remove_recursive(tr->dir); + tracefs_remove(tr->dir); free_trace_buffers(tr); for (i = 0; i < tr->nr_topts; i++) { @@ -8666,12 +8732,9 @@ static int instance_rmdir(const char *name) mutex_lock(&trace_types_lock); ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - ret = __remove_instance(tr); - break; - } - } + tr = trace_array_find(name); + if (tr) + ret = __remove_instance(tr); mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); @@ -8684,7 +8747,7 @@ static __init void create_trace_instances(struct dentry *d_tracer) trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, instance_mkdir, instance_rmdir); - if (WARN_ON(!trace_instance_dir)) + if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; } @@ -8754,7 +8817,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) #endif if (ftrace_create_function_files(tr, d_tracer)) - WARN(1, "Could not allocate function filter files"); + MEM_FAIL(1, "Could not allocate function filter files"); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, @@ -9036,13 +9099,13 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->tr = &global_trace; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; - iter->trace_buffer = &global_trace.trace_buffer; + iter->array_buffer = &global_trace.array_buffer; if (iter->trace && iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ - if (ring_buffer_overruns(iter->trace_buffer->buffer)) + if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ @@ -9083,7 +9146,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; @@ -9151,7 +9214,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tr->trace_flags |= old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); printk_nmi_direct_exit(); @@ -9306,8 +9369,7 @@ __init static int tracer_alloc_buffers(void) /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { - printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); - WARN_ON(1); + MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); goto out_free_savedcmd; } @@ -9380,7 +9442,8 @@ void __init early_trace_init(void) if (tracepoint_printk) { tracepoint_print_iter = kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); - if (WARN_ON(!tracepoint_print_iter)) + if (MEM_FAIL(!tracepoint_print_iter, + "Failed to allocate trace iterator\n")) tracepoint_printk = 0; else static_key_enable(&tracepoint_printk_key.key); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a98dce1b3334..99372dd7d168 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -93,6 +93,18 @@ enum trace_type { #include "trace_entries.h" +/* Use this for memory failure errors */ +#define MEM_FAIL(condition, fmt, ...) ({ \ + static bool __section(.data.once) __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + pr_err("ERROR: " fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_warn_once); \ +}) + /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h @@ -175,9 +187,9 @@ struct trace_array_cpu { struct tracer; struct trace_option_dentry; -struct trace_buffer { +struct array_buffer { struct trace_array *tr; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; @@ -248,7 +260,7 @@ struct cond_snapshot { struct trace_array { struct list_head list; char *name; - struct trace_buffer trace_buffer; + struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum @@ -256,12 +268,12 @@ struct trace_array { * Some tracers will use this to store a maximum trace while * it continues examining live traces. * - * The buffers for the max_buffer are set up the same as the trace_buffer + * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped - * with the buffer of the trace_buffer and the buffers are reset for - * the trace_buffer so the tracing can continue. + * with the buffer of the array_buffer and the buffers are reset for + * the array_buffer so the tracing can continue. 
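The MEM_FAIL() macro introduced in this hunk is essentially WARN_ONCE() minus the backtrace: it pr_err()s the message the first time the condition is true, and evaluates to the (unlikely-annotated) condition on every call, so allocation-failure paths can test and report in one step. A minimal usage sketch (names hypothetical):

	buf = kzalloc(size, GFP_KERNEL);
	if (MEM_FAIL(!buf, "Could not allocate example buffer\n"))
		return -ENOMEM;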
*/ - struct trace_buffer max_buffer; + struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) @@ -345,6 +357,8 @@ extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); +extern struct trace_array *trace_array_find(const char *instance); +extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); @@ -684,7 +698,7 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset_online_cpus(struct trace_buffer *buf); +void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -704,7 +718,7 @@ struct dentry *tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * -trace_buffer_lock_reserve(struct ring_buffer *buffer, +trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, @@ -716,7 +730,7 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); @@ -872,7 +886,7 @@ trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); -int trace_array_printk_buf(struct ring_buffer *buffer, +int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); @@ -949,22 +963,31 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". 
+ * Protected with schedule_on_each_cpu(ftrace_sync) + */ + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions @@ -1000,10 +1023,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); @@ -1056,7 +1089,7 @@ struct ftrace_func_command { extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { - return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid); + return !this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, @@ -1144,6 +1177,11 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); + +extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); #else struct ftrace_func_command; @@ -1366,17 +1404,17 @@ struct trace_subsystem_dir { }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { @@ -1389,7 +1427,7 @@ void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void -__trace_event_discard_commit(struct ring_buffer *buffer, +__trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { @@ -1415,7 +1453,7 @@ __trace_event_discard_commit(struct ring_buffer *buffer, */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) @@ -1450,7 +1488,7 @@ __event_trigger_test_discard(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { @@ -1481,7 +1519,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file 
*file, - struct ring_buffer *buffer, + struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) @@ -1892,6 +1930,15 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +/* Used from boot time tracer */ +extern int trace_set_options(struct trace_array *tr, char *option); +extern int tracing_set_tracer(struct trace_array *tr, const char *buf); +extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id); +extern int tracing_set_cpumask(struct trace_array *tr, + cpumask_var_t tracing_cpumask_new); + + #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); @@ -1949,6 +1996,9 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); +/* Used from boot time tracer */ +extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); +extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c new file mode 100644 index 000000000000..06d7feb5255f --- /dev/null +++ b/kernel/trace/trace_boot.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_boot.c + * Tracing kernel boot-time + */ + +#define pr_fmt(fmt) "trace_boot: " fmt + +#include <linux/bootconfig.h> +#include <linux/cpumask.h> +#include <linux/ftrace.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/trace.h> +#include <linux/trace_events.h> + +#include "trace.h" + +#define MAX_BUF_LEN 256 + +static void __init +trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char buf[MAX_BUF_LEN]; + unsigned long v = 0; + + /* Common ftrace options */ + xbc_node_for_each_array_value(node, "options", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (trace_set_options(tr, buf) < 0) + pr_err("Failed to set option: %s\n", buf); + } + + p = xbc_node_find_value(node, "trace_clock", NULL); + if (p && *p != '\0') { + if (tracing_set_clock(tr, p) < 0) + pr_err("Failed to set trace clock: %s\n", p); + } + + p = xbc_node_find_value(node, "buffer_size", NULL); + if (p && *p != '\0') { + v = memparse(p, NULL); + if (v < PAGE_SIZE) + pr_err("Buffer size is too small: %s\n", p); + if (tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0) + pr_err("Failed to resize trace buffer to %s\n", p); + } + + p = xbc_node_find_value(node, "cpumask", NULL); + if (p && *p != '\0') { + cpumask_var_t new_mask; + + if (alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + if (cpumask_parse(p, new_mask) < 0 || + tracing_set_cpumask(tr, new_mask) < 0) + pr_err("Failed to set new CPU mask %s\n", p); + free_cpumask_var(new_mask); + } + } +} + +#ifdef CONFIG_EVENT_TRACING +static void __init +trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p; + + 
xbc_node_for_each_array_value(node, "events", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) { + pr_err("String is too long: %s\n", p); + continue; + } + + if (ftrace_set_clr_event(tr, buf, 1) < 0) + pr_err("Failed to enable event: %s\n", p); + } +} + +#ifdef CONFIG_KPROBE_EVENTS +static int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + struct dynevent_cmd cmd; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *val; + int ret; + + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; + + xbc_node_for_each_array_value(node, "probes", anode, val) { + ret = kprobe_event_add_field(&cmd, val); + if (ret) + return ret; + } + + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) + pr_err("Failed to add probe: %s\n", buf); + + return ret; +} +#else +static inline int __init +trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) +{ + pr_err("Kprobe event is not supported.\n"); + return -ENOTSUPP; +} +#endif + +#ifdef CONFIG_HIST_TRIGGERS +static int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + struct dynevent_cmd cmd; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p; + int ret; + + synth_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + + ret = synth_event_gen_cmd_start(&cmd, event, NULL); + if (ret) + return ret; + + xbc_node_for_each_array_value(node, "fields", anode, p) { + ret = synth_event_add_field_str(&cmd, p); + if (ret) + return ret; + } + + ret = synth_event_gen_cmd_end(&cmd); + if (ret < 0) + pr_err("Failed to add synthetic event: %s\n", buf); + + return ret; +} +#else +static inline int __init +trace_boot_add_synth_event(struct xbc_node *node, const char *event) +{ + pr_err("Synthetic event is not supported.\n"); + return -ENOTSUPP; +} +#endif + +static void __init +trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, + struct xbc_node *enode) +{ + struct trace_event_file *file; + struct xbc_node *anode; + char buf[MAX_BUF_LEN]; + const char *p, *group, *event; + + group = xbc_node_get_data(gnode); + event = xbc_node_get_data(enode); + + if (!strcmp(group, "kprobes")) + if (trace_boot_add_kprobe_event(enode, event) < 0) + return; + if (!strcmp(group, "synthetic")) + if (trace_boot_add_synth_event(enode, event) < 0) + return; + + mutex_lock(&event_mutex); + file = find_event_file(tr, group, event); + if (!file) { + pr_err("Failed to find event: %s:%s\n", group, event); + goto out; + } + + p = xbc_node_find_value(enode, "filter", NULL); + if (p && *p != '\0') { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("filter string is too long: %s\n", p); + else if (apply_event_filter(file, buf) < 0) + pr_err("Failed to apply filter: %s\n", buf); + } + + xbc_node_for_each_array_value(enode, "actions", anode, p) { + if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + pr_err("action string is too long: %s\n", p); + else if (trigger_process_regex(file, buf) < 0) + pr_err("Failed to apply an action: %s\n", buf); + } + + if (xbc_node_find_value(enode, "enable", NULL)) { + if (trace_event_enable_disable(file, 1, 0) < 0) + pr_err("Failed to enable event node: %s:%s\n", + group, event); + } +out: + mutex_unlock(&event_mutex); +} + +static void __init +trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *gnode, *enode; + + node = xbc_node_find_child(node, "event"); + if (!node) + return; + /* per-event key starts 
with "event.GROUP.EVENT" */ + xbc_node_for_each_child(node, gnode) + xbc_node_for_each_child(gnode, enode) + trace_boot_init_one_event(tr, gnode, enode); +} +#else +#define trace_boot_enable_events(tr, node) do {} while (0) +#define trace_boot_init_events(tr, node) do {} while (0) +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static void __init +trace_boot_set_ftrace_filter(struct trace_array *tr, struct xbc_node *node) +{ + struct xbc_node *anode; + const char *p; + char *q; + + xbc_node_for_each_array_value(node, "ftrace.filters", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_filter(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } + xbc_node_for_each_array_value(node, "ftrace.notraces", anode, p) { + q = kstrdup(p, GFP_KERNEL); + if (!q) + return; + if (ftrace_set_notrace(tr->ops, q, strlen(q), 0) < 0) + pr_err("Failed to add %s to ftrace filter\n", p); + else + ftrace_filter_param = true; + kfree(q); + } +} +#else +#define trace_boot_set_ftrace_filter(tr, node) do {} while (0) +#endif + +static void __init +trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) +{ + const char *p; + + trace_boot_set_ftrace_filter(tr, node); + + p = xbc_node_find_value(node, "tracer", NULL); + if (p && *p != '\0') { + if (tracing_set_tracer(tr, p) < 0) + pr_err("Failed to set given tracer: %s\n", p); + } +} + +static void __init +trace_boot_init_one_instance(struct trace_array *tr, struct xbc_node *node) +{ + trace_boot_set_instance_options(tr, node); + trace_boot_init_events(tr, node); + trace_boot_enable_events(tr, node); + trace_boot_enable_tracer(tr, node); +} + +static void __init +trace_boot_init_instances(struct xbc_node *node) +{ + struct xbc_node *inode; + struct trace_array *tr; + const char *p; + + node = xbc_node_find_child(node, "instance"); + if (!node) + return; + + xbc_node_for_each_child(node, inode) { + p = xbc_node_get_data(inode); + if (!p || *p == '\0') + continue; + + tr = trace_array_get_by_name(p); + if (!tr) { + pr_err("Failed to get trace instance %s\n", p); + continue; + } + trace_boot_init_one_instance(tr, inode); + trace_array_put(tr); + } +} + +static int __init trace_boot_init(void) +{ + struct xbc_node *trace_node; + struct trace_array *tr; + + trace_node = xbc_find_node("ftrace"); + if (!trace_node) + return 0; + + tr = top_trace_array(); + if (!tr) + return 0; + + /* Global trace array is also one instance */ + trace_boot_init_one_instance(tr, trace_node); + trace_boot_init_instances(trace_node); + + return 0; +} + +fs_initcall(trace_boot_init); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 88e158d27965..eff099123aa2 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -32,10 +32,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) { struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; + struct trace_buffer *buffer; struct trace_array_cpu *data; struct ring_buffer_event *event; struct trace_branch *entry; - struct ring_buffer *buffer; unsigned long flags; int pc; const char *p; @@ -55,12 +55,12 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) raw_local_irq_save(flags); current->trace_recursion |= TRACE_BRANCH_BIT; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); - buffer = 
tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 89779eb84a07..9f2e8520b748 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -223,3 +223,215 @@ static __init int init_dynamic_event(void) return 0; } fs_initcall(init_dynamic_event); + +/** + * dynevent_arg_add - Add an arg to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg: The argument to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity + * + * Append an argument to a dynevent_cmd. The argument string will be + * appended to the current cmd string, followed by a separator, if + * applicable. Before the argument is added, the @check_arg function, + * if present, will be used to check the sanity of the current arg + * string. + * + * The cmd string and separator should be set using the + * dynevent_arg_init() before any arguments are added using this + * function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg) +{ + int ret = 0; + + if (check_arg) { + ret = check_arg(arg); + if (ret) + return ret; + } + + ret = seq_buf_printf(&cmd->seq, " %s%c", arg->str, arg->separator); + if (ret) { + pr_err("String is too long: %s%c\n", arg->str, arg->separator); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_arg_pair_add - Add an arg pair to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @arg_pair: The argument pair to append to the current cmd + * @check_arg: An (optional) pointer to a function checking arg sanity + * + * Append an argument pair to a dynevent_cmd. An argument pair + * consists of a left-hand-side argument and a right-hand-side + * argument separated by an operator, which can be whitespace, all + * followed by a separator, if applicable. This can be used to add + * arguments of the form 'type variable_name;' or 'x+y'. + * + * The lhs argument string will be appended to the current cmd string, + * followed by an operator, if applicable, followed by the rhs string, + * followed finally by a separator, if applicable. Before the + * argument is added, the @check_arg function, if present, will be + * used to check the sanity of the current arg strings. + * + * The cmd strings, operator, and separator should be set using the + * dynevent_arg_pair_init() before any arguments are added using this + * function. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg) +{ + int ret = 0; + + if (check_arg) { + ret = check_arg(arg_pair); + if (ret) + return ret; + } + + ret = seq_buf_printf(&cmd->seq, " %s%c%s%c", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); + if (ret) { + pr_err("field string is too long: %s%c%s%c\n", arg_pair->lhs, + arg_pair->operator, arg_pair->rhs, + arg_pair->separator); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_str_add - Add a string to a dynevent_cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event cmd + * @str: The string to append to the current cmd + * + * Append a string to a dynevent_cmd. 
The string will be appended to + * the current cmd string as-is, with nothing prepended or appended. + * + * Return: 0 if successful, error otherwise. + */ +int dynevent_str_add(struct dynevent_cmd *cmd, const char *str) +{ + int ret = 0; + + ret = seq_buf_puts(&cmd->seq, str); + if (ret) { + pr_err("String is too long: %s\n", str); + return -E2BIG; + } + + return ret; +} + +/** + * dynevent_cmd_init - Initialize a dynevent_cmd object + * @cmd: A pointer to the dynevent_cmd struct representing the cmd + * @buf: A pointer to the buffer to generate the command into + * @maxlen: The length of the buffer the command will be generated into + * @type: The type of the cmd, checked against further operations + * @run_command: The type-specific function that will actually run the command + * + * Initialize a dynevent_cmd. A dynevent_cmd is used to build up and + * run dynamic event creation commands, such as commands for creating + * synthetic and kprobe events. Before calling any of the functions + * used to build the command, a dynevent_cmd object should be + * instantiated and initialized using this function. + * + * The initialization sets things up by saving a pointer to the + * user-supplied buffer and its length via the @buf and @maxlen + * params, and by saving the cmd-specific @type and @run_command + * params which are used to check subsequent dynevent_cmd operations + * and actually run the command when complete. + */ +void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command) +{ + memset(cmd, '\0', sizeof(*cmd)); + + seq_buf_init(&cmd->seq, buf, maxlen); + cmd->type = type; + cmd->run_command = run_command; +} + +/** + * dynevent_arg_init - Initialize a dynevent_arg object + * @arg: A pointer to the dynevent_arg struct representing the arg + * @separator: An (optional) separator, appended after adding the arg + * + * Initialize a dynevent_arg object. A dynevent_arg represents an + * object used to append single arguments to the current command + * string. After the arg string is successfully appended to the + * command string, the optional @separator is appended. If no + * separator was specified when initializing the arg, a space will be + * appended. + */ +void dynevent_arg_init(struct dynevent_arg *arg, + char separator) +{ + memset(arg, '\0', sizeof(*arg)); + + if (!separator) + separator = ' '; + arg->separator = separator; +} + +/** + * dynevent_arg_pair_init - Initialize a dynevent_arg_pair object + * @arg_pair: A pointer to the dynevent_arg_pair struct representing the arg + * @operator: An (optional) operator, appended after adding the first arg + * @separator: An (optional) separator, appended after adding the second arg + * + * Initialize a dynevent_arg_pair object. A dynevent_arg_pair + * represents an object used to append argument pairs such as 'type + * variable_name;' or 'x+y' to the current command string. An + * argument pair consists of a left-hand-side argument and a + * right-hand-side argument separated by an operator, which can be + * whitespace, all followed by a separator, if applicable. After the + * first arg string is successfully appended to the command string, + * the optional @operator is appended, followed by the second arg and + * an optional @separator. If no separator was specified when + * initializing the arg, a space will be appended. 
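A minimal sketch of how these arg helpers compose, assuming a struct dynevent_cmd *cmd already initialized by a type-specific wrapper such as synth_event_cmd_init(); the "u64"/"lat" type and name are hypothetical:

	struct dynevent_arg_pair pair;

	dynevent_arg_pair_init(&pair, 0, ';');	/* operator 0 defaults to ' ' */
	pair.lhs = "u64";			/* hypothetical field type */
	pair.rhs = "lat";			/* hypothetical field name */

	/* appends " u64 lat;" to cmd->seq via the seq_buf_printf() above */
	dynevent_arg_pair_add(cmd, &pair, NULL);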
+ */ +void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + char operator, char separator) +{ + memset(arg_pair, '\0', sizeof(*arg_pair)); + + if (!operator) + operator = ' '; + arg_pair->operator = operator; + + if (!separator) + separator = ' '; + arg_pair->separator = separator; +} + +/** + * dynevent_create - Create the dynamic event contained in dynevent_cmd + * @cmd: The dynevent_cmd object containing the dynamic event creation command + * + * Once a dynevent_cmd object has been successfully built up via the + * dynevent_cmd_init(), dynevent_arg_add() and dynevent_arg_pair_add() + * functions, this function runs the final command to actually create + * the event. + * + * Return: 0 if the event was successfully created, error otherwise. + */ +int dynevent_create(struct dynevent_cmd *cmd) +{ + return cmd->run_command(cmd); +} +EXPORT_SYMBOL_GPL(dynevent_create); diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 46898138d2df..d6857a254ede 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -117,4 +117,36 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); #define for_each_dyn_event_safe(pos, n) \ list_for_each_entry_safe(pos, n, &dyn_event_list, list) +extern void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen, + enum dynevent_type type, + dynevent_create_fn_t run_command); + +typedef int (*dynevent_check_arg_fn_t)(void *data); + +struct dynevent_arg { + const char *str; + char separator; /* e.g. ';', ',', or nothing */ +}; + +extern void dynevent_arg_init(struct dynevent_arg *arg, + char separator); +extern int dynevent_arg_add(struct dynevent_cmd *cmd, + struct dynevent_arg *arg, + dynevent_check_arg_fn_t check_arg); + +struct dynevent_arg_pair { + const char *lhs; + const char *rhs; + char operator; /* e.g. '=' or nothing */ + char separator; /* e.g. 
';', ',', or nothing */ +}; + +extern void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, + char operator, char separator); + +extern int dynevent_arg_pair_add(struct dynevent_cmd *cmd, + struct dynevent_arg_pair *arg_pair, + dynevent_check_arg_fn_t check_arg); +extern int dynevent_str_add(struct dynevent_cmd *cmd, const char *str); + #endif diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 3e9d81608284..f22746f3c132 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -164,7 +164,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, F_STRUCT( __field( int, size ) - __dynamic_array(unsigned long, caller ) + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) ), F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n" diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c8622a44d300..f38234ecea18 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -238,7 +238,7 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) if (!pid_list) return false; - data = this_cpu_ptr(tr->trace_buffer.data); + data = this_cpu_ptr(tr->array_buffer.data); return data->ignore_pid; } @@ -273,6 +273,7 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, if (!fbuffer->event) return NULL; + fbuffer->regs = NULL; fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } @@ -547,7 +548,7 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, prev) && trace_ignore_this_task(pid_list, next)); } @@ -561,7 +562,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt, pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, next)); } @@ -572,12 +573,12 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are already tracing */ - if (!this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, task)); } @@ -588,13 +589,13 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) struct trace_pid_list *pid_list; /* Nothing to do if we are not tracing */ - if (this_cpu_read(tr->trace_buffer.data->ignore_pid)) + if (this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); /* Set tracing if current is enabled */ - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } @@ -626,7 +627,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) } for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false; + per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; rcu_assign_pointer(tr->filtered_pids, NULL); @@ -698,7 +699,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) return; if (!--dir->nr_events) { - tracefs_remove_recursive(dir->entry); + tracefs_remove(dir->entry); 
list_del(&dir->list); + __put_system_dir(dir); } @@ -717,7 +718,7 @@ static void remove_event_file_dir(struct trace_event_file *file) } spin_unlock(&dir->d_lock); - tracefs_remove_recursive(dir); + tracefs_remove(dir); } list_del(&file->list); @@ -1595,7 +1596,7 @@ static void ignore_task_cpu(void *data) pid_list = rcu_dereference_protected(tr->filtered_pids, mutex_is_locked(&event_mutex)); - this_cpu_write(tr->trace_buffer.data->ignore_pid, + this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, current)); } @@ -2553,6 +2554,91 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) return file; } +/** + * trace_get_event_file - Find and return a trace event file + * @instance: The name of the trace instance containing the event + * @system: The name of the system containing the event + * @event: The name of the event + * + * Return a trace event file given the trace instance name, trace + * system, and trace event name. If the instance name is NULL, it + * refers to the top-level trace array. + * + * This function will look it up and return it if found, after calling + * trace_array_get() to prevent the instance from going away, and + * incrementing the event's module refcount to prevent it from being + * removed. + * + * To release the file, call trace_put_event_file(), which will call + * trace_array_put() and decrement the event's module refcount. + * + * Return: The trace event file on success, ERR_PTR otherwise. + */ +struct trace_event_file *trace_get_event_file(const char *instance, + const char *system, + const char *event) +{ + struct trace_array *tr = top_trace_array(); + struct trace_event_file *file = NULL; + int ret = -EINVAL; + + if (instance) { + tr = trace_array_find_get(instance); + if (!tr) + return ERR_PTR(-ENOENT); + } else { + ret = trace_array_get(tr); + if (ret) + return ERR_PTR(ret); + } + + mutex_lock(&event_mutex); + + file = find_event_file(tr, system, event); + if (!file) { + trace_array_put(tr); + ret = -EINVAL; + goto out; + } + + /* Don't let event modules unload while in use */ + ret = try_module_get(file->event_call->mod); + if (!ret) { + trace_array_put(tr); + ret = -EBUSY; + goto out; + } + + ret = 0; + out: + mutex_unlock(&event_mutex); + + if (ret) + file = ERR_PTR(ret); + + return file; +} +EXPORT_SYMBOL_GPL(trace_get_event_file); + +/** + * trace_put_event_file - Release a file from trace_get_event_file() + * @file: The trace event file + * + * If a file was retrieved using trace_get_event_file(), this should + * be called when it's no longer needed. It will cancel the previous + * trace_array_get() called by that function, and decrement the + * event's module refcount. 
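A minimal usage sketch for this pair of APIs, assuming a top-level (NULL instance) lookup; "sched"/"sched_switch" are placeholder system/event names:

	struct trace_event_file *file;

	file = trace_get_event_file(NULL, "sched", "sched_switch");
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* ... use the event file, e.g. with the synth_event_trace() APIs ... */

	trace_put_event_file(file);	/* drops the instance ref and module refcount */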
+ */ +void trace_put_event_file(struct trace_event_file *file) +{ + mutex_lock(&event_mutex); + module_put(file->event_call->mod); + mutex_unlock(&event_mutex); + + trace_array_put(file->tr); +} +EXPORT_SYMBOL_GPL(trace_put_event_file); + #ifdef CONFIG_DYNAMIC_FTRACE /* Avoid typos */ @@ -3082,7 +3168,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - tracefs_remove_recursive(tr->event_dir); + tracefs_remove(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; @@ -3409,8 +3495,8 @@ static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; struct ftrace_entry *entry; unsigned long flags; long disabled; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f2896d13001b..e7ce7cdac62f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,7 +66,12 @@ C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ C(INVALID_REF_KEY, "Using variable references in keys not supported"), \ C(VAR_NOT_FOUND, "Couldn't find variable"), \ - C(FIELD_NOT_FOUND, "Couldn't find field"), + C(FIELD_NOT_FOUND, "Couldn't find field"), \ + C(EMPTY_ASSIGNMENT, "Empty assignment"), \ + C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ + C(EMPTY_SORT_FIELD, "Empty sort field"), \ + C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), #undef C #define C(a, b) HIST_ERR_##a @@ -375,7 +380,7 @@ struct hist_trigger_data { unsigned int n_save_var_str; }; -static int synth_event_create(int argc, const char **argv); +static int create_synth_event(int argc, const char **argv); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -383,7 +388,7 @@ static bool synth_event_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { - .create = synth_event_create, + .create = create_synth_event, .show = synth_event_show, .is_busy = synth_event_is_busy, .free = synth_event_release, @@ -394,6 +399,7 @@ struct synth_field { char *type; char *name; size_t size; + unsigned int offset; bool is_signed; bool is_string; }; @@ -408,6 +414,7 @@ struct synth_event { struct trace_event_class class; struct trace_event_call call; struct tracepoint *tp; + struct module *mod; }; static bool is_synth_event(struct dyn_event *ev) @@ -470,11 +477,12 @@ struct action_data { * When a histogram trigger is hit, the values of any * references to variables, including variables being passed * as parameters to synthetic events, are collected into a - * var_ref_vals array. This var_ref_idx is the index of the - * first param in the array to be passed to the synthetic - * event invocation. + * var_ref_vals array. This var_ref_idx array is an array of + * indices into the var_ref_vals array, one for each synthetic + * event param, and is passed to the synthetic event + * invocation. 
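Concretely (hypothetical indices, for illustration only): if a synthetic event has two params whose collected values landed at var_ref_vals[3] and var_ref_vals[0], then var_ref_idx is {3, 0}, and the probe reads var_ref_vals[var_ref_idx[i]] for each param i, as trace_event_raw_event_synth() does below.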
*/ - unsigned int var_ref_idx; + unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -608,7 +616,8 @@ static void last_cmd_set(struct trace_event_file *file, char *str) if (!str) return; - strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); + strcpy(last_cmd, "hist:"); + strncat(last_cmd, str, MAX_FILTER_STR_VAL - 1 - sizeof("hist:")); if (file) { call = file->event_call; @@ -662,6 +671,8 @@ static int synth_event_define_fields(struct trace_event_call *call) if (ret) break; + event->fields[i]->offset = n_u64; + if (event->fields[i]->is_string) { offset += STR_VAR_LEN_MAX; n_u64 += STR_VAR_LEN_MAX / sizeof(u64); @@ -834,7 +845,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, fmt = synth_field_fmt(se->fields[i]->type); /* parameter types */ - if (tr->trace_flags & TRACE_ITER_VERBOSE) + if (tr && tr->trace_flags & TRACE_ITER_VERBOSE) trace_seq_printf(s, "%s ", fmt); snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); @@ -875,14 +886,14 @@ static struct trace_event_functions synth_event_funcs = { static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; - struct ring_buffer *buffer; + struct trace_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64; + unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -896,7 +907,7 @@ static notrace void trace_event_raw_event_synth(void *__data, * Avoid ring buffer recursion detection, as this event * is being performed within another event. */ - buffer = trace_file->tr->trace_buffer.buffer; + buffer = trace_file->tr->array_buffer.buffer; ring_buffer_nest_start(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, @@ -905,15 +916,16 @@ static notrace void trace_event_raw_event_synth(void *__data, goto out; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { - char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; + char *str_val = (char *)(long)var_ref_vals[val_idx]; char *str_field = (char *)&entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { struct synth_field *field = event->fields[i]; - u64 val = var_ref_vals[var_ref_idx + i]; + u64 val = var_ref_vals[val_idx]; switch (field->size) { case 1: @@ -1113,10 +1125,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) } typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx); + unsigned int *var_ref_idx); static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct tracepoint *tp = event->tp; @@ -1293,6 +1305,273 @@ struct hist_var_data { struct hist_trigger_data *hist_data; }; +static int synth_event_check_arg_fn(void *data) +{ + struct dynevent_arg_pair *arg_pair = data; + int size; + + size = synth_field_size((char *)arg_pair->lhs); + + return size ? 0 : -EINVAL; +} + +/** + * synth_event_add_field - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type: The type of the new field to add + * @name: The name of the new field to add + * + * Add a new field to a synthetic event cmd object. 
Field ordering is in + * the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, + const char *name) +{ + struct dynevent_arg_pair arg_pair; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type || !name) + return -EINVAL; + + dynevent_arg_pair_init(&arg_pair, 0, ';'); + + arg_pair.lhs = type; + arg_pair.rhs = name; + + ret = dynevent_arg_pair_add(cmd, &arg_pair, synth_event_check_arg_fn); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field); + +/** + * synth_event_add_field_str - Add a new field to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @type_name: The type and name of the new field to add, as a single string + * + * Add a new field to a synthetic event cmd object, as a single + * string. The @type_name string is expected to be of the form 'type + * name', which will be appended by ';'. No sanity checking is done - + * what's passed in is assumed to already be well-formed. Field + * ordering is in the same order the fields are added. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name) +{ + struct dynevent_arg arg; + int ret; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (!type_name) + return -EINVAL; + + dynevent_arg_init(&arg, ';'); + + arg.str = type_name; + + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_field_str); + +/** + * synth_event_add_fields - Add multiple fields to a synthetic event cmd + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Add a new set of fields to a synthetic event cmd object. The event + * fields that will be defined for the event should be passed in as an + * array of struct synth_field_desc, and the number of elements in the + * array passed in as n_fields. Field ordering will retain the + * ordering given in the fields array. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. 
+ */ +int synth_event_add_fields(struct dynevent_cmd *cmd, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_add_fields); + +/** + * __synth_event_gen_cmd_start - Start a synthetic event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @mod: The module creating the event, NULL if not created from a module + * @args: Variable number of arg (pairs), one pair for each field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the synth_event_gen_cmd_start() wrapper, which + * automatically adds a NULL to the end of the arg list. If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * There should be an even number of variable args, each pair consisting + * of a type followed by a field name. + * + * See synth_field_size() for available types. If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + va_start(args, mod); + for (;;) { + const char *type, *name; + + type = va_arg(args, const char *); + if (!type) + break; + name = va_arg(args, const char *); + if (!name) + break; + + if (++cmd->n_fields > SYNTH_FIELDS_MAX) { + ret = -EINVAL; + break; + } + + ret = synth_event_add_field(cmd, type, name); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__synth_event_gen_cmd_start); + +/** + * synth_event_gen_cmd_array_start - Start synthetic event command from an array + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @name: The name of the synthetic event + * @mod: The module creating the event, NULL if not created from a module + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * + * Generate a synthetic event command to be executed by + * synth_event_gen_cmd_end(). This function can be used to generate + * the complete command or only the first part of it; in the latter + * case, synth_event_add_field(), synth_event_add_field_str(), or + * synth_event_add_fields() can be used to add more fields following + * this. + * + * The event fields that will be defined for the event should be + * passed in as an array of struct synth_field_desc, and the number of + * elements in the array passed in as n_fields. Field ordering will + * retain the ordering given in the fields array. + * + * See synth_field_size() for available types. 
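To make the flow concrete, a sketch of incremental command generation, using the synth_event_gen_cmd_start() wrapper noted above; the "wakeup_lat" event and its fields are hypothetical:

	struct dynevent_cmd cmd;
	char *buf;
	int ret;

	buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN);

	ret = synth_event_gen_cmd_start(&cmd, "wakeup_lat", THIS_MODULE,
					"pid_t", "pid");
	if (!ret)
		ret = synth_event_add_field(&cmd, "u64", "lat");
	if (!ret)
		ret = synth_event_gen_cmd_end(&cmd);	/* runs the command */
	kfree(buf);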
If field_name contains + * [n] the field is considered to be an array. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, + struct module *mod, + struct synth_field_desc *fields, + unsigned int n_fields) +{ + struct dynevent_arg arg; + unsigned int i; + int ret = 0; + + cmd->event_name = name; + cmd->private_data = mod; + + if (cmd->type != DYNEVENT_TYPE_SYNTH) + return -EINVAL; + + if (n_fields > SYNTH_FIELDS_MAX) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + arg.str = name; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + for (i = 0; i < n_fields; i++) { + if (fields[i].type == NULL || fields[i].name == NULL) + return -EINVAL; + + ret = synth_event_add_field(cmd, fields[i].type, fields[i].name); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); + static int __create_synth_event(int argc, const char *name, const char **argv) { struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; @@ -1361,29 +1640,123 @@ static int __create_synth_event(int argc, const char *name, const char **argv) goto out; } +/** + * synth_event_create - Create a new synthetic event + * @name: The name of the new synthetic event + * @fields: An array of type/name field descriptions + * @n_fields: The number of field descriptions contained in the fields array + * @mod: The module creating the event, NULL if not created from a module + * + * Create a new synthetic event with the given name under the + * trace/events/synthetic/ directory. The event fields that will be + * defined for the event should be passed in as an array of struct + * synth_field_desc, and the number of elements in the array passed in as + * n_fields. Field ordering will retain the ordering given in the + * fields array. + * + * If the new synthetic event is being created from a module, the mod + * param must be non-NULL. This will ensure that the trace buffer + * won't contain unreadable events. + * + * The new synth event should be deleted using the synth_event_delete() + * function. The new synthetic event can be generated from modules or + * other kernel code using synth_event_trace() and related functions. + * + * Return: 0 if successful, error otherwise. + */ +int synth_event_create(const char *name, struct synth_field_desc *fields, + unsigned int n_fields, struct module *mod) +{ + struct dynevent_cmd cmd; + char *buf; + int ret; + + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + synth_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN); + + ret = synth_event_gen_cmd_array_start(&cmd, name, mod, + fields, n_fields); + if (ret) + goto out; + + ret = synth_event_gen_cmd_end(&cmd); + out: + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_create); + +static int destroy_synth_event(struct synth_event *se) +{ + int ret; + + if (se->ref) + ret = -EBUSY; + else { + ret = unregister_synth_event(se); + if (!ret) { + dyn_event_remove(&se->devent); + free_synth_event(se); + } + } + + return ret; +} + +/** + * synth_event_delete - Delete a synthetic event + * @event_name: The name of the synthetic event to delete + * + * Delete a synthetic event that was created with synth_event_create(). + * + * Return: 0 if successful, error otherwise. 
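The same hypothetical event via the one-shot array-based API, and its later removal (a sketch under the same assumptions):

	static struct synth_field_desc wakeup_fields[] = {
		{ .type = "pid_t", .name = "pid" },
		{ .type = "u64",   .name = "lat" },
	};
	int ret;

	ret = synth_event_create("wakeup_lat", wakeup_fields,
				 ARRAY_SIZE(wakeup_fields), THIS_MODULE);

	/* ... generate traces against the event ... */

	ret = synth_event_delete("wakeup_lat");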
+ */ +int synth_event_delete(const char *event_name) +{ + struct synth_event *se = NULL; + struct module *mod = NULL; + int ret = -ENOENT; + + mutex_lock(&event_mutex); + se = find_synth_event(event_name); + if (se) { + mod = se->mod; + ret = destroy_synth_event(se); + } + mutex_unlock(&event_mutex); + + if (mod) { + mutex_lock(&trace_types_lock); + /* + * It is safest to reset the ring buffer if the module + * being unloaded registered any events that were + * used. The only worry is if a new module gets + * loaded, and takes on the same id as the events of + * this module. When printing out the buffer, traced + * events left over from this module may be passed to + * the new module events and unexpected results may + * occur. + */ + tracing_reset_all_online_cpus(); + mutex_unlock(&trace_types_lock); + } + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_delete); + static int create_or_delete_synth_event(int argc, char **argv) { const char *name = argv[0]; - struct synth_event *event = NULL; int ret; /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { - mutex_lock(&event_mutex); - event = find_synth_event(name + 1); - if (event) { - if (event->ref) - ret = -EBUSY; - else { - ret = unregister_synth_event(event); - if (!ret) { - dyn_event_remove(&event->devent); - free_synth_event(event); - } - } - } else - ret = -ENOENT; - mutex_unlock(&event_mutex); + ret = synth_event_delete(name + 1); return ret; } @@ -1391,7 +1764,474 @@ static int create_or_delete_synth_event(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } -static int synth_event_create(int argc, const char **argv) +static int synth_event_run_command(struct dynevent_cmd *cmd) +{ + struct synth_event *se; + int ret; + + ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + if (ret) + return ret; + + se = find_synth_event(cmd->event_name); + if (WARN_ON(!se)) + return -ENOENT; + + se->mod = cmd->private_data; + + return ret; +} + +/** + * synth_event_cmd_init - Initialize a synthetic event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a synthetic event command object. Use this before + * calling any of the other dynevent_cmd functions. + */ +void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_SYNTH, + synth_event_run_command); +} +EXPORT_SYMBOL_GPL(synth_event_cmd_init); + +/** + * synth_event_trace - Trace a synthetic event + * @file: The trace_event_file representing the synthetic event + * @n_vals: The number of values in vals + * @args: Variable number of args containing the event values + * + * Trace a synthetic event using the values passed in the variable + * argument list. + * + * The argument list should be a list of 'n_vals' u64 values. The number + * of vals must match the number of fields in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) 
+{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + va_list args; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + va_start(args, n_vals); + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + u64 val; + + val = va_arg(args, u64); + + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)val; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = val; + n_u64++; + } + } + va_end(args); + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace); + +/** + * synth_event_trace_array - Trace a synthetic event from an array + * @file: The trace_event_file representing the synthetic event + * @vals: Array of values + * @n_vals: The number of values in vals + * + * Trace a synthetic event using the values passed in as 'vals'. + * + * The 'vals' array is just an array of 'n_vals' u64. The number of + * vals must match the number of fields in the synthetic event, and + * must be in the same order as the synthetic event fields. + * + * All vals should be cast to u64, and string vals are just pointers + * to strings, cast to u64. Strings will be copied into space + * reserved in the event for the string, using these pointers. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_array(struct trace_event_file *file, u64 *vals, + unsigned int n_vals) +{ + struct trace_event_buffer fbuffer; + struct synth_trace_event *entry; + struct trace_buffer *buffer; + struct synth_event *event; + unsigned int i, n_u64; + int fields_size = 0; + int ret = 0; + + /* + * Normal event generation doesn't get called at all unless + * the ENABLED bit is set (which attaches the probe thus + * allowing this code to be called, etc). Because this is + * called directly by the user, we don't have that but we + * still need to honor not logging when disabled. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + return 0; + + event = file->event_call->data; + + if (n_vals != event->n_fields) + return -EINVAL; + + if (trace_trigger_soft_disabled(file)) + return -EINVAL; + + fields_size = event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. 
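Generating the hypothetical "wakeup_lat" event from kernel code, in both the varargs and array forms (a sketch; `file` is assumed to come from trace_get_event_file(NULL, "synthetic", "wakeup_lat"), and all values are passed as u64, strings as char * cast to u64):

	u64 vals[2] = { (u64)current->pid, 1000 };
	int ret;

	ret = synth_event_trace(file, 2, (u64)current->pid, (u64)1000);

	ret = synth_event_trace_array(file, vals, 2);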
+ */ + buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + + entry = trace_event_buffer_reserve(&fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + if (event->fields[i]->is_string) { + char *str_val = (char *)(long)vals[i]; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + entry->fields[n_u64] = vals[i]; + n_u64++; + } + } + + trace_event_buffer_commit(&fbuffer); +out: + ring_buffer_nest_end(buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_array); + +/** + * synth_event_trace_start - Start piecewise synthetic event trace + * @file: The trace_event_file representing the synthetic event + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Start the trace of a synthetic event field-by-field rather than all + * at once. + * + * This function 'opens' an event trace, which means space is reserved + * for the event in the trace buffer, after which the event's + * individual field values can be set through either + * synth_event_add_next_val() or synth_event_add_val(). + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state until the event trace is + * closed (and the event finally traced) using + * synth_event_trace_end(). + * + * Note that synth_event_trace_end() must be called after all values + * have been added for each event trace, regardless of whether adding + * all field values succeeded or not. + * + * Note also that for a given event trace, all fields must be added + * using either synth_event_add_next_val() or synth_event_add_val() + * but not both together or interleaved. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state) +{ + struct synth_trace_event *entry; + int fields_size = 0; + int ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + memset(trace_state, '\0', sizeof(*trace_state)); + + /* + * Normal event tracing doesn't get called at all unless the + * ENABLED bit is set (which attaches the probe thus allowing + * this code to be called, etc). Because this is called + * directly by the user, we don't have that but we still need + * to honor not logging when disabled. For the iterated + * trace case, we save the enabled state upon start and just + * ignore the following data calls. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { + trace_state->enabled = false; + goto out; + } + + trace_state->enabled = true; + + trace_state->event = file->event_call->data; + + if (trace_trigger_soft_disabled(file)) { + ret = -EINVAL; + goto out; + } + + fields_size = trace_state->event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. 
+ */ + trace_state->buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(trace_state->buffer); + + entry = trace_event_buffer_reserve(&trace_state->fbuffer, file, + sizeof(*entry) + fields_size); + if (!entry) { + ret = -EINVAL; + goto out; + } + + trace_state->entry = entry; +out: + return ret; +} +EXPORT_SYMBOL_GPL(synth_event_trace_start); + +static int __synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) +{ + struct synth_field *field = NULL; + struct synth_trace_event *entry; + struct synth_event *event; + int i, ret = 0; + + if (!trace_state) { + ret = -EINVAL; + goto out; + } + + /* can't mix synth_event_add_next_val() with synth_event_add_val() */ + if (field_name) { + if (trace_state->add_next) { + ret = -EINVAL; + goto out; + } + trace_state->add_name = true; + } else { + if (trace_state->add_name) { + ret = -EINVAL; + goto out; + } + trace_state->add_next = true; + } + + if (!trace_state->enabled) + goto out; + + event = trace_state->event; + if (trace_state->add_name) { + for (i = 0; i < event->n_fields; i++) { + field = event->fields[i]; + if (strcmp(field->name, field_name) == 0) + break; + } + if (!field) { + ret = -EINVAL; + goto out; + } + } else { + if (trace_state->cur_field >= event->n_fields) { + ret = -EINVAL; + goto out; + } + field = event->fields[trace_state->cur_field++]; + } + + entry = trace_state->entry; + if (field->is_string) { + char *str_val = (char *)(long)val; + char *str_field; + + if (!str_val) { + ret = -EINVAL; + goto out; + } + + str_field = (char *)&entry->fields[field->offset]; + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + } else + entry->fields[field->offset] = val; + out: + return ret; +} + +/** + * synth_event_add_next_val - Add the next field's value to an open synth trace + * @val: The value to set the next field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the next field in an event that's been opened by + * synth_event_trace_start(). + * + * The val param should be the value cast to u64. If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function assumes all the fields in an event are to be set one + * after another - successive calls to this function are made, one for + * each field, in the order of the fields in the event, until all + * fields have been set. If you'd rather set each field individually + * without regard to ordering, synth_event_add_val() can be used + * instead. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_next_val(u64 val, + struct synth_event_trace_state *trace_state) +{ + return __synth_event_add_val(NULL, val, trace_state); +} +EXPORT_SYMBOL_GPL(synth_event_add_next_val); + +/** + * synth_event_add_val - Add a named field's value to an open synth trace + * @field_name: The name of the synthetic event field value to set + * @val: The value to set the named field to + * @trace_state: A pointer to object tracking the piecewise trace state + * + * Set the value of the named field in an event that's been opened by + * synth_event_trace_start(). 
+ * + * The val param should be the value cast to u64. If the value points + * to a string, the val param should be a char * cast to u64. + * + * This function looks up the field name, and if found, sets the field + * to the specified value. This lookup makes this function more + * expensive than synth_event_add_next_val(), so use that or the + * non-piecewise synth_event_trace() instead if efficiency is more + * important. + * + * Note however that synth_event_add_next_val() and + * synth_event_add_val() can't be intermixed for a given event trace - + * one or the other but not both can be used at the same time. + * + * Note also that synth_event_trace_end() must be called after all + * values have been added for each event trace, regardless of whether + * adding all field values succeeded or not. + * + * Return: 0 on success, err otherwise. + */ +int synth_event_add_val(const char *field_name, u64 val, + struct synth_event_trace_state *trace_state) +{ + return __synth_event_add_val(field_name, val, trace_state); +} +EXPORT_SYMBOL_GPL(synth_event_add_val); + +/** + * synth_event_trace_end - End piecewise synthetic event trace + * @trace_state: A pointer to object tracking the piecewise trace state + * + * End the trace of a synthetic event opened by + * synth_event_trace_start(). + * + * This function 'closes' an event trace, which basically means that + * it commits the reserved event and cleans up other loose ends. + * + * A pointer to a trace_state object is passed in, which will keep + * track of the current event trace state opened with + * synth_event_trace_start(). + * + * Note that this function must be called after all values have been + * added for each event trace, regardless of whether adding all field + * values succeeded or not. + * + * Return: 0 on success, err otherwise. 
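The piecewise form of the same hypothetical trace, a sketch using the state object defined by this patch:

	struct synth_event_trace_state st;
	int ret;

	ret = synth_event_trace_start(file, &st);
	if (!ret) {
		synth_event_add_next_val((u64)current->pid, &st);	/* field 0: pid */
		synth_event_add_next_val(1000, &st);			/* field 1: lat */
		/* alternatively, by name (but never intermixed with the above):
		 * synth_event_add_val("lat", 1000, &st);
		 */
		ret = synth_event_trace_end(&st);	/* required once the trace was started */
	}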
+ */ +int synth_event_trace_end(struct synth_event_trace_state *trace_state) +{ + if (!trace_state) + return -EINVAL; + + trace_event_buffer_commit(&trace_state->fbuffer); + + ring_buffer_nest_end(trace_state->buffer); + + return 0; +} +EXPORT_SYMBOL_GPL(synth_event_trace_end); + +static int create_synth_event(int argc, const char **argv) { const char *name = argv[0]; int len; @@ -2041,12 +2881,6 @@ static int parse_map_size(char *str) unsigned long size, map_bits; int ret; - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } - ret = kstrtoul(str, 0, &size); if (ret) goto out; @@ -2106,25 +2940,25 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) static int parse_assignment(struct trace_array *tr, char *str, struct hist_trigger_attrs *attrs) { - int ret = 0; + int len, ret = 0; - if ((str_has_prefix(str, "key=")) || - (str_has_prefix(str, "keys="))) { - attrs->keys_str = kstrdup(str, GFP_KERNEL); + if ((len = str_has_prefix(str, "key=")) || + (len = str_has_prefix(str, "keys="))) { + attrs->keys_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->keys_str) { ret = -ENOMEM; goto out; } - } else if ((str_has_prefix(str, "val=")) || - (str_has_prefix(str, "vals=")) || - (str_has_prefix(str, "values="))) { - attrs->vals_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "val=")) || + (len = str_has_prefix(str, "vals=")) || + (len = str_has_prefix(str, "values="))) { + attrs->vals_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->vals_str) { ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "sort=")) { - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "sort="))) { + attrs->sort_key_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->sort_key_str) { ret = -ENOMEM; goto out; @@ -2135,12 +2969,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "clock=")) { - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } + } else if ((len = str_has_prefix(str, "clock="))) { + str += len; str = strstrip(str); attrs->clock = kstrdup(str, GFP_KERNEL); @@ -2148,8 +2978,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "size=")) { - int map_bits = parse_map_size(str); + } else if ((len = str_has_prefix(str, "size="))) { + int map_bits = parse_map_size(str + len); if (map_bits < 0) { ret = map_bits; @@ -2189,8 +3019,15 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) while (trigger_str) { char *str = strsep(&trigger_str, ":"); + char *rhs; - if (strchr(str, '=')) { + rhs = strchr(str, '='); + if (rhs) { + if (!strlen(++rhs)) { + ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_ASSIGNMENT, errpos(str)); + goto free; + } ret = parse_assignment(tr, str, attrs); if (ret) goto free; @@ -2661,6 +3498,22 @@ static int init_var_ref(struct hist_field *ref_field, goto out; } +static int find_var_ref_idx(struct hist_trigger_data *hist_data, + struct hist_field *var_field) +{ + struct hist_field *ref_field; + int i; + + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) + return i; + } + + return -ENOENT; +} + /** * create_var_ref - Create a variable reference and attach it to trigger * @hist_data: The trigger that will be referencing the variable @@ -4146,8 +4999,11 @@ static int check_synth_field(struct synth_event *event, field 
= event->fields[field_pos]; - if (strcmp(field->type, hist_field->type) != 0) - return -EINVAL; + if (strcmp(field->type, hist_field->type) != 0) { + if (field->size != hist_field->size || + field->is_signed != hist_field->is_signed) + return -EINVAL; + } return 0; } @@ -4234,11 +5090,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; - unsigned int i, var_ref_idx; + unsigned int i; unsigned int field_pos = 0; struct synth_event *event; char *synth_event_name; - int ret = 0; + int var_ref_idx, ret = 0; lockdep_assert_held(&event_mutex); @@ -4255,8 +5111,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event->ref++; - var_ref_idx = hist_data->n_var_refs; - for (i = 0; i < data->n_params; i++) { char *p; @@ -4305,6 +5159,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, goto err; } + var_ref_idx = find_var_ref_idx(hist_data, var_ref); + if (WARN_ON(var_ref_idx < 0)) { + ret = var_ref_idx; + goto err; + } + + data->var_ref_idx[i] = var_ref_idx; + field_pos++; kfree(p); continue; @@ -4323,7 +5185,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, } data->synth_event = event; - data->var_ref_idx = var_ref_idx; out: return ret; err: @@ -4542,10 +5403,6 @@ static int create_val_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = 0, j = 1; i < TRACING_MAP_VALS_MAX && j < TRACING_MAP_VALS_MAX; i++) { field_str = strsep(&fields_str, ","); @@ -4640,10 +5497,6 @@ static int create_key_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = n_vals; i < n_vals + TRACING_MAP_KEYS_MAX; i++) { field_str = strsep(&fields_str, ","); if (!field_str) @@ -4775,7 +5628,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, return ret; } -static int is_descending(const char *str) +static int is_descending(struct trace_array *tr, const char *str) { if (!str) return 0; @@ -4786,11 +5639,14 @@ static int is_descending(const char *str) if (strcmp(str, "ascending") == 0) return 0; + hist_err(tr, HIST_ERR_INVALID_SORT_MODIFIER, errpos((char *)str)); + return -EINVAL; } static int create_sort_keys(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *fields_str = hist_data->attrs->sort_key_str; struct tracing_map_sort_key *sort_key; int descending, ret = 0; @@ -4801,12 +5657,6 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) { - ret = -EINVAL; - goto out; - } - for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { struct hist_field *hist_field; char *field_str, *field_name; @@ -4815,25 +5665,30 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) sort_key = &hist_data->sort_keys[i]; field_str = strsep(&fields_str, ","); - if (!field_str) { - if (i == 0) - ret = -EINVAL; + if (!field_str) + break; + + if (!*field_str) { + ret = -EINVAL; + hist_err(tr, HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if ((i == TRACING_MAP_SORT_KEYS_MAX - 1) && fields_str) { + hist_err(tr, HIST_ERR_TOO_MANY_SORT_FIELDS, errpos("sort=")); ret = -EINVAL; break; } field_name = strsep(&field_str, "."); - if (!field_name) { + if (!field_name || !*field_name) { ret = -EINVAL; + 
hist_err(tr, HIST_ERR_EMPTY_SORT_FIELD, errpos("sort=")); break; } if (strcmp(field_name, "hitcount") == 0) { - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; break; @@ -4855,7 +5710,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (strcmp(field_name, test_name) == 0) { sort_key->field_idx = idx; - descending = is_descending(field_str); + descending = is_descending(tr, field_str); if (descending < 0) { ret = descending; goto out; @@ -4866,6 +5721,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } if (j == hist_data->n_fields) { ret = -EINVAL; + hist_err(tr, HIST_ERR_INVALID_SORT_FIELD, errpos(field_name)); break; } } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 40106fff06a4..dd34a1b46a86 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -116,9 +116,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } @@ -213,7 +214,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) return ret; } -static int trigger_process_regex(struct trace_event_file *file, char *buff) +int trigger_process_regex(struct trace_event_file *file, char *buff) { char *command, *next = buff; struct event_command *p; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b611cd36e22d..8a4c8d5c2c98 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -101,7 +101,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->trace_buffer.cpu = get_cpu(); + tr->array_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); @@ -118,7 +118,7 @@ static void function_trace_reset(struct trace_array *tr) static void function_trace_start(struct trace_array *tr) { - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static void @@ -143,7 +143,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, goto out; cpu = smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (!atomic_read(&data->disabled)) { local_save_flags(flags); trace_function(tr, ip, parent_ip, flags, pc); @@ -192,7 +192,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, */ local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 78af97163147..7d71546ba00a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -101,7 +101,7 @@ int __trace_graph_entry(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -171,7 +171,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) 
local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -221,7 +221,7 @@ void __trace_graph_return(struct trace_array *tr, { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, @@ -252,7 +252,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_save(flags); cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); @@ -444,9 +444,9 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, + ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, NULL, NULL); - event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu, + event = ring_buffer_peek(iter->array_buffer->buffer, iter->cpu, NULL, NULL); } @@ -503,7 +503,7 @@ print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s) { unsigned long long usecs; - usecs = iter->ts - iter->trace_buffer->time_start; + usecs = iter->ts - iter->array_buffer->time_start; do_div(usecs, NSEC_PER_USEC); trace_seq_printf(s, "%9llu us | ", usecs); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 6638d63f0921..a48808c43249 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -104,7 +104,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) { struct trace_array *tr = hwlat_trace; struct trace_event_call *call = &event_hwlat; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; unsigned long flags; @@ -556,7 +556,7 @@ static int init_tracefs(void) return 0; err: - tracefs_remove_recursive(top_dir); + tracefs_remove(top_dir); return -ENOMEM; } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a745b0cee5d3..10bbb0f381d5 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -122,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr, if (!irqs_disabled_flags(*flags) && !preempt_count()) return 0; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (likely(disabled == 1)) @@ -167,7 +167,7 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) per_cpu(tracing_cpu, cpu) = 0; tr->max_latency = 0; - tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); + tracing_reset_online_cpus(&irqsoff_trace->array_buffer); return start_irqsoff_tracer(irqsoff_trace, set); } @@ -382,7 +382,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (per_cpu(tracing_cpu, cpu)) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || atomic_read(&data->disabled)) return; @@ -420,7 +420,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) if (!tracer_enabled 
|| !tracing_is_enabled()) return; - data = per_cpu_ptr(tr->trace_buffer.data, cpu); + data = per_cpu_ptr(tr->array_buffer.data, cpu); if (unlikely(!data) || !data->critical_start || atomic_read(&data->disabled)) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index cca65044c14c..9da76104f7a2 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -43,7 +43,7 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) if (cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter.buffer_iter[cpu] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu]); tracing_iter_reset(&iter, cpu); @@ -51,7 +51,7 @@ } else { iter.cpu_file = cpu_file; iter.buffer_iter[cpu_file] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, + ring_buffer_read_prepare(iter.array_buffer->buffer, cpu_file, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu_file]); tracing_iter_reset(&iter, cpu_file); @@ -124,7 +124,7 @@ static int kdb_ftdump(int argc, const char **argv) iter.buffer_iter = buffer_iter; for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } /* A negative skip_entries means skip all but the last entries */ @@ -139,7 +139,7 @@ static int kdb_ftdump(int argc, const char **argv) ftrace_dump_buf(skip_entries, cpu_file); for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } kdb_trap_printk--; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aa515d578c5b..d8264ebb9581 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -22,7 +22,6 @@ #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 -#define MAX_KPROBE_CMDLINE_SIZE 1024 /* Kprobe early definition from command line */ static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; @@ -902,6 +901,167 @@ static int create_or_delete_trace_kprobe(int argc, char **argv) return ret == -ECANCELED ? -EINVAL : ret; } +static int trace_kprobe_run_command(struct dynevent_cmd *cmd) +{ + return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); +} + +/** + * kprobe_event_cmd_init - Initialize a kprobe event command object + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @buf: A pointer to the buffer used to build the command + * @maxlen: The length of the buffer passed in @buf + * + * Initialize a kprobe event command object. Use this before + * calling any of the other kprobe_event functions. + */ +void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) +{ + dynevent_cmd_init(cmd, buf, maxlen, DYNEVENT_TYPE_KPROBE, + trace_kprobe_run_command); +} +EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); + +/** + * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @kretprobe: Is this a return probe? + * @name: The name of the kprobe event + * @loc: The location of the kprobe event 
+ * @args: Variable number of args, one string for each probe field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_gen_cmd_start() wrapper, which automatically + * adds a NULL to the end of the arg list. If this function is used + * directly, make sure the last arg in the variable arg list is NULL. + * + * Generate a kprobe event command to be executed by + * kprobe_event_gen_cmd_end(). This function can be used to generate the + * complete command or only the first part of it; in the latter case, + * kprobe_event_add_fields() can be used to add more fields following this. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, + const char *name, const char *loc, ...) +{ + char buf[MAX_EVENT_NAME_LEN]; + struct dynevent_arg arg; + va_list args; + int ret; + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + if (kretprobe) + snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "p:kprobes/%s", name); + + ret = dynevent_str_add(cmd, buf); + if (ret) + return ret; + + dynevent_arg_init(&arg, 0); + arg.str = loc; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + return ret; + + va_start(args, loc); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_gen_cmd_start); + +/** + * __kprobe_event_add_fields - Add probe fields to a kprobe command from arg list + * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @args: Variable number of args, one string for each probe field + * + * NOTE: Users normally won't want to call this function directly, but + * rather use the kprobe_event_add_fields() wrapper, which + * automatically adds a NULL to the end of the arg list. If this + * function is used directly, make sure the last arg in the variable + * arg list is NULL. + * + * Add probe fields to an existing kprobe command using a variable + * list of args. Fields are added in the same order they're listed. + * + * Return: 0 if successful, error otherwise. + */ +int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) +{ + struct dynevent_arg arg; + va_list args; + int ret = 0; /* an empty arg list is a no-op, not an error */ + + if (cmd->type != DYNEVENT_TYPE_KPROBE) + return -EINVAL; + + dynevent_arg_init(&arg, 0); + + va_start(args, cmd); + for (;;) { + const char *field; + + field = va_arg(args, const char *); + if (!field) + break; + + if (++cmd->n_fields > MAX_TRACE_ARGS) { + ret = -EINVAL; + break; + } + + arg.str = field; + ret = dynevent_arg_add(cmd, &arg, NULL); + if (ret) + break; + } + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(__kprobe_event_add_fields); + +/** + * kprobe_event_delete - Delete a kprobe event + * @name: The name of the kprobe event to delete + * + * Delete a kprobe event with the given @name from kernel code rather + * than directly from the command line. + * + * Return: 0 if successful, error otherwise. 
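+ *
+ * A minimal sketch of the full generate/use/delete cycle (the event
+ * name, probe location and fetch args here are hypothetical, and the
+ * buffer length MAX_DYNEVENT_CMD_LEN is assumed from the dynevent_cmd
+ * interface):
+ *
+ *	struct dynevent_cmd cmd;
+ *	char *buf;
+ *	int ret;
+ *
+ *	buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	kprobe_event_cmd_init(&cmd, buf, MAX_DYNEVENT_CMD_LEN);
+ *
+ *	ret = kprobe_event_gen_cmd_start(&cmd, "myprobe", "do_sys_open",
+ *					 "dfd=%ax", "filename=%dx");
+ *	if (!ret)
+ *		ret = kprobe_event_add_fields(&cmd, "flags=%cx",
+ *					      "mode=+4($stack)");
+ *	if (!ret)
+ *		ret = kprobe_event_gen_cmd_end(&cmd);
+ *
+ *	kfree(buf);
+ *
+ * and later, once the event is no longer needed:
+ *
+ *	kprobe_event_delete("myprobe");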
+ */ +int kprobe_event_delete(const char *name) +{ + char buf[MAX_EVENT_NAME_LEN]; + + snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); + + return trace_run_command(buf, create_or_delete_trace_kprobe); +} +EXPORT_SYMBOL_GPL(kprobe_event_delete); + static int trace_kprobe_release(struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); @@ -1175,35 +1335,35 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - int size, dsize, pc; - unsigned long irq_flags; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + struct trace_event_buffer fbuffer; + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void @@ -1223,36 +1383,35 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - int size, pc, dsize; - unsigned long irq_flags; + struct trace_event_buffer fbuffer; struct trace_event_call *call = trace_probe_event_call(&tk->tp); + int dsize; WARN_ON(call != trace_file->event_call); if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(irq_flags); - pc = preempt_count(); + local_save_flags(fbuffer.flags); + fbuffer.pc = preempt_count(); + fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); - size = sizeof(*entry) + tk->tp.size + dsize; - - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, - size, irq_flags, pc); - if (!event) + fbuffer.event = + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, + fbuffer.flags, fbuffer.pc); + if (!fbuffer.event) return; - entry = ring_buffer_event_data(event); + fbuffer.regs = regs; + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); - event_trigger_unlock_commit_regs(trace_file, buffer, event, - entry, irq_flags, pc, regs); + trace_event_buffer_commit(&fbuffer); } static void @@ -1698,11 +1857,12 @@ static __init void setup_boot_kprobe_events(void) enable_boot_kprobe_events(); } -/* Make a tracefs interface for controlling probe points */ -static __init int init_kprobe_trace(void) +/* + * Register dynevent at 
subsys_initcall. This allows the kernel to set up kprobe + * events in fs_initcall without tracefs. + */ +static __init int init_kprobe_trace_early(void) { - struct dentry *d_tracer; - struct dentry *entry; int ret; ret = dyn_event_register(&trace_kprobe_ops); @@ -1712,6 +1872,16 @@ static __init int init_kprobe_trace(void) if (register_module_notifier(&trace_kprobe_module_nb)) return -EINVAL; + return 0; +} +subsys_initcall(init_kprobe_trace_early); + +/* Make a tracefs interface for controlling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + d_tracer = tracing_init_dentry(); if (IS_ERR(d_tracer)) return 0; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index b0388016b687..84582bf1ed5f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -32,7 +32,7 @@ static void mmio_reset_data(struct trace_array *tr) overrun_detected = false; prev_overruns = 0; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); } static int mmio_trace_init(struct trace_array *tr) @@ -122,7 +122,7 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { unsigned long cnt = atomic_xchg(&dropped_count, 0); - unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer); + unsigned long over = ring_buffer_overruns(iter->array_buffer->buffer); if (over > prev_overruns) cnt += over - prev_overruns; @@ -297,7 +297,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct trace_event_call *call = &event_mmiotrace_rw; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); @@ -318,7 +318,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + struct trace_array_cpu *data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); __trace_mmiotrace_rw(tr, data, rw); } @@ -327,7 +327,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct trace_event_call *call = &event_mmiotrace_map; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = preempt_count(); @@ -351,7 +351,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map) struct trace_array_cpu *data; preempt_disable(); - data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); + data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id()); __trace_mmiotrace_map(tr, data, map); preempt_enable(); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d9b4b7c22db4..b4909082f6a4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -538,7 +538,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) struct trace_array *tr = iter->tr; unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE; unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; - unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; + unsigned long long abs_ts = iter->ts - iter->array_buffer->time_start; unsigned long long rel_ts = next_ts - 
iter->ts; struct trace_seq *s = &iter->seq; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 617e297f46dc..97b10bb31a1f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -82,7 +82,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (cpu != wakeup_current_cpu) goto out_enable; - *data = per_cpu_ptr(tr->trace_buffer.data, cpu); + *data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&(*data)->disabled); if (unlikely(disabled != 1)) goto out; @@ -378,7 +378,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct trace_event_call *call = &event_context_switch; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -408,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); @@ -459,7 +459,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (likely(disabled != 1)) goto out; @@ -471,7 +471,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out_unlock; /* The task we are waiting for is waking up */ - data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); @@ -494,7 +494,7 @@ out_unlock: arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void __wakeup_reset(struct trace_array *tr) @@ -513,7 +513,7 @@ static void wakeup_reset(struct trace_array *tr) { unsigned long flags; - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); local_irq_save(flags); arch_spin_lock(&wakeup_lock); @@ -551,7 +551,7 @@ probe_wakeup(void *ignore, struct task_struct *p) return; pc = preempt_count(); - disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; @@ -583,7 +583,7 @@ probe_wakeup(void *ignore, struct task_struct *p) local_save_flags(flags); - 
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); __trace_stack(wakeup_trace, flags, 0, pc); @@ -598,7 +598,7 @@ probe_wakeup(void *ignore, struct task_struct *p) out_locked: arch_spin_unlock(&wakeup_lock); out: - atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); + atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); } static void start_wakeup_tracer(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 69ee8ef12cee..b5e3496cf803 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,7 +23,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) return 0; } -static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) +static int trace_test_buffer_cpu(struct array_buffer *buf, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; @@ -60,7 +60,7 @@ static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) * Test the trace buffer to see if all the elements * are still sane. */ -static int __maybe_unused trace_test_buffer(struct trace_buffer *buf, unsigned long *count) +static int __maybe_unused trace_test_buffer(struct array_buffer *buf, unsigned long *count) { unsigned long flags, cnt = 0; int cpu, ret = 0; @@ -362,7 +362,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* we should have nothing in the buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); if (ret) goto out; @@ -383,7 +383,7 @@ static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; tracing_start(); @@ -682,7 +682,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 0; /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); ftrace_enabled = 1; trace->reset(tr); @@ -768,7 +768,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs */ - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(&tr->array_buffer); set_graph_array(tr); ret = register_ftrace_graph(&fgraph_ops); if (ret) { @@ -790,7 +790,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); /* Need to also simulate the tr->reset to remove this fgraph_ops */ tracing_stop_cmdline_record(); @@ -848,7 +848,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -910,7 +910,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* stop the tracing. 
*/ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); trace->reset(tr); @@ -976,7 +976,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1006,7 +1006,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (ret) goto out; @@ -1136,7 +1136,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ - ret = trace_test_buffer(&tr->trace_buffer, NULL); + ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) ret = trace_test_buffer(&tr->max_buffer, &count); @@ -1177,7 +1177,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); + ret = trace_test_buffer(&tr->array_buffer, &count); trace->reset(tr); tracing_start(); diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 87de6edafd14..1d84fcc78e3e 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -30,9 +30,6 @@ /* How much buffer is left on the trace_seq? */ #define TRACE_SEQ_BUF_LEFT(s) seq_buf_buffer_left(&(s)->seq) -/* How much buffer is written? */ -#define TRACE_SEQ_BUF_USED(s) seq_buf_used(&(s)->seq) - /* * trace_seq should work with being initialized with 0s. */ diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 874f1274cf99..d1fa19773cc8 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -280,18 +280,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, @@ -304,7 +308,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -315,17 +319,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? 
*/ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -334,15 +336,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 2978c29d87d4..d85a2f0f316b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -297,7 +297,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; unsigned long args[6]; int pc; @@ -325,7 +325,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) @@ -347,7 +347,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; - struct ring_buffer *buffer; + struct trace_buffer *buffer; unsigned long irq_flags; int pc; int syscall_nr; @@ -371,7 +371,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) local_save_flags(irq_flags); pc = preempt_count(); - buffer = tr->trace_buffer.buffer; + buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), irq_flags, pc); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7885ebd23d0c..18d16f3ef980 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -931,8 +931,8 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 7be3e7530841..257ffb993ea2 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -24,6 +24,7 @@ void bacct_add_tsk(struct user_namespace *user_ns, const struct cred *tcred; u64 utime, stime, utimescaled, stimescaled; u64 delta; + time64_t btime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); @@ -32,9 +33,11 @@ void bacct_add_tsk(struct user_namespace *user_ns, /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); stats->ac_etime = delta; - /* Convert to seconds for btime */ - do_div(delta, USEC_PER_SEC); - stats->ac_btime = get_seconds() - delta; + /* Convert to seconds for btime (note y2106 limit) */ + 
btime = ktime_get_real_seconds() - div_u64(delta, USEC_PER_SEC); + stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); + stats->ac_btime64 = btime; + + if (thread_group_leader(tsk)) { stats->ac_exitcode = tsk->exit_code; if (tsk->flags & PF_FORKNOEXEC)
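The saturation in the hunk above is easy to sanity-check outside the kernel. A small standalone sketch (clamp_t re-created locally for illustration; the values are made up) of how the legacy 32-bit ac_btime clamps at the y2106 limit while the new 64-bit ac_btime64 keeps the exact value:

#include <stdint.h>
#include <stdio.h>

/* local stand-in for the kernel's clamp_t() */
#define clamp_t(type, v, lo, hi) \
	((type)(v) < (type)(lo) ? (type)(lo) : \
	 (type)(v) > (type)(hi) ? (type)(hi) : (type)(v))

int main(void)
{
	/* a birth time ~10s past 2106-02-07 06:28:15 UTC (U32_MAX seconds) */
	int64_t btime = (int64_t)UINT32_MAX + 10;

	uint32_t ac_btime = clamp_t(int64_t, btime, 0, UINT32_MAX);	/* saturates */
	uint64_t ac_btime64 = btime;					/* exact */

	/* prints: ac_btime=4294967295 ac_btime64=4294967305 */
	printf("ac_btime=%u ac_btime64=%llu\n",
	       ac_btime, (unsigned long long)ac_btime64);
	return 0;
}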